mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 22:14:24 +08:00
Compare commits
238 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
560f6debc6 | ||
|
|
ea1ac33de8 | ||
|
|
7ea30237ae | ||
|
|
bc668487e2 | ||
|
|
1769f9864b | ||
|
|
75ffd97802 | ||
|
|
cfd5b7da0f | ||
|
|
26b7d1df26 | ||
|
|
0747929cd6 | ||
|
|
5bcfb102f4 | ||
|
|
908fb1ccea | ||
|
|
af8389baa4 | ||
|
|
24ca1017cd | ||
|
|
85c21aeb01 | ||
|
|
2f249048d9 | ||
|
|
974cb40ab3 | ||
|
|
c01c16ea60 | ||
|
|
bd157c249c | ||
|
|
b0ac33c1b1 | ||
|
|
82e73a9525 | ||
|
|
adc316d671 | ||
|
|
6a0b6b99ac | ||
|
|
08dd9dd5b4 | ||
|
|
557a1c2d00 | ||
|
|
f77bbfedda | ||
|
|
3aa3fe19e2 | ||
|
|
35fcc0493e | ||
|
|
9485bec2fa | ||
|
|
4b759e731c | ||
|
|
7dd6a8a1aa | ||
|
|
96725ae8b9 | ||
|
|
b3a0ded9a8 | ||
|
|
184c491803 | ||
|
|
f944b3ce00 | ||
|
|
2557f41863 | ||
|
|
2b92cee3f7 | ||
|
|
8071768579 | ||
|
|
71c8541b68 | ||
|
|
3d66c77188 | ||
|
|
8701512961 | ||
|
|
b317c780ba | ||
|
|
681aa3bf8b | ||
|
|
a68e053471 | ||
|
|
25a90e3b32 | ||
|
|
2f62759dfe | ||
|
|
cf9ec46ab8 | ||
|
|
ecbef51b10 | ||
|
|
dfff8c9587 | ||
|
|
cc5d52bbf9 | ||
|
|
a9e9f86c93 | ||
|
|
a2c52713b2 | ||
|
|
545aca88d8 | ||
|
|
ac27248784 | ||
|
|
5758bfbaea | ||
|
|
8d3a079774 | ||
|
|
718c138510 | ||
|
|
29aac70e67 | ||
|
|
700575adfe | ||
|
|
9fe6e5df85 | ||
|
|
ce7434a463 | ||
|
|
ad7d876d07 | ||
|
|
0dc19e86fa | ||
|
|
a12acaa5c7 | ||
|
|
ff62efe720 | ||
|
|
2407877184 | ||
|
|
5fde050738 | ||
|
|
a855f88073 | ||
|
|
cfa2acd61d | ||
|
|
d9db2fe2e7 | ||
|
|
15d62cd3b6 | ||
|
|
19a89aeb7e | ||
|
|
677c61c32f | ||
|
|
4dd4f66397 | ||
|
|
04b7828abc | ||
|
|
9c621ecab8 | ||
|
|
ab9c4d9416 | ||
|
|
e5eb62255a | ||
|
|
98ff79432b | ||
|
|
24fa80ba2a | ||
|
|
3999d792ef | ||
|
|
4db3732749 | ||
|
|
07131e8b40 | ||
|
|
39b91e74c9 | ||
|
|
d4bb7ec3bc | ||
|
|
6175b7e359 | ||
|
|
10442d506a | ||
|
|
573a695c3d | ||
|
|
a76bef0d01 | ||
|
|
e20111b566 | ||
|
|
4a1d9c8f75 | ||
|
|
26c86282e3 | ||
|
|
0eaeb1650d | ||
|
|
f4a6533f6b | ||
|
|
df1b6a13e1 | ||
|
|
e8f24f617c | ||
|
|
9454fdc217 | ||
|
|
22543d8fe5 | ||
|
|
60d1dc82e6 | ||
|
|
87486f87ef | ||
|
|
80e7da0f13 | ||
|
|
3745beae66 | ||
|
|
3965840bfa | ||
|
|
a88c6f3d32 | ||
|
|
ed6c6f0026 | ||
|
|
bdda9d72b5 | ||
|
|
fd080e778e | ||
|
|
9f72df2ecd | ||
|
|
617db012f0 | ||
|
|
9d15541237 | ||
|
|
35c99564c6 | ||
|
|
1d8fe334d6 | ||
|
|
d86bb314ac | ||
|
|
0ef8eb59f8 | ||
|
|
b5fe4a9a87 | ||
|
|
11fea31b98 | ||
|
|
f629275ed5 | ||
|
|
a5f6166469 | ||
|
|
501050e591 | ||
|
|
e1b240b2b2 | ||
|
|
3d79278ed7 | ||
|
|
5e0b197a43 | ||
|
|
9c4170d9e2 | ||
|
|
af721eb196 | ||
|
|
788e315f5e | ||
|
|
4a82a8d5a8 | ||
|
|
11019a26f8 | ||
|
|
6f8909dce9 | ||
|
|
5525103aaf | ||
|
|
291ef737b1 | ||
|
|
af125bdd57 | ||
|
|
79089bbb8c | ||
|
|
1f08498d00 | ||
|
|
49ba714a03 | ||
|
|
85fd9296b2 | ||
|
|
1cda14867f | ||
|
|
2d2b26f7dc | ||
|
|
93b33af44a | ||
|
|
eb80490bcd | ||
|
|
ba2b06f5af | ||
|
|
fecc762db1 | ||
|
|
1e406253ab | ||
|
|
6e3b85f43d | ||
|
|
58f1b626e2 | ||
|
|
c104a08e16 | ||
|
|
dd0f6ca1e6 | ||
|
|
f02ea91b51 | ||
|
|
6768a501a3 | ||
|
|
879e15c759 | ||
|
|
89285b4abc | ||
|
|
c584714f91 | ||
|
|
f5016403b7 | ||
|
|
c8f66ae6bb | ||
|
|
858c967e71 | ||
|
|
f9ca5de5bf | ||
|
|
252c3476a1 | ||
|
|
19210df6db | ||
|
|
15c5730749 | ||
|
|
3764adb7ef | ||
|
|
9160adb1cf | ||
|
|
3ebf4338ab | ||
|
|
2eb4b7b39b | ||
|
|
c241e49b48 | ||
|
|
238c881132 | ||
|
|
49dc733536 | ||
|
|
755bd78f60 | ||
|
|
77f80cd51f | ||
|
|
3df6000635 | ||
|
|
5efee2b40d | ||
|
|
f3c2e59184 | ||
|
|
24ab660e6e | ||
|
|
6c0a418068 | ||
|
|
07a180991e | ||
|
|
4732004b67 | ||
|
|
faa9cd0431 | ||
|
|
e0c3c2394d | ||
|
|
2dec584f54 | ||
|
|
5ab2ccae40 | ||
|
|
1017d08626 | ||
|
|
32b1bbd943 | ||
|
|
1abf31ffa5 | ||
|
|
aec60829d2 | ||
|
|
888c3c38c2 | ||
|
|
e2c4648037 | ||
|
|
f7b98c0530 | ||
|
|
d4bd3faa16 | ||
|
|
c4f3b1cd7b | ||
|
|
74add69a83 | ||
|
|
a490b19d24 | ||
|
|
44cfa2c1a2 | ||
|
|
6dd9522b3f | ||
|
|
5e352cb8e4 | ||
|
|
2fad7315b8 | ||
|
|
520759dfe8 | ||
|
|
577b44ae11 | ||
|
|
66f742d6c0 | ||
|
|
7ba9f688c7 | ||
|
|
883f0307a2 | ||
|
|
c9719f873f | ||
|
|
123d25f853 | ||
|
|
56da42db84 | ||
|
|
7f820449ca | ||
|
|
ecb2cf5f11 | ||
|
|
7f27da9b3b | ||
|
|
01eb499c69 | ||
|
|
1ff6f70682 | ||
|
|
ddde996e10 | ||
|
|
1c9212c7e0 | ||
|
|
a568143991 | ||
|
|
2b6f1bd9ee | ||
|
|
2527aa5ea6 | ||
|
|
4c28091ecd | ||
|
|
d49725423e | ||
|
|
fcb4dc61b5 | ||
|
|
b7330c074f | ||
|
|
e8f4123030 | ||
|
|
975057c4c4 | ||
|
|
a0e01668d1 | ||
|
|
2c77491416 | ||
|
|
be19e4a9cb | ||
|
|
61ce91a9d7 | ||
|
|
18f1d07e85 | ||
|
|
b596976194 | ||
|
|
1f6b86d516 | ||
|
|
31499b977d | ||
|
|
f83850e380 | ||
|
|
1a4ccd86fe | ||
|
|
5c3c6fec09 | ||
|
|
f97e742daa | ||
|
|
7f39d401e2 | ||
|
|
af412c284d | ||
|
|
874cd3bae5 | ||
|
|
ea28ebdd13 | ||
|
|
3ba468933f | ||
|
|
45f33e4bea | ||
|
|
cb3443ffb1 | ||
|
|
7513cc1947 | ||
|
|
ab2dd4b75f | ||
|
|
8bbcba76cf |
5
.gitignore
vendored
5
.gitignore
vendored
@@ -11,4 +11,7 @@ data/components.cif*
|
||||
CMakeSettings.json
|
||||
msvc/
|
||||
Testing/
|
||||
|
||||
rsrc/feature-request.txt
|
||||
test/test-create_sugar_?.cif
|
||||
test/oprofile_data/
|
||||
test/perf.data*
|
||||
|
||||
6
.gitmodules
vendored
Normal file
6
.gitmodules
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
[submodule "regex"]
|
||||
path = regex
|
||||
url = https://github.com/boostorg/regex
|
||||
[submodule "gxrio"]
|
||||
path = gxrio
|
||||
url = https://github.com/mhekkel/gxrio.git
|
||||
33
.travis.yml
33
.travis.yml
@@ -1,33 +0,0 @@
|
||||
language: cpp
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
dist: focal
|
||||
|
||||
osx_image:
|
||||
- xcode12
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- libboost-all-dev
|
||||
|
||||
before_install:
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install make; fi
|
||||
|
||||
script:
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then ./configure --disable-shared --disable-revision --disable-download-ccd ; else ./configure --disable-revision --disable-download-ccd ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake ; else make ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake test ; else make test ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then sudo gmake install ; else sudo make install; fi
|
||||
|
||||
# jobs:
|
||||
# allow_failures:
|
||||
# - os: osx
|
||||
|
||||
420
CMakeLists.txt
420
CMakeLists.txt
@@ -6,10 +6,10 @@
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer
|
||||
# list of conditions and the following disclaimer
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
@@ -25,32 +25,28 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# set the project name
|
||||
project(cifpp VERSION 2.0.3 LANGUAGES CXX)
|
||||
project(cifpp VERSION 5.0.0 LANGUAGES CXX)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
enable_testing()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CheckFunctionExists)
|
||||
include(CheckIncludeFiles)
|
||||
include(CheckLibraryExists)
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(Dart)
|
||||
include(FindFilesystem)
|
||||
include(GenerateExportHeader)
|
||||
include(CheckCXXSourceCompiles)
|
||||
|
||||
# include(Dart)
|
||||
include(AddGitSubmodule)
|
||||
|
||||
set(CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
find_package(Filesystem REQUIRED)
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
# https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
|
||||
elseif(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
|
||||
endif()
|
||||
|
||||
# Building shared libraries?
|
||||
@@ -61,6 +57,31 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
|
||||
# Optionally build a version to be installed inside CCP4
|
||||
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)
|
||||
|
||||
# Lots of code depend on the availability of the components.cif file
|
||||
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)
|
||||
|
||||
# An optional cron script can be installed to keep the data files up-to-date
|
||||
if(UNIX)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
|
||||
endif()
|
||||
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
|
||||
if(EXISTS "$ENV{CCP4}")
|
||||
if(EXISTS "$ENV{CLIBD}/syminfo.lib")
|
||||
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
|
||||
else()
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CLIBD}")
|
||||
endif()
|
||||
else()
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
|
||||
endif()
|
||||
|
||||
# Unit tests
|
||||
option(ENABLE_TESTING "Build test exectuables" OFF)
|
||||
|
||||
if(BUILD_FOR_CCP4)
|
||||
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
|
||||
message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
|
||||
@@ -69,43 +90,18 @@ if(BUILD_FOR_CCP4)
|
||||
list(APPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
|
||||
|
||||
# This is the only option:
|
||||
if(WIN32)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
endif()
|
||||
endif("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
|
||||
endif()
|
||||
|
||||
# Check if CCP4 is available
|
||||
if(EXISTS "$ENV{CCP4}")
|
||||
set(CCP4 $ENV{CCP4})
|
||||
set(CLIBD ${CCP4}/lib/data)
|
||||
endif()
|
||||
if(CCP4 AND NOT CLIBD)
|
||||
set(CLIBD ${CCP4}/lib/data)
|
||||
endif()
|
||||
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
|
||||
if(EXISTS "${CCP4}")
|
||||
if(CIFPP_RECREATE_SYMOP_DATA AND NOT EXISTS "${CLIBD}/syminfo.lib")
|
||||
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in ${CLIBD}")
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
else()
|
||||
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
|
||||
endif()
|
||||
else()
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
message("Not trying to recreate SymOpTable_data.hpp since CCP4 is not defined")
|
||||
endif()
|
||||
|
||||
# set(CMAKE_DEBUG_POSTFIX d)
|
||||
|
||||
if(MSVC)
|
||||
# make msvc standards compliant...
|
||||
add_compile_options(/permissive-)
|
||||
# make msvc standards compliant...
|
||||
add_compile_options(/permissive-)
|
||||
|
||||
macro(get_WIN32_WINNT version)
|
||||
if (WIN32 AND CMAKE_SYSTEM_VERSION)
|
||||
if(WIN32 AND CMAKE_SYSTEM_VERSION)
|
||||
set(ver ${CMAKE_SYSTEM_VERSION})
|
||||
string(REPLACE "." "" ver ${ver})
|
||||
string(REGEX REPLACE "([0-9])" "0\\1" ver ${ver})
|
||||
@@ -116,185 +112,163 @@ if(MSVC)
|
||||
|
||||
get_WIN32_WINNT(ver)
|
||||
add_definitions(-D_WIN32_WINNT=${ver})
|
||||
|
||||
# On Windows, do not install in the system location
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND NOT BUILD_FOR_CCP4)
|
||||
message(STATUS "The library and auxiliary files will be installed in $ENV{LOCALAPPDATA}/${PROJECT_NAME}")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{LOCALAPPDATA}/${PROJECT_NAME}" CACHE PATH "..." FORCE)
|
||||
endif()
|
||||
|
||||
# Find out the processor type for the target
|
||||
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
|
||||
set(COFF_TYPE "x64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
|
||||
set(COFF_TYPE "x86")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ARM64")
|
||||
set(COFF_TYPE "arm64")
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported or unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
set(COFF_SPEC "--coff=${COFF_TYPE}")
|
||||
|
||||
# for mrc, just in case
|
||||
list(APPEND CMAKE_PREFIX_PATH "$ENV{LOCALAPPDATA}/mrc")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE AND NOT BUILD_FOR_CCP4 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
# On Linux, install in the $HOME/.local folder by default
|
||||
message(STATUS "The library and auxiliary files will be installed in $ENV{HOME}/.local")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "..." FORCE)
|
||||
endif()
|
||||
|
||||
# Optionally use mrc to create resources
|
||||
|
||||
if(WIN32 AND BUILD_SHARED_LIBS)
|
||||
message("Not using resources when building shared libraries for Windows")
|
||||
else()
|
||||
find_program(MRC mrc)
|
||||
|
||||
if(MRC)
|
||||
option(CIFPP_USE_RSRC "Use mrc to create resources" ON)
|
||||
else()
|
||||
message("Using resources not possible since mrc was not found")
|
||||
endif()
|
||||
|
||||
if(CIFPP_USE_RSRC STREQUAL "ON")
|
||||
set(CIFPP_USE_RSRC 1)
|
||||
|
||||
message("Using resources compiled with ${MRC}")
|
||||
add_compile_definitions(CIFPP_USE_RSRC)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Libraries
|
||||
|
||||
# Start by finding out if std:regex is usable. Note that the current
|
||||
# implementation in GCC is not acceptable, it crashes on long lines.
|
||||
# The implementation in libc++ (clang) and MSVC seem to be OK.
|
||||
check_cxx_source_compiles("
|
||||
#include <iostream>
|
||||
#ifndef __GLIBCXX__
|
||||
#error
|
||||
#endif
|
||||
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)
|
||||
|
||||
if(GXX_LIBSTDCPP)
|
||||
message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")
|
||||
|
||||
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
|
||||
${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
|
||||
|
||||
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
|
||||
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will try to use boost::regex instead")
|
||||
|
||||
set(BOOST_REGEX_STANDALONE ON)
|
||||
add_git_submodule(regex EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG)
|
||||
find_package(Threads)
|
||||
|
||||
set(Boost_DETAILED_FAILURE_MSG ON)
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
set(Boost_USE_STATIC_LIBS ON)
|
||||
endif()
|
||||
find_package(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
|
||||
add_git_submodule(gxrio EXCLUDE_FROM_ALL)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(LibLZMA REQUIRED)
|
||||
|
||||
if(NOT MSVC AND Boost_USE_STATIC_LIBS)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(BZip2 REQUIRED)
|
||||
endif()
|
||||
include(FindFilesystem)
|
||||
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})
|
||||
|
||||
include(FindAtomic)
|
||||
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
|
||||
|
||||
# Create a revision file, containing the current git version info
|
||||
|
||||
find_package(Git)
|
||||
if(GIT_FOUND AND EXISTS "${CMAKE_SOURCE_DIR}/.git")
|
||||
include(GetGitRevisionDescription)
|
||||
get_git_head_revision(REFSPEC COMMITHASH)
|
||||
|
||||
# Generate our own version string
|
||||
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
|
||||
else()
|
||||
message(WARNING "Git not found, cannot set version info")
|
||||
|
||||
SET(BUILD_VERSION_STRING ${PROJECT_VERSION})
|
||||
endif()
|
||||
|
||||
# generate version.h
|
||||
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
|
||||
configure_file("${CMAKE_SOURCE_DIR}/src/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
|
||||
include(VersionString)
|
||||
write_version_header("LibCIFPP")
|
||||
|
||||
# SymOp data table
|
||||
if(CIFPP_RECREATE_SYMOP_DATA)
|
||||
# The tool to create the table
|
||||
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/tools/symop-map-generator.cpp")
|
||||
|
||||
add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
|
||||
|
||||
target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
if(Boost_INCLUDE_DIR)
|
||||
target_include_directories(symop-map-generator PUBLIC ${Boost_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
set($ENV{CLIBD} ${CLIBD})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> ${CLIBD}/syminfo.lib ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
)
|
||||
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Sources
|
||||
set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/category.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/condition.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/datablock.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/dictionary_parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/file.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/item.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/row.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/validate.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/text.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/utilities.cpp
|
||||
|
||||
set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/AtomType.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/BondMap.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Cif++.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Cif2PDB.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifParser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifUtils.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifValidator.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Compound.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/PDB2Cif.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/PDB2CifRemark3.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Secondary.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Structure.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Symmetry.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/TlsParser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/atom_type.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/compound.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/symmetry.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/model.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/tls.cpp
|
||||
)
|
||||
|
||||
set(project_headers
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/AtomType.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/BondMap.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Cif2PDB.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifParser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifUtils.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifValidator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Matrix.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/PDB2Cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/PDB2CifRemark3.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Secondary.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Structure.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Symmetry.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/TlsParser.hpp
|
||||
)
|
||||
set(project_headers
|
||||
${PROJECT_SOURCE_DIR}/include/cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/item.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/datablock.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/file.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/validate.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/list.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/iterator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/forward_decl.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/dictionary_parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
|
||||
)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
|
||||
add_library(cifpp::cifpp ALIAS cifpp)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp)
|
||||
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(BOOST_REGEX_STANDALONE)
|
||||
target_compile_definitions(cifpp PUBLIC USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
|
||||
endif()
|
||||
|
||||
target_include_directories(cifpp
|
||||
PUBLIC
|
||||
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
${Boost_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
target_include_directories(cifpp
|
||||
PRIVATE
|
||||
${CMAKE_BINARY_DIR}
|
||||
)
|
||||
target_link_libraries(cifpp PUBLIC
|
||||
Threads::Threads
|
||||
ZLIB::ZLIB
|
||||
LibLZMA::LibLZMA
|
||||
${CIFPP_REQUIRED_LIBRARIES})
|
||||
|
||||
target_link_libraries(cifpp Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
get_target_property(GXRIO_INCLUDE_DIR gxrio::gxrio INTERFACE_INCLUDE_DIRECTORIES)
|
||||
target_include_directories(cifpp PRIVATE ${GXRIO_INCLUDE_DIR})
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
endif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
if(BOOST_REGEX_STANDALONE)
|
||||
target_include_directories(cifpp PRIVATE regex/include)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
|
||||
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" OFF)
|
||||
if(CIFPP_DOWNLOAD_CCD)
|
||||
# download the components.cif file from CCD
|
||||
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
|
||||
|
||||
if (NOT EXISTS ${COMPONENTS_CIF})
|
||||
|
||||
if (NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
|
||||
if(NOT EXISTS ${COMPONENTS_CIF})
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
|
||||
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
|
||||
endif()
|
||||
|
||||
@@ -305,7 +279,7 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
SHOW_PROGRESS)
|
||||
add_custom_command(OUTPUT ${COMPONENTS_CIF}
|
||||
COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/data/)
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
|
||||
else()
|
||||
file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
|
||||
SHOW_PROGRESS)
|
||||
@@ -316,16 +290,12 @@ if(CIFPP_DOWNLOAD_CCD)
|
||||
endif()
|
||||
|
||||
if(UNIX)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
|
||||
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE STRING "The cache directory to use")
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
endif()
|
||||
|
||||
generate_export_header(cifpp
|
||||
EXPORT_FILE_NAME cif++/Cif++Export.hpp)
|
||||
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} )
|
||||
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} )
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
|
||||
set(SHARE_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/libcifpp)
|
||||
|
||||
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "The directory containing the provided data files")
|
||||
@@ -333,7 +303,6 @@ set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "
|
||||
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
|
||||
|
||||
# Install rules
|
||||
|
||||
install(TARGETS cifpp
|
||||
EXPORT cifppTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
@@ -341,6 +310,13 @@ install(TARGETS cifpp
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:${PROJECT_NAME}>
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
install(EXPORT cifppTargets
|
||||
FILE "cifppTargets.cmake"
|
||||
NAMESPACE cifpp::
|
||||
@@ -354,27 +330,29 @@ install(
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/cif++/Cif++Export.hpp"
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
|
||||
FILES include/cif++.hpp
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT Devel
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${SHARE_INSTALL_DIR}
|
||||
DESTINATION ${CIFPP_DATA_DIR}
|
||||
)
|
||||
|
||||
configure_package_config_file(Config.cmake.in
|
||||
configure_package_config_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
|
||||
)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
COMPONENT Devel
|
||||
)
|
||||
@@ -386,21 +364,20 @@ set_target_properties(cifpp PROPERTIES
|
||||
INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})
|
||||
|
||||
set_property(TARGET cifpp APPEND PROPERTY
|
||||
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
|
||||
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
|
||||
)
|
||||
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
# pkgconfig support
|
||||
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(exec_prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in @ONLY)
|
||||
@@ -408,38 +385,26 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
|
||||
INPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
|
||||
# Unit tests
|
||||
if(ENABLE_TESTING)
|
||||
enable_testing()
|
||||
|
||||
option(CIFPP_BUILD_TESTS "Build test exectuables" OFF)
|
||||
find_package(Boost REQUIRED headers)
|
||||
|
||||
if(CIFPP_BUILD_TESTS)
|
||||
|
||||
if(CIFPP_USE_RSRC)
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj
|
||||
COMMAND ${MRC} -o ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj ${CMAKE_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic ${COFF_SPEC}
|
||||
)
|
||||
set(CIFPP_TEST_RESOURCE ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj)
|
||||
endif()
|
||||
|
||||
list(APPEND CIFPP_tests
|
||||
# pdb2cif
|
||||
rename-compound
|
||||
structure
|
||||
unit)
|
||||
list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar
|
||||
)
|
||||
|
||||
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
set(CIFPP_TEST "${CIFPP_TEST}-test")
|
||||
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
|
||||
|
||||
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} ${CIFPP_TEST_RESOURCE})
|
||||
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE})
|
||||
|
||||
target_include_directories(${CIFPP_TEST} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_BINARY_DIR} # for config.h
|
||||
)
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Boost::headers)
|
||||
|
||||
if(CIFPP_USE_RSRC)
|
||||
mrc_target_resources(${CIFPP_TEST} ${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
|
||||
if(MSVC)
|
||||
# Specify unwind semantics so that MSVC knowns how to handle exceptions
|
||||
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
|
||||
@@ -453,18 +418,16 @@ if(CIFPP_BUILD_TESTS)
|
||||
|
||||
add_test(NAME ${CIFPP_TEST}
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
|
||||
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message("Will install in ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
# Optionally install the update scripts for CCD and dictionary files
|
||||
|
||||
if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
set(CIFPP_CRON_DIR "$ENV{DESTDIR}/etc/cron.weekly")
|
||||
|
||||
configure_file(${CMAKE_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
|
||||
DESTINATION ${CIFPP_CRON_DIR}
|
||||
@@ -485,4 +448,3 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
endif()
|
||||
|
||||
|
||||
5
LICENSE
5
LICENSE
@@ -1,6 +1,7 @@
|
||||
SPDX-License-Identifier: BSD-2-Clause
|
||||
BSD-2-Clause License
|
||||
|
||||
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
@@ -20,4 +21,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
@@ -12,7 +12,6 @@ have been used as well as MSVC version 2019.
|
||||
|
||||
Other requirements are:
|
||||
|
||||
- Boost libraries, at least version 1.70
|
||||
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
|
||||
allows including data files into the executable making them easier to
|
||||
install. Strictly this is optional, but at the expense of functionality.
|
||||
|
||||
59
changelog
59
changelog
@@ -1,3 +1,62 @@
|
||||
Version 5.0.0
|
||||
- Total rewrite of cif part
|
||||
- Removed DSSP code, moved into dssp project itself
|
||||
|
||||
Version 4.2.1
|
||||
- Improved REMARK 3 parser (for TLS in large molecules)
|
||||
|
||||
Version 4.2.0
|
||||
- Yet another rewrite of resource loading
|
||||
|
||||
Version 4.1.1
|
||||
- Fall back to zero charge for scattering factors if the atom
|
||||
was not found in the table.
|
||||
- Improve code to locate resources, failing less.
|
||||
|
||||
Version 4.1.0
|
||||
- Some interface changes for mmcif::Atom
|
||||
|
||||
Version 4.0.1
|
||||
- Added a bunch of const methods to Datablock and Category.
|
||||
- Changed PDB writing interface to accept Datablock instead of File.
|
||||
|
||||
Version 4.0.0
|
||||
- getResidue in mmcif::Structure now requires both a
|
||||
sequence ID and an auth sequence ID. As a result the code was cleaned
|
||||
up considerably.
|
||||
|
||||
Version 3.0.5
|
||||
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
|
||||
|
||||
Version 3.0.4
|
||||
- Fix in mmCIF parser, now correctly handles the unquoted
|
||||
string ??
|
||||
|
||||
Version 3.0.3
|
||||
- Better configuration checks, for atomic e.g.
|
||||
- Fixed a problem introduced in refactoring mmcif::Atom
|
||||
- Version string creation
|
||||
|
||||
Version 3.0.2
|
||||
- refactored mmcif::Atom for performance reasons
|
||||
|
||||
Version 3.0.1
|
||||
- Fixed processing of proline restraints file from CCP4, proline
|
||||
is a peptide, really.
|
||||
- Added code to facilitate DSSP
|
||||
|
||||
Version 3.0.0
|
||||
- Replaced many strings in the API with string_view for
|
||||
performance reasons.
|
||||
- Upgraded mmcif::Structure
|
||||
- various other small fixes
|
||||
|
||||
Version 2.0.5
|
||||
- Backporting updated CMakeLists.txt file
|
||||
|
||||
Version 2.0.4
|
||||
- Reverted a too strict test when reading cif files.
|
||||
|
||||
Version 2.0.3
|
||||
- Fixed reading mmCIF files where model numbers are used and
|
||||
model number 1 is missing.
|
||||
|
||||
27
cmake/AddGitSubmodule.cmake
Normal file
27
cmake/AddGitSubmodule.cmake
Normal file
@@ -0,0 +1,27 @@
|
||||
cmake_minimum_required(VERSION 3.16..3.19)
|
||||
|
||||
function(add_git_submodule dir)
|
||||
# add a Git submodule directory to CMake, assuming the
|
||||
# Git submodule directory is a CMake project.
|
||||
#
|
||||
# Usage: in CMakeLists.txt
|
||||
#
|
||||
# include(AddGitSubmodule.cmake)
|
||||
# add_git_submodule(mysubmod_dir)
|
||||
find_package(Git REQUIRED)
|
||||
|
||||
if(NOT EXISTS ${dir}/CMakeLists.txt)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND_ERROR_IS_FATAL ANY)
|
||||
else()
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(ENABLE_TESTING OFF)
|
||||
|
||||
add_subdirectory(${dir} ${ARGV})
|
||||
endfunction(add_git_submodule)
|
||||
62
cmake/FindAtomic.cmake
Normal file
62
cmake/FindAtomic.cmake
Normal file
@@ -0,0 +1,62 @@
|
||||
# Simple check to see if we need a library for std::atomic
|
||||
|
||||
if(TARGET std::atomic)
|
||||
return()
|
||||
endif()
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
include(CMakePushCheckState)
|
||||
include(CheckIncludeFileCXX)
|
||||
include(CheckCXXSourceRuns)
|
||||
|
||||
cmake_push_check_state()
|
||||
|
||||
check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
|
||||
mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)
|
||||
|
||||
set(code [[
|
||||
#include <atomic>
|
||||
int main(int argc, char** argv) {
|
||||
std::atomic<long long> s;
|
||||
++s;
|
||||
return 0;
|
||||
}
|
||||
]])
|
||||
|
||||
check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)
|
||||
|
||||
if(_CXX_ATOMIC_BUILTIN)
|
||||
set(_found 1)
|
||||
else()
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)
|
||||
list(APPEND FOLLY_LINK_LIBRARIES atomic)
|
||||
|
||||
check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)
|
||||
if (NOT _CXX_ATOMIC_LIB_NEEDED)
|
||||
message(FATAL_ERROR "unable to link C++ std::atomic code: you may need \
|
||||
to install GNU libatomic")
|
||||
else()
|
||||
set(_found 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(_found)
|
||||
add_library(std::atomic INTERFACE IMPORTED)
|
||||
set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)
|
||||
|
||||
if(_CXX_ATOMIC_BUILTIN)
|
||||
# Nothing to add...
|
||||
elseif(_CXX_ATOMIC_LIB_NEEDED)
|
||||
set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
|
||||
set(STDCPPATOMIC_LIBRARY atomic)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
cmake_pop_check_state()
|
||||
|
||||
set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)
|
||||
|
||||
if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
|
||||
message(FATAL_ERROR "Cannot run simple program using std::atomic")
|
||||
endif()
|
||||
@@ -12,8 +12,6 @@ include(CheckCXXSourceCompiles)
|
||||
|
||||
cmake_push_check_state()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
|
||||
mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)
|
||||
|
||||
@@ -59,8 +57,10 @@ if(_found)
|
||||
# Nothing to add...
|
||||
elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
|
||||
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
|
||||
set(STDCPPFS_LIBRARY stdc++fs)
|
||||
elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
|
||||
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
|
||||
set(STDCPPFS_LIBRARY c++fs)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
77
cmake/VersionString.cmake
Normal file
77
cmake/VersionString.cmake
Normal file
@@ -0,0 +1,77 @@
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
|
||||
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
# Create a revision file, containing the current git version info, if any
|
||||
function(write_version_header)
|
||||
include(GetGitRevisionDescription)
|
||||
if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND))
|
||||
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
|
||||
|
||||
if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
|
||||
set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
|
||||
if(CMAKE_MATCH_3)
|
||||
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
|
||||
else()
|
||||
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
set(BUILD_VERSION_STRING "no git info available")
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR} PRIVATE)
|
||||
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
|
||||
|
||||
if(ARGC GREATER 0)
|
||||
set(VAR_PREFIX "${ARGV0}")
|
||||
endif()
|
||||
|
||||
file(WRITE "${CMAKE_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ostream>
|
||||
|
||||
const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
|
||||
const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
|
||||
const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
|
||||
const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
|
||||
const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";
|
||||
|
||||
inline void write_version_string(std::ostream &os, bool verbose)
|
||||
{
|
||||
os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
|
||||
if (verbose)
|
||||
{
|
||||
os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
|
||||
if (k@VAR_PREFIX@VersionGitTag[0] != 0)
|
||||
os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
|
||||
}
|
||||
}
|
||||
]])
|
||||
configure_file("${CMAKE_BINARY_DIR}/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
|
||||
endfunction()
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
|
||||
if(NOT WIN32)
|
||||
find_dependency(ZLIB)
|
||||
find_dependency(BZip2)
|
||||
endif()
|
||||
find_dependency(Threads)
|
||||
|
||||
find_dependency(ZLIB REQUIRED)
|
||||
find_dependency(LibLZMA REQUIRED)
|
||||
|
||||
INCLUDE("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")
|
||||
|
||||
18
cmake/test-rx.cpp
Normal file
18
cmake/test-rx.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
|
||||
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
int main()
|
||||
{
|
||||
std::string s(100'000, '*');
|
||||
std::smatch m;
|
||||
std::regex r("^(.*?)$");
|
||||
|
||||
std::regex_search(s, m, r);
|
||||
|
||||
std::cout << s.substr(0, 10) << std::endl;
|
||||
std::cout << m.str(1).substr(0, 10) << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -2394,3 +2394,189 @@ VAL "Create component" 1999-07-08 RCSB
|
||||
VAL "Modify descriptor" 2011-06-04 RCSB
|
||||
#
|
||||
|
||||
data_NAG
|
||||
#
|
||||
|
||||
_chem_comp.id NAG
|
||||
_chem_comp.name 2-acetamido-2-deoxy-beta-D-glucopyranose
|
||||
_chem_comp.type "D-saccharide, beta linking"
|
||||
_chem_comp.pdbx_type ATOMS
|
||||
_chem_comp.formula "C8 H15 N O6"
|
||||
_chem_comp.mon_nstd_parent_comp_id ?
|
||||
_chem_comp.pdbx_synonyms
|
||||
;N-acetyl-beta-D-glucosamine; 2-acetamido-2-deoxy-beta-D-glucose; 2-acetamido-2-deoxy-D-glucose;
|
||||
2-acetamido-2-deoxy-glucose; N-ACETYL-D-GLUCOSAMINE
|
||||
;
|
||||
|
||||
_chem_comp.pdbx_formal_charge 0
|
||||
_chem_comp.pdbx_initial_date 1999-07-08
|
||||
_chem_comp.pdbx_modified_date 2020-07-17
|
||||
_chem_comp.pdbx_ambiguous_flag N
|
||||
_chem_comp.pdbx_release_status REL
|
||||
_chem_comp.pdbx_replaced_by ?
|
||||
_chem_comp.pdbx_replaces ?
|
||||
_chem_comp.formula_weight 221.208
|
||||
_chem_comp.one_letter_code ?
|
||||
_chem_comp.three_letter_code NAG
|
||||
_chem_comp.pdbx_model_coordinates_details ?
|
||||
_chem_comp.pdbx_model_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_ideal_coordinates_details Corina
|
||||
_chem_comp.pdbx_ideal_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_model_coordinates_db_code 8PCH
|
||||
_chem_comp.pdbx_subcomponent_list ?
|
||||
_chem_comp.pdbx_processing_site RCSB
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_synonyms.ordinal
|
||||
_pdbx_chem_comp_synonyms.comp_id
|
||||
_pdbx_chem_comp_synonyms.name
|
||||
_pdbx_chem_comp_synonyms.provenance
|
||||
_pdbx_chem_comp_synonyms.type
|
||||
1 NAG N-acetyl-beta-D-glucosamine PDB ?
|
||||
2 NAG 2-acetamido-2-deoxy-beta-D-glucose PDB ?
|
||||
3 NAG 2-acetamido-2-deoxy-D-glucose PDB ?
|
||||
4 NAG 2-acetamido-2-deoxy-glucose PDB ?
|
||||
5 NAG N-ACETYL-D-GLUCOSAMINE PDB ?
|
||||
# #
|
||||
loop_
|
||||
_chem_comp_atom.comp_id
|
||||
_chem_comp_atom.atom_id
|
||||
_chem_comp_atom.alt_atom_id
|
||||
_chem_comp_atom.type_symbol
|
||||
_chem_comp_atom.charge
|
||||
_chem_comp_atom.pdbx_align
|
||||
_chem_comp_atom.pdbx_aromatic_flag
|
||||
_chem_comp_atom.pdbx_leaving_atom_flag
|
||||
_chem_comp_atom.pdbx_stereo_config
|
||||
_chem_comp_atom.model_Cartn_x
|
||||
_chem_comp_atom.model_Cartn_y
|
||||
_chem_comp_atom.model_Cartn_z
|
||||
_chem_comp_atom.pdbx_model_Cartn_x_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_y_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_z_ideal
|
||||
_chem_comp_atom.pdbx_component_atom_id
|
||||
_chem_comp_atom.pdbx_component_comp_id
|
||||
_chem_comp_atom.pdbx_ordinal
|
||||
NAG C1 C1 C 0 1 N N R 7.396 28.163 26.662 0.185 1.082 -0.421 C1 NAG 1
|
||||
NAG C2 C2 C 0 1 N N R 6.973 29.233 27.644 0.790 -0.220 0.112 C2 NAG 2
|
||||
NAG C3 C3 C 0 1 N N R 7.667 29.055 29.000 -0.124 -1.390 -0.265 C3 NAG 3
|
||||
NAG C4 C4 C 0 1 N N S 7.573 27.588 29.490 -1.526 -1.129 0.294 C4 NAG 4
|
||||
NAG C5 C5 C 0 1 N N R 7.902 26.592 28.373 -2.042 0.207 -0.246 C5 NAG 5
|
||||
NAG C6 C6 C 0 1 N N N 7.599 25.173 28.797 -3.417 0.504 0.355 C6 NAG 6
|
||||
NAG C7 C7 C 0 1 N N N 6.291 31.299 26.595 3.197 0.157 0.076 C7 NAG 7
|
||||
NAG C8 C8 C 0 1 N N N 6.684 32.649 26.036 4.559 -0.052 -0.533 C8 NAG 8
|
||||
NAG N2 N2 N 0 1 N N N 7.268 30.545 27.089 2.114 -0.422 -0.480 N2 NAG 9
|
||||
NAG O1 O1 O 0 1 N Y N 6.676 28.363 25.419 1.003 2.185 -0.024 O1 NAG 10
|
||||
NAG O3 O3 O 0 1 N N N 7.038 29.909 29.947 0.395 -2.600 0.291 O3 NAG 11
|
||||
NAG O4 O4 O 0 1 N N N 8.494 27.358 30.574 -2.405 -2.180 -0.114 O4 NAG 12
|
||||
NAG O5 O5 O 0 1 N N N 7.104 26.875 27.206 -1.130 1.248 0.113 O5 NAG 13
|
||||
NAG O6 O6 O 0 1 N N N 6.232 25.040 29.165 -3.949 1.691 -0.236 O6 NAG 14
|
||||
NAG O7 O7 O 0 1 N N N 5.114 30.936 26.562 3.074 0.845 1.067 O7 NAG 15
|
||||
NAG H1 H1 H 0 1 N N N 8.477 28.257 26.481 0.133 1.040 -1.509 H1 NAG 16
|
||||
NAG H2 H2 H 0 1 N N N 5.888 29.146 27.803 0.879 -0.163 1.197 H2 NAG 17
|
||||
NAG H3 H3 H 0 1 N N N 8.729 29.321 28.892 -0.174 -1.478 -1.350 H3 NAG 18
|
||||
NAG H4 H4 H 0 1 N N N 6.544 27.403 29.831 -1.483 -1.091 1.382 H4 NAG 19
|
||||
NAG H5 H5 H 0 1 N N N 8.971 26.674 28.128 -2.123 0.154 -1.332 H5 NAG 20
|
||||
NAG H61 H61 H 0 1 N N N 7.816 24.492 27.961 -4.088 -0.333 0.157 H61 NAG 21
|
||||
NAG H62 H62 H 0 1 N N N 8.232 24.910 29.657 -3.320 0.645 1.431 H62 NAG 22
|
||||
NAG H81 H81 H 0 1 N N N 5.791 33.159 25.646 4.560 0.320 -1.558 H81 NAG 23
|
||||
NAG H82 H82 H 0 1 N N N 7.136 33.258 26.833 5.305 0.490 0.050 H82 NAG 24
|
||||
NAG H83 H83 H 0 1 N N N 7.411 32.511 25.222 4.799 -1.115 -0.532 H83 NAG 25
|
||||
NAG HN2 HN2 H 0 1 N N N 8.210 30.881 27.079 2.212 -0.973 -1.273 HN2 NAG 26
|
||||
NAG HO1 HO1 H 0 1 N Y N 6.933 27.696 24.793 0.679 3.044 -0.328 HO1 NAG 27
|
||||
NAG HO3 HO3 H 0 1 N Y N 7.459 29.809 30.793 -0.135 -3.384 0.091 HO3 NAG 28
|
||||
NAG HO4 HO4 H 0 1 N Y N 8.425 26.456 30.863 -3.312 -2.079 0.206 HO4 NAG 29
|
||||
NAG HO6 HO6 H 0 1 N Y N 6.060 24.143 29.428 -4.822 1.940 0.099 HO6 NAG 30
|
||||
# #
|
||||
loop_
|
||||
_chem_comp_bond.comp_id
|
||||
_chem_comp_bond.atom_id_1
|
||||
_chem_comp_bond.atom_id_2
|
||||
_chem_comp_bond.value_order
|
||||
_chem_comp_bond.pdbx_aromatic_flag
|
||||
_chem_comp_bond.pdbx_stereo_config
|
||||
_chem_comp_bond.pdbx_ordinal
|
||||
NAG C1 C2 SING N N 1
|
||||
NAG C1 O1 SING N N 2
|
||||
NAG C1 O5 SING N N 3
|
||||
NAG C1 H1 SING N N 4
|
||||
NAG C2 C3 SING N N 5
|
||||
NAG C2 N2 SING N N 6
|
||||
NAG C2 H2 SING N N 7
|
||||
NAG C3 C4 SING N N 8
|
||||
NAG C3 O3 SING N N 9
|
||||
NAG C3 H3 SING N N 10
|
||||
NAG C4 C5 SING N N 11
|
||||
NAG C4 O4 SING N N 12
|
||||
NAG C4 H4 SING N N 13
|
||||
NAG C5 C6 SING N N 14
|
||||
NAG C5 O5 SING N N 15
|
||||
NAG C5 H5 SING N N 16
|
||||
NAG C6 O6 SING N N 17
|
||||
NAG C6 H61 SING N N 18
|
||||
NAG C6 H62 SING N N 19
|
||||
NAG C7 C8 SING N N 20
|
||||
NAG C7 N2 SING N N 21
|
||||
NAG C7 O7 DOUB N N 22
|
||||
NAG C8 H81 SING N N 23
|
||||
NAG C8 H82 SING N N 24
|
||||
NAG C8 H83 SING N N 25
|
||||
NAG N2 HN2 SING N N 26
|
||||
NAG O1 HO1 SING N N 27
|
||||
NAG O3 HO3 SING N N 28
|
||||
NAG O4 HO4 SING N N 29
|
||||
NAG O6 HO6 SING N N 30
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_descriptor.comp_id
|
||||
_pdbx_chem_comp_descriptor.type
|
||||
_pdbx_chem_comp_descriptor.program
|
||||
_pdbx_chem_comp_descriptor.program_version
|
||||
_pdbx_chem_comp_descriptor.descriptor
|
||||
NAG SMILES ACDLabs 12.01 "O=C(NC1C(O)C(O)C(OC1O)CO)C"
|
||||
NAG InChI InChI 1.03 "InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-10)15-8(5)14/h4-8,10,12-14H,2H2,1H3,(H,9,11)/t4-,5-,6-,7-,8-/m1/s1"
|
||||
NAG InChIKey InChI 1.03 OVRNDRQMDRJTHS-FMDGEEDCSA-N
|
||||
NAG SMILES_CANONICAL CACTVS 3.370 "CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O"
|
||||
NAG SMILES CACTVS 3.370 "CC(=O)N[CH]1[CH](O)O[CH](CO)[CH](O)[CH]1O"
|
||||
NAG SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"
|
||||
NAG SMILES "OpenEye OEToolkits" 1.7.6 "CC(=O)NC1C(C(C(OC1O)CO)O)O"
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_identifier.comp_id
|
||||
_pdbx_chem_comp_identifier.type
|
||||
_pdbx_chem_comp_identifier.program
|
||||
_pdbx_chem_comp_identifier.program_version
|
||||
_pdbx_chem_comp_identifier.identifier
|
||||
NAG "SYSTEMATIC NAME" ACDLabs 12.01 "2-(acetylamino)-2-deoxy-beta-D-glucopyranose"
|
||||
NAG "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "N-[(2R,3R,4R,5S,6R)-6-(hydroxymethyl)-2,4,5-tris(oxidanyl)oxan-3-yl]ethanamide"
|
||||
NAG "CONDENSED IUPAC CARBOHYDRATE SYMBOL" GMML 1.0 DGlcpNAcb
|
||||
NAG "COMMON NAME" GMML 1.0 N-acetyl-b-D-glucopyranosamine
|
||||
NAG "IUPAC CARBOHYDRATE SYMBOL" PDB-CARE 1.0 b-D-GlcpNAc
|
||||
NAG "SNFG CARBOHYDRATE SYMBOL" GMML 1.0 GlcNAc
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_feature.comp_id
|
||||
_pdbx_chem_comp_feature.type
|
||||
_pdbx_chem_comp_feature.value
|
||||
_pdbx_chem_comp_feature.source
|
||||
_pdbx_chem_comp_feature.support
|
||||
NAG "CARBOHYDRATE ISOMER" D PDB ?
|
||||
NAG "CARBOHYDRATE RING" pyranose PDB ?
|
||||
NAG "CARBOHYDRATE ANOMER" beta PDB ?
|
||||
NAG "CARBOHYDRATE PRIMARY CARBONYL GROUP" aldose PDB ?
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_audit.comp_id
|
||||
_pdbx_chem_comp_audit.action_type
|
||||
_pdbx_chem_comp_audit.date
|
||||
_pdbx_chem_comp_audit.processing_site
|
||||
NAG "Create component" 1999-07-08 RCSB
|
||||
NAG "Modify descriptor" 2011-06-04 RCSB
|
||||
NAG "Modify leaving atom flag" 2011-07-01 RCSB
|
||||
NAG "Modify leaving atom flag" 2012-11-26 RCSB
|
||||
NAG "Other modification" 2019-08-12 RCSB
|
||||
NAG "Other modification" 2019-12-19 RCSB
|
||||
NAG "Other modification" 2020-07-03 RCSB
|
||||
NAG "Modify name" 2020-07-17 RCSB
|
||||
NAG "Modify synonyms" 2020-07-17 RCSB
|
||||
##
|
||||
@@ -1,28 +1,24 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
|
||||
#include <cif++/Cif++.hpp>
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
int main()
|
||||
{
|
||||
fs::path in("1cbs.cif.gz");
|
||||
|
||||
cif::File file;
|
||||
|
||||
file.loadDictionary("mmcif_pdbx_v50");
|
||||
|
||||
cif::file file;
|
||||
file.load("1cbs.cif.gz");
|
||||
|
||||
auto& db = file.firstDatablock()["atom_site"];
|
||||
auto n = db.find(cif::Key("label_atom_id") == "OXT").size();
|
||||
auto& db = file.front();
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
|
||||
|
||||
std::cout << "File contains " << db.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
<< "residues with an OXT are:" << std::endl;
|
||||
|
||||
for (const auto& [asym, comp, seqnr]: db.find<std::string,std::string,int>(
|
||||
cif::Key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
for (const auto& [asym, comp, seqnr]: atom_site.find<std::string,std::string,int>(
|
||||
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
{
|
||||
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
|
||||
}
|
||||
|
||||
1
gxrio
Submodule
1
gxrio
Submodule
Submodule gxrio added at a7bb5b5c4b
40
include/cif++.hpp
Normal file
40
include/cif++.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/format.hpp>
|
||||
|
||||
#include <cif++/compound.hpp>
|
||||
#include <cif++/point.hpp>
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
#include <cif++/model.hpp>
|
||||
|
||||
#include <cif++/pdb/io.hpp>
|
||||
@@ -1,245 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Lib for working with structures as contained in mmCIF and PDB files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
enum AtomType : uint8_t
|
||||
{
|
||||
Nn = 0, // Unknown
|
||||
|
||||
H = 1, // Hydrogen
|
||||
He = 2, // Helium
|
||||
|
||||
Li = 3, // Lithium
|
||||
Be = 4, // Beryllium
|
||||
B = 5, // Boron
|
||||
C = 6, // Carbon
|
||||
N = 7, // Nitrogen
|
||||
O = 8, // Oxygen
|
||||
F = 9, // Fluorine
|
||||
Ne = 10, // Neon
|
||||
|
||||
Na = 11, // Sodium
|
||||
Mg = 12, // Magnesium
|
||||
Al = 13, // Aluminium
|
||||
Si = 14, // Silicon
|
||||
P = 15, // Phosphorus
|
||||
S = 16, // Sulfur
|
||||
Cl = 17, // Chlorine
|
||||
Ar = 18, // Argon
|
||||
|
||||
K = 19, // Potassium
|
||||
Ca = 20, // Calcium
|
||||
Sc = 21, // Scandium
|
||||
Ti = 22, // Titanium
|
||||
V = 23, // Vanadium
|
||||
Cr = 24, // Chromium
|
||||
Mn = 25, // Manganese
|
||||
Fe = 26, // Iron
|
||||
Co = 27, // Cobalt
|
||||
Ni = 28, // Nickel
|
||||
Cu = 29, // Copper
|
||||
Zn = 30, // Zinc
|
||||
Ga = 31, // Gallium
|
||||
Ge = 32, // Germanium
|
||||
As = 33, // Arsenic
|
||||
Se = 34, // Selenium
|
||||
Br = 35, // Bromine
|
||||
Kr = 36, // Krypton
|
||||
|
||||
Rb = 37, // Rubidium
|
||||
Sr = 38, // Strontium
|
||||
Y = 39, // Yttrium
|
||||
Zr = 40, // Zirconium
|
||||
Nb = 41, // Niobium
|
||||
Mo = 42, // Molybdenum
|
||||
Tc = 43, // Technetium
|
||||
Ru = 44, // Ruthenium
|
||||
Rh = 45, // Rhodium
|
||||
Pd = 46, // Palladium
|
||||
Ag = 47, // Silver
|
||||
Cd = 48, // Cadmium
|
||||
In = 49, // Indium
|
||||
Sn = 50, // Tin
|
||||
Sb = 51, // Antimony
|
||||
Te = 52, // Tellurium
|
||||
I = 53, // Iodine
|
||||
Xe = 54, // Xenon
|
||||
Cs = 55, // Caesium
|
||||
Ba = 56, // Barium
|
||||
La = 57, // Lanthanum
|
||||
|
||||
Hf = 72, // Hafnium
|
||||
Ta = 73, // Tantalum
|
||||
W = 74, // Tungsten
|
||||
Re = 75, // Rhenium
|
||||
Os = 76, // Osmium
|
||||
Ir = 77, // Iridium
|
||||
Pt = 78, // Platinum
|
||||
Au = 79, // Gold
|
||||
Hg = 80, // Mercury
|
||||
Tl = 81, // Thallium
|
||||
Pb = 82, // Lead
|
||||
Bi = 83, // Bismuth
|
||||
Po = 84, // Polonium
|
||||
At = 85, // Astatine
|
||||
Rn = 86, // Radon
|
||||
Fr = 87, // Francium
|
||||
Ra = 88, // Radium
|
||||
Ac = 89, // Actinium
|
||||
|
||||
Rf = 104, // Rutherfordium
|
||||
Db = 105, // Dubnium
|
||||
Sg = 106, // Seaborgium
|
||||
Bh = 107, // Bohrium
|
||||
Hs = 108, // Hassium
|
||||
Mt = 109, // Meitnerium
|
||||
Ds = 110, // Darmstadtium
|
||||
Rg = 111, // Roentgenium
|
||||
Cn = 112, // Copernicium
|
||||
Nh = 113, // Nihonium
|
||||
Fl = 114, // Flerovium
|
||||
Mc = 115, // Moscovium
|
||||
Lv = 116, // Livermorium
|
||||
Ts = 117, // Tennessine
|
||||
Og = 118, // Oganesson
|
||||
|
||||
Ce = 58, // Cerium
|
||||
Pr = 59, // Praseodymium
|
||||
Nd = 60, // Neodymium
|
||||
Pm = 61, // Promethium
|
||||
Sm = 62, // Samarium
|
||||
Eu = 63, // Europium
|
||||
Gd = 64, // Gadolinium
|
||||
Tb = 65, // Terbium
|
||||
Dy = 66, // Dysprosium
|
||||
Ho = 67, // Holmium
|
||||
Er = 68, // Erbium
|
||||
Tm = 69, // Thulium
|
||||
Yb = 70, // Ytterbium
|
||||
Lu = 71, // Lutetium
|
||||
|
||||
Th = 90, // Thorium
|
||||
Pa = 91, // Protactinium
|
||||
U = 92, // Uranium
|
||||
Np = 93, // Neptunium
|
||||
Pu = 94, // Plutonium
|
||||
Am = 95, // Americium
|
||||
Cm = 96, // Curium
|
||||
Bk = 97, // Berkelium
|
||||
Cf = 98, // Californium
|
||||
Es = 99, // Einsteinium
|
||||
Fm = 100, // Fermium
|
||||
Md = 101, // Mendelevium
|
||||
No = 102, // Nobelium
|
||||
Lr = 103, // Lawrencium
|
||||
|
||||
D = 129, // Deuterium
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeInfo
|
||||
|
||||
enum RadiusType {
|
||||
eRadiusCalculated,
|
||||
eRadiusEmpirical,
|
||||
eRadiusCovalentEmpirical,
|
||||
|
||||
eRadiusSingleBond,
|
||||
eRadiusDoubleBond,
|
||||
eRadiusTripleBond,
|
||||
|
||||
eRadiusVanderWaals,
|
||||
|
||||
eRadiusTypeCount
|
||||
};
|
||||
|
||||
struct AtomTypeInfo
|
||||
{
|
||||
AtomType type;
|
||||
std::string name;
|
||||
std::string symbol;
|
||||
float weight;
|
||||
bool metal;
|
||||
float radii[eRadiusTypeCount];
|
||||
};
|
||||
|
||||
extern const AtomTypeInfo kKnownAtoms[];
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeTraits
|
||||
|
||||
class AtomTypeTraits
|
||||
{
|
||||
public:
|
||||
AtomTypeTraits(AtomType a);
|
||||
AtomTypeTraits(const std::string& symbol);
|
||||
|
||||
AtomType type() const { return mInfo->type; }
|
||||
std::string name() const { return mInfo->name; }
|
||||
std::string symbol() const { return mInfo->symbol; }
|
||||
float weight() const { return mInfo->weight; }
|
||||
|
||||
bool isMetal() const { return mInfo->metal; }
|
||||
|
||||
static bool isElement(const std::string& symbol);
|
||||
static bool isMetal(const std::string& symbol);
|
||||
|
||||
float radius(RadiusType type = eRadiusSingleBond) const
|
||||
{
|
||||
if (type >= eRadiusTypeCount)
|
||||
throw std::invalid_argument("invalid radius requested");
|
||||
return mInfo->radii[type] / 100.f;
|
||||
}
|
||||
|
||||
// data type encapsulating the Waasmaier & Kirfel scattering factors
|
||||
// in a simplified form (only a and b).
|
||||
// Added the electrion scattering factors as well
|
||||
struct SFData
|
||||
{
|
||||
double a[6], b[6];
|
||||
};
|
||||
|
||||
// to get the Cval and Siva values, use this constant as charge:
|
||||
enum { kWKSFVal = -99 };
|
||||
|
||||
const SFData& wksf(int charge = 0) const;
|
||||
const SFData& elsf() const;
|
||||
|
||||
private:
|
||||
const struct AtomTypeInfo* mInfo;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "cif++/Structure.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// Exception type for BondMap failures; carries only the message passed in.
class BondMapException : public std::runtime_error
{
  public:
	BondMapException(const std::string& msg)
		: std::runtime_error(msg)
	{
	}
};
|
||||
|
||||
class BondMap
|
||||
{
|
||||
public:
|
||||
BondMap(const Structure& p);
|
||||
|
||||
BondMap(const BondMap&) = delete;
|
||||
BondMap& operator=(const BondMap&) = delete;
|
||||
|
||||
bool operator()(const Atom& a, const Atom& b) const
|
||||
{
|
||||
return isBonded(index.at(a.id()), index.at(b.id()));
|
||||
}
|
||||
|
||||
bool is1_4(const Atom& a, const Atom& b) const
|
||||
{
|
||||
uint32_t ixa = index.at(a.id());
|
||||
uint32_t ixb = index.at(b.id());
|
||||
|
||||
return bond_1_4.count(key(ixa, ixb));
|
||||
}
|
||||
|
||||
// links coming from the struct_conn records:
|
||||
std::vector<std::string> linked(const Atom& a) const;
|
||||
|
||||
// This list of atomID's is comming from either CCD or the CCP4 dictionaries loaded
|
||||
static std::vector<std::string> atomIDsForCompound(const std::string& compoundID);
|
||||
|
||||
private:
|
||||
|
||||
bool isBonded(uint32_t ai, uint32_t bi) const
|
||||
{
|
||||
return bond.count(key(ai, bi)) != 0;
|
||||
}
|
||||
|
||||
uint64_t key(uint32_t a, uint32_t b) const
|
||||
{
|
||||
if (a > b)
|
||||
std::swap(a, b);
|
||||
return static_cast<uint64_t>(a) | (static_cast<uint64_t>(b) << 32);
|
||||
}
|
||||
|
||||
std::tuple<uint32_t,uint32_t> dekey(uint64_t k) const
|
||||
{
|
||||
return std::make_tuple(
|
||||
static_cast<uint32_t>(k >> 32),
|
||||
static_cast<uint32_t>(k)
|
||||
);
|
||||
}
|
||||
|
||||
uint32_t dim;
|
||||
std::unordered_map<std::string,uint32_t> index;
|
||||
std::set<uint64_t> bond, bond_1_4;
|
||||
|
||||
std::map<std::string,std::set<std::string>> link;
|
||||
};
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,248 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
|
||||
#include <stack>
|
||||
#include <map>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Exception type reported by the CIF parser. It is constructed from a line
// number and a message; the constructor is defined outside this header.
class CifParserError : public std::runtime_error
|
||||
{
|
||||
public:
|
||||
CifParserError(uint32_t lineNr, const std::string& message);
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
extern const uint32_t kMaxLineLength;
|
||||
|
||||
extern const uint8_t kCharTraitsTable[128];
|
||||
|
||||
// Bit flags stored in the entries of kCharTraitsTable; the isOrdinary(),
// isNonBlank(), isTextLead() and isAnyPrint() helpers below each test one
// of these bits.
enum CharTraitsMask: uint8_t {
|
||||
kOrdinaryMask = 1 << 0,
|
||||
kNonBlankMask = 1 << 1,
|
||||
kTextLeadMask = 1 << 2,
|
||||
kAnyPrintMask = 1 << 3
|
||||
};
|
||||
|
||||
// Returns true when ch is a whitespace character or '#'.
//
// std::isspace has undefined behaviour for an int that is neither EOF nor
// representable as unsigned char (e.g. a sign-extended 8-bit char), so values
// outside 0..0xff are rejected before it is called. EOF is outside that
// range and yields false, exactly as std::isspace(EOF) would.
inline bool isWhite(int ch)
{
	if (ch == '#')
		return true;

	return ch >= 0 and ch <= 0xff and std::isspace(ch) != 0;
}
|
||||
|
||||
inline bool isOrdinary(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isNonBlank(int ch)
|
||||
{
|
||||
return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isTextLead(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isAnyPrint(int ch)
|
||||
{
|
||||
return ch == '\t' or
|
||||
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
|
||||
}
|
||||
|
||||
inline bool isUnquotedString(const char* s)
|
||||
{
|
||||
bool result = isOrdinary(*s++);
|
||||
while (result and *s != 0)
|
||||
{
|
||||
result = isNonBlank(*s);
|
||||
++s;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::tuple<std::string,std::string> splitTagName(const std::string& tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
using DatablockIndex = std::map<std::string,std::size_t>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// sac Parser, analogous to SAX Parser (simple api for xml)
|
||||
|
||||
class SacParser
|
||||
{
|
||||
public:
|
||||
SacParser(std::istream& is, bool init = true);
|
||||
virtual ~SacParser() {}
|
||||
|
||||
enum CIFToken
|
||||
{
|
||||
eCIFTokenUnknown,
|
||||
|
||||
eCIFTokenEOF,
|
||||
|
||||
eCIFTokenDATA,
|
||||
eCIFTokenLOOP,
|
||||
eCIFTokenGLOBAL,
|
||||
eCIFTokenSAVE,
|
||||
eCIFTokenSTOP,
|
||||
eCIFTokenTag,
|
||||
eCIFTokenValue,
|
||||
};
|
||||
|
||||
static const char* kTokenName[];
|
||||
|
||||
enum CIFValueType
|
||||
{
|
||||
eCIFValueInt,
|
||||
eCIFValueFloat,
|
||||
eCIFValueNumeric,
|
||||
eCIFValueString,
|
||||
eCIFValueTextField,
|
||||
eCIFValueInapplicable,
|
||||
eCIFValueUnknown
|
||||
};
|
||||
|
||||
static const char* kValueName[];
|
||||
|
||||
int getNextChar();
|
||||
|
||||
void retract();
|
||||
void restart();
|
||||
|
||||
CIFToken getNextToken();
|
||||
void match(CIFToken token);
|
||||
|
||||
bool parseSingleDatablock(const std::string& datablock);
|
||||
|
||||
DatablockIndex indexDatablocks();
|
||||
bool parseSingleDatablock(const std::string& datablock, const DatablockIndex &index);
|
||||
|
||||
void parseFile();
|
||||
void parseGlobal();
|
||||
void parseDataBlock();
|
||||
|
||||
virtual void parseSaveFrame();
|
||||
|
||||
void parseDictionary();
|
||||
|
||||
void error(const std::string& msg);
|
||||
|
||||
// production methods, these are pure virtual here
|
||||
|
||||
virtual void produceDatablock(const std::string& name) = 0;
|
||||
virtual void produceCategory(const std::string& name) = 0;
|
||||
virtual void produceRow() = 0;
|
||||
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0;
|
||||
|
||||
protected:
|
||||
|
||||
enum State
|
||||
{
|
||||
eStateStart,
|
||||
eStateWhite,
|
||||
eStateComment,
|
||||
eStateQuestionMark,
|
||||
eStateDot,
|
||||
eStateQuotedString,
|
||||
eStateQuotedStringQuote,
|
||||
eStateUnquotedString,
|
||||
eStateTag,
|
||||
eStateTextField,
|
||||
eStateFloat = 100,
|
||||
eStateInt = 110,
|
||||
// eStateNumericSuffix = 200,
|
||||
eStateValue = 300
|
||||
};
|
||||
|
||||
std::istream& mData;
|
||||
|
||||
// Parser state
|
||||
bool mValidate;
|
||||
uint32_t mLineNr;
|
||||
bool mBol;
|
||||
int mState, mStart;
|
||||
CIFToken mLookahead;
|
||||
std::string mTokenValue;
|
||||
CIFValueType mTokenType;
|
||||
std::stack<int> mBuffer;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Parser : public SacParser
|
||||
{
|
||||
public:
|
||||
Parser(std::istream& is, File& f, bool init = true);
|
||||
|
||||
virtual void produceDatablock(const std::string& name);
|
||||
virtual void produceCategory(const std::string& name);
|
||||
virtual void produceRow();
|
||||
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value);
|
||||
|
||||
protected:
|
||||
File& mFile;
|
||||
Datablock* mDataBlock;
|
||||
Datablock::iterator mCat;
|
||||
Row mRow;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class DictParser : public Parser
|
||||
{
|
||||
public:
|
||||
|
||||
DictParser(Validator& validator, std::istream& is);
|
||||
~DictParser();
|
||||
|
||||
void loadDictionary();
|
||||
|
||||
private:
|
||||
|
||||
virtual void parseSaveFrame();
|
||||
|
||||
bool collectItemTypes();
|
||||
void linkItems();
|
||||
|
||||
Validator& mValidator;
|
||||
File mFile;
|
||||
struct DictParserDataImpl* mImpl;
|
||||
bool mCollectedItemTypes = false;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,198 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
|
||||
// duh.. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
|
||||
// #include <regex>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
#include <set>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct ValidateCategory;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Exception carrying a validation message in the public member mMsg;
// what() returns that message. Constructors are defined outside this header.
class ValidationError : public std::exception
{
  public:
	ValidationError(const std::string& msg);
	ValidationError(const std::string& cat, const std::string& item,
		const std::string& msg);

	const char* what() const noexcept override
	{
		return mMsg.c_str();
	}

	std::string mMsg;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Primitive type codes used by ValidateType::mPrimitiveType;
// mapToPrimitiveType() (declared below) maps a string to one of these.
enum class DDL_PrimitiveType
|
||||
{
|
||||
Char, UChar, Numb
|
||||
};
|
||||
|
||||
DDL_PrimitiveType mapToPrimitiveType(const std::string& s);
|
||||
|
||||
struct ValidateType
|
||||
{
|
||||
std::string mName;
|
||||
DDL_PrimitiveType mPrimitiveType;
|
||||
// std::regex mRx;
|
||||
boost::regex mRx;
|
||||
|
||||
bool operator<(const ValidateType& rhs) const
|
||||
{
|
||||
return icompare(mName, rhs.mName) < 0;
|
||||
}
|
||||
|
||||
// compare values based on type
|
||||
// int compare(const std::string& a, const std::string& b) const
|
||||
// {
|
||||
// return compare(a.c_str(), b.c_str());
|
||||
// }
|
||||
|
||||
int compare(const char* a, const char* b) const;
|
||||
};
|
||||
|
||||
struct ValidateItem
|
||||
{
|
||||
std::string mTag;
|
||||
bool mMandatory;
|
||||
const ValidateType* mType;
|
||||
cif::iset mEnums;
|
||||
std::string mDefault;
|
||||
bool mDefaultIsNull;
|
||||
ValidateCategory* mCategory = nullptr;
|
||||
|
||||
// ItemLinked is used for non-key links
|
||||
struct ItemLinked
|
||||
{
|
||||
ValidateItem* mParent;
|
||||
std::string mParentItem;
|
||||
std::string mChildItem;
|
||||
};
|
||||
|
||||
std::vector<ItemLinked> mLinked;
|
||||
|
||||
bool operator<(const ValidateItem& rhs) const
|
||||
{
|
||||
return icompare(mTag, rhs.mTag) < 0;
|
||||
}
|
||||
|
||||
bool operator==(const ValidateItem& rhs) const
|
||||
{
|
||||
return iequals(mTag, rhs.mTag);
|
||||
}
|
||||
|
||||
void operator()(std::string value) const;
|
||||
};
|
||||
|
||||
struct ValidateCategory
|
||||
{
|
||||
std::string mName;
|
||||
std::vector<std::string> mKeys;
|
||||
cif::iset mGroups;
|
||||
cif::iset mMandatoryFields;
|
||||
std::set<ValidateItem> mItemValidators;
|
||||
|
||||
bool operator<(const ValidateCategory& rhs) const
|
||||
{
|
||||
return icompare(mName, rhs.mName) < 0;
|
||||
}
|
||||
|
||||
void addItemValidator(ValidateItem&& v);
|
||||
|
||||
const ValidateItem* getValidatorForItem(std::string tag) const;
|
||||
|
||||
const std::set<ValidateItem>& itemValidators() const
|
||||
{
|
||||
return mItemValidators;
|
||||
}
|
||||
};
|
||||
|
||||
// Plain record for one link between categories: a parent category and a
// child category, each with a list of keys, plus a link group id and label.
// NOTE(review): presumably mParentKeys and mChildKeys are parallel lists
// (same length, matched by position) — confirm against the code that fills them.
struct ValidateLink
|
||||
{
|
||||
int mLinkGroupID;
|
||||
std::string mParentCategory;
|
||||
std::vector<std::string> mParentKeys;
|
||||
std::string mChildCategory;
|
||||
std::vector<std::string> mChildKeys;
|
||||
std::string mLinkGroupLabel;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Validator
|
||||
{
|
||||
public:
|
||||
friend class DictParser;
|
||||
|
||||
Validator();
|
||||
~Validator();
|
||||
|
||||
Validator(const Validator& rhs) = delete;
|
||||
Validator& operator=(const Validator& rhs) = delete;
|
||||
|
||||
Validator(Validator&& rhs);
|
||||
Validator& operator=(Validator&& rhs);
|
||||
|
||||
void addTypeValidator(ValidateType&& v);
|
||||
const ValidateType* getValidatorForType(std::string typeCode) const;
|
||||
|
||||
void addCategoryValidator(ValidateCategory&& v);
|
||||
const ValidateCategory* getValidatorForCategory(std::string category) const;
|
||||
|
||||
void addLinkValidator(ValidateLink&& v);
|
||||
std::vector<const ValidateLink*> getLinksForParent(const std::string& category) const;
|
||||
std::vector<const ValidateLink*> getLinksForChild(const std::string& category) const;
|
||||
|
||||
void reportError(const std::string& msg, bool fatal);
|
||||
|
||||
std::string dictName() const { return mName; }
|
||||
void dictName(const std::string& name) { mName = name; }
|
||||
|
||||
std::string dictVersion() const { return mVersion; }
|
||||
void dictVersion(const std::string& version) { mVersion = version; }
|
||||
|
||||
private:
|
||||
|
||||
// name is fully qualified here:
|
||||
ValidateItem* getValidatorForItem(std::string name) const;
|
||||
|
||||
std::string mName;
|
||||
std::string mVersion;
|
||||
bool mStrict = false;
|
||||
// std::set<uint32_t> mSubCategories;
|
||||
std::set<ValidateType> mTypeValidators;
|
||||
std::set<ValidateCategory> mCategoryValidators;
|
||||
std::vector<ValidateLink> mLinkValidators;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,391 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// uBlas compatible matrix types
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
|
||||
// element m i,j is mapped to [i * n + j] and thus storage is row major
|
||||
|
||||
// Abstract m x n matrix interface. Derived classes supply the dimensions and
// the read accessor; the mutable accessor is optional and throws
// std::runtime_error unless overridden.
template <typename T>
class MatrixBase
{
  public:
	using value_type = T;

	virtual ~MatrixBase() = default;

	virtual uint32_t dim_m() const = 0;
	virtual uint32_t dim_n() const = 0;

	// mutable element access; not available on read-only matrix types
	virtual value_type &operator()(uint32_t /*i*/, uint32_t /*j*/)
	{
		throw std::runtime_error("unimplemented method");
	}

	virtual value_type operator()(uint32_t i, uint32_t j) const = 0;

	// element-wise scalar operations, defined out of class
	MatrixBase &operator*=(const value_type &rhs);

	MatrixBase &operator-=(const value_type &rhs);
};
|
||||
|
||||
template <typename T>
|
||||
MatrixBase<T> &MatrixBase<T>::operator*=(const T &rhs)
|
||||
{
|
||||
for (uint32_t i = 0; i < dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < dim_n(); ++j)
|
||||
{
|
||||
operator()(i, j) *= rhs;
|
||||
}
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
MatrixBase<T> &MatrixBase<T>::operator-=(const T &rhs)
|
||||
{
|
||||
for (uint32_t i = 0; i < dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < dim_n(); ++j)
|
||||
{
|
||||
operator()(i, j) -= rhs;
|
||||
}
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::ostream &operator<<(std::ostream &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
lhs << '[' << rhs.dim_m() << ',' << rhs.dim_n() << ']' << '(';
|
||||
for (uint32_t i = 0; i < rhs.dim_m(); ++i)
|
||||
{
|
||||
lhs << '(';
|
||||
for (uint32_t j = 0; j < rhs.dim_n(); ++j)
|
||||
{
|
||||
if (j > 0)
|
||||
lhs << ',';
|
||||
lhs << rhs(i, j);
|
||||
}
|
||||
lhs << ')';
|
||||
}
|
||||
lhs << ')';
|
||||
|
||||
return lhs;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class Matrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
template <typename T2>
|
||||
Matrix(const MatrixBase<T2> &m)
|
||||
: m_m(m.dim_m())
|
||||
, m_n(m.dim_n())
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
for (uint32_t i = 0; i < m_m; ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < m_n; ++j)
|
||||
operator()(i, j) = m(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
Matrix()
|
||||
: m_data(nullptr)
|
||||
, m_m(0)
|
||||
, m_n(0)
|
||||
{
|
||||
}
|
||||
|
||||
Matrix(const Matrix &m)
|
||||
: m_m(m.m_m)
|
||||
, m_n(m.m_n)
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
std::copy(m.m_data, m.m_data + (m_m * m_n), m_data);
|
||||
}
|
||||
|
||||
Matrix &operator=(const Matrix &m)
|
||||
{
|
||||
value_type *t = new value_type[m.m_m * m.m_n];
|
||||
std::copy(m.m_data, m.m_data + (m.m_m * m.m_n), t);
|
||||
|
||||
delete[] m_data;
|
||||
m_data = t;
|
||||
m_m = m.m_m;
|
||||
m_n = m.m_n;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
Matrix(uint32_t m, uint32_t n, T v = T())
|
||||
: m_m(m)
|
||||
, m_n(n)
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
std::fill(m_data, m_data + (m_m * m_n), v);
|
||||
}
|
||||
|
||||
virtual ~Matrix()
|
||||
{
|
||||
delete[] m_data;
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_m; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
virtual value_type operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
assert(i < m_m);
|
||||
assert(j < m_n);
|
||||
return m_data[i * m_n + j];
|
||||
}
|
||||
|
||||
virtual value_type &operator()(uint32_t i, uint32_t j)
|
||||
{
|
||||
assert(i < m_m);
|
||||
assert(j < m_n);
|
||||
return m_data[i * m_n + j];
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void each(Func f)
|
||||
{
|
||||
for (uint32_t i = 0; i < m_m * m_n; ++i)
|
||||
f(m_data[i]);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
Matrix &operator/=(U v)
|
||||
{
|
||||
for (uint32_t i = 0; i < m_m * m_n; ++i)
|
||||
m_data[i] /= v;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
value_type *m_data;
|
||||
uint32_t m_m, m_n;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
class SymmetricMatrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
typedef typename MatrixBase<T>::value_type value_type;
|
||||
|
||||
SymmetricMatrix(uint32_t n, T v = T())
|
||||
: m_owner(true)
|
||||
, m_n(n)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
m_data = new value_type[N];
|
||||
std::fill(m_data, m_data + N, v);
|
||||
}
|
||||
|
||||
SymmetricMatrix(const T *data, uint32_t n)
|
||||
: m_owner(false)
|
||||
, m_data(const_cast<T *>(data))
|
||||
, m_n(n)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~SymmetricMatrix()
|
||||
{
|
||||
if (m_owner)
|
||||
delete[] m_data;
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_n; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
T operator()(uint32_t i, uint32_t j) const;
|
||||
virtual T &operator()(uint32_t i, uint32_t j);
|
||||
|
||||
// erase two rows, add one at the end (for neighbour joining)
|
||||
void erase_2(uint32_t i, uint32_t j);
|
||||
|
||||
template <typename Func>
|
||||
void each(Func f)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
|
||||
for (uint32_t i = 0; i < N; ++i)
|
||||
f(m_data[i]);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
SymmetricMatrix &operator/=(U v)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
|
||||
for (uint32_t i = 0; i < N; ++i)
|
||||
m_data[i] /= v;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_owner;
|
||||
value_type *m_data;
|
||||
uint32_t m_n;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline T SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return i < j
|
||||
? m_data[(j * (j + 1)) / 2 + i]
|
||||
: m_data[(i * (i + 1)) / 2 + j];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T &SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j)
|
||||
{
|
||||
if (i > j)
|
||||
std::swap(i, j);
|
||||
assert(j < m_n);
|
||||
return m_data[(j * (j + 1)) / 2 + i];
|
||||
}
|
||||
|
||||
// Compacts the packed storage in place, dropping every visited element whose
// row or column index equals di or dj, then reduces the dimension by one.
// s walks the source positions and d the destination positions; elements
// are only ever moved towards the front (d <= s).
//
// NOTE(review): the loops visit only pairs with j < i, i.e. n*(n-1)/2
// positions without a diagonal, while the constructor allocates n*(n+1)/2
// elements and operator() indexes a layout that includes the diagonal.
// This looks inconsistent with the current storage layout — confirm before
// relying on this function.
// NOTE(review): two indices are erased but m_n is decremented only once;
// per the class comment a new row is meant to be "added at the end", which
// this function does not fill in — presumably the caller does.
template <typename T>
|
||||
void SymmetricMatrix<T>::erase_2(uint32_t di, uint32_t dj)
|
||||
{
|
||||
uint32_t s = 0, d = 0;
|
||||
for (uint32_t i = 0; i < m_n; ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < i; ++j)
|
||||
{
|
||||
// keep the element only when neither index is one of the erased ones
if (i != di and j != dj and i != dj and j != di)
|
||||
{
|
||||
if (s != d)
|
||||
m_data[d] = m_data[s];
|
||||
++d;
|
||||
}
|
||||
|
||||
++s;
|
||||
}
|
||||
}
|
||||
|
||||
--m_n;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class IdentityMatrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
typedef typename MatrixBase<T>::value_type value_type;
|
||||
|
||||
IdentityMatrix(uint32_t n)
|
||||
: m_n(n)
|
||||
{
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_n; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
virtual value_type operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
value_type result = 0;
|
||||
if (i == j)
|
||||
result = 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_n;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix functions
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator*(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
Matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
|
||||
|
||||
for (uint32_t i = 0; i < result.dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < result.dim_n(); ++j)
|
||||
{
|
||||
for (uint32_t li = 0, rj = 0; li < lhs.dim_m() and rj < rhs.dim_n(); ++li, ++rj)
|
||||
result(i, j) += lhs(li, j) * rhs(i, rj);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator*(const MatrixBase<T> &lhs, T rhs)
|
||||
{
|
||||
Matrix<T> result(lhs);
|
||||
result *= rhs;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator-(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
Matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
|
||||
|
||||
for (uint32_t i = 0; i < result.dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < result.dim_n(); ++j)
|
||||
{
|
||||
result(i, j) = lhs(i, j) - rhs(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator-(const MatrixBase<T> &lhs, T rhs)
|
||||
{
|
||||
Matrix<T> result(lhs.dim_m(), lhs.dim_n());
|
||||
result -= rhs;
|
||||
return result;
|
||||
}
|
||||
|
||||
// template <typename T>
|
||||
// symmetric_matrix<T> hammingDistance(const MatrixBase<T> &lhs, T rhs);
|
||||
|
||||
// template <typename T>
|
||||
// std::vector<T> sum(const MatrixBase<T> &m);
|
||||
@@ -1,428 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
#include <clipper/core/coords.h>
|
||||
#endif
|
||||
|
||||
#include <boost/math/quaternion.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
typedef boost::math::quaternion<float> Quaternion;
|
||||
|
||||
const double
|
||||
kPI = 3.141592653589793238462643383279502884;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Point, a location with x, y and z coordinates as floating point.
|
||||
// This one is derived from a tuple<float,float,float> so
|
||||
// you can do things like:
|
||||
//
|
||||
// float x, y, z;
|
||||
// tie(x, y, z) = atom.loc();
|
||||
|
||||
template<typename F>
|
||||
struct PointF
|
||||
{
|
||||
typedef F FType;
|
||||
|
||||
FType mX, mY, mZ;
|
||||
|
||||
PointF() : mX(0), mY(0), mZ(0) {}
|
||||
PointF(FType x, FType y, FType z) : mX(x), mY(y), mZ(z) {}
|
||||
|
||||
template<typename PF>
|
||||
PointF(const PointF<PF>& pt)
|
||||
: mX(static_cast<F>(pt.mX))
|
||||
, mY(static_cast<F>(pt.mY))
|
||||
, mZ(static_cast<F>(pt.mZ)) {}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
PointF(const clipper::Coord_orth& pt): mX(pt[0]), mY(pt[1]), mZ(pt[2]) {}
|
||||
|
||||
PointF& operator=(const clipper::Coord_orth& rhs)
|
||||
{
|
||||
mX = rhs[0];
|
||||
mY = rhs[1];
|
||||
mZ = rhs[2];
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename PF>
|
||||
PointF& operator=(const PointF<PF>& rhs)
|
||||
{
|
||||
mX = static_cast<F>(rhs.mX);
|
||||
mY = static_cast<F>(rhs.mY);
|
||||
mZ = static_cast<F>(rhs.mZ);
|
||||
return *this;
|
||||
}
|
||||
|
||||
FType& getX() { return mX; }
|
||||
FType getX() const { return mX; }
|
||||
void setX(FType x) { mX = x; }
|
||||
|
||||
FType& getY() { return mY; }
|
||||
FType getY() const { return mY; }
|
||||
void setY(FType y) { mY = y; }
|
||||
|
||||
FType& getZ() { return mZ; }
|
||||
FType getZ() const { return mZ; }
|
||||
void setZ(FType z) { mZ = z; }
|
||||
|
||||
PointF& operator+=(const PointF& rhs)
|
||||
{
|
||||
mX += rhs.mX;
|
||||
mY += rhs.mY;
|
||||
mZ += rhs.mZ;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator+=(FType d)
|
||||
{
|
||||
mX += d;
|
||||
mY += d;
|
||||
mZ += d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator-=(const PointF& rhs)
|
||||
{
|
||||
mX -= rhs.mX;
|
||||
mY -= rhs.mY;
|
||||
mZ -= rhs.mZ;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator-=(FType d)
|
||||
{
|
||||
mX -= d;
|
||||
mY -= d;
|
||||
mZ -= d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator*=(FType rhs)
|
||||
{
|
||||
mX *= rhs;
|
||||
mY *= rhs;
|
||||
mZ *= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator/=(FType rhs)
|
||||
{
|
||||
mX /= rhs;
|
||||
mY /= rhs;
|
||||
mZ /= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
FType normalize()
|
||||
{
|
||||
auto length = mX * mX + mY * mY + mZ * mZ;
|
||||
if (length > 0)
|
||||
{
|
||||
length = std::sqrt(length);
|
||||
operator/=(length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
void rotate(const boost::math::quaternion<FType>& q)
|
||||
{
|
||||
boost::math::quaternion<FType> p(0, mX, mY, mZ);
|
||||
|
||||
p = q * p * boost::math::conj(q);
|
||||
|
||||
mX = p.R_component_2();
|
||||
mY = p.R_component_3();
|
||||
mZ = p.R_component_4();
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
operator clipper::Coord_orth() const
|
||||
{
|
||||
return clipper::Coord_orth(mX, mY, mZ);
|
||||
}
|
||||
#endif
|
||||
|
||||
operator std::tuple<const FType&, const FType&, const FType&>() const
|
||||
{
|
||||
return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
|
||||
}
|
||||
|
||||
operator std::tuple<FType&,FType&,FType&>()
|
||||
{
|
||||
return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
|
||||
}
|
||||
|
||||
bool operator==(const PointF& rhs) const
|
||||
{
|
||||
return mX == rhs.mX and mY == rhs.mY and mZ == rhs.mZ;
|
||||
}
|
||||
|
||||
// consider point as a vector... perhaps I should rename Point?
|
||||
FType lengthsq() const
|
||||
{
|
||||
return mX * mX + mY * mY + mZ * mZ;
|
||||
}
|
||||
|
||||
FType length() const
|
||||
{
|
||||
return sqrt(mX * mX + mY * mY + mZ * mZ);
|
||||
}
|
||||
};
|
||||
|
||||
typedef PointF<float> Point;
|
||||
typedef PointF<double> DPoint;
|
||||
|
||||
template<typename F>
|
||||
inline std::ostream& operator<<(std::ostream& os, const PointF<F>& pt)
|
||||
{
|
||||
os << '(' << pt.mX << ',' << pt.mY << ',' << pt.mZ << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator+(const PointF<F>& lhs, const PointF<F>& rhs)
|
||||
{
|
||||
return PointF<F>(lhs.mX + rhs.mX, lhs.mY + rhs.mY, lhs.mZ + rhs.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator-(const PointF<F>& lhs, const PointF<F>& rhs)
|
||||
{
|
||||
return PointF<F>(lhs.mX - rhs.mX, lhs.mY - rhs.mY, lhs.mZ - rhs.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator-(const PointF<F>& pt)
|
||||
{
|
||||
return PointF<F>(-pt.mX, -pt.mY, -pt.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator*(const PointF<F>& pt, F f)
|
||||
{
|
||||
return PointF<F>(pt.mX * f, pt.mY * f, pt.mZ * f);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator*(F f, const PointF<F>& pt)
|
||||
{
|
||||
return PointF<F>(pt.mX * f, pt.mY * f, pt.mZ * f);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator/(const PointF<F>& pt, F f)
|
||||
{
|
||||
return PointF<F>(pt.mX / f, pt.mY / f, pt.mZ / f);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// several standard 3d operations
|
||||
|
||||
template<typename F>
|
||||
inline double DistanceSquared(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return
|
||||
(a.mX - b.mX) * (a.mX - b.mX) +
|
||||
(a.mY - b.mY) * (a.mY - b.mY) +
|
||||
(a.mZ - b.mZ) * (a.mZ - b.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline double Distance(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return sqrt(
|
||||
(a.mX - b.mX) * (a.mX - b.mX) +
|
||||
(a.mY - b.mY) * (a.mY - b.mY) +
|
||||
(a.mZ - b.mZ) * (a.mZ - b.mZ));
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline F DotProduct(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return a.mX * b.mX + a.mY * b.mY + a.mZ * b.mZ;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> CrossProduct(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return PointF<F>(a.mY * b.mZ - b.mY * a.mZ,
|
||||
a.mZ * b.mX - b.mZ * a.mX,
|
||||
a.mX * b.mY - b.mX * a.mY);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double Angle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3)
|
||||
{
|
||||
PointF<F> v1 = p1 - p2;
|
||||
PointF<F> v2 = p3 - p2;
|
||||
|
||||
return std::acos(DotProduct(v1, v2) / (v1.length() * v2.length())) * 180 / kPI;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double DihedralAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
|
||||
{
|
||||
PointF<F> v12 = p1 - p2; // vector from p2 to p1
|
||||
PointF<F> v43 = p4 - p3; // vector from p3 to p4
|
||||
|
||||
PointF<F> z = p2 - p3; // vector from p3 to p2
|
||||
|
||||
PointF<F> p = CrossProduct(z, v12);
|
||||
PointF<F> x = CrossProduct(z, v43);
|
||||
PointF<F> y = CrossProduct(z, x);
|
||||
|
||||
double u = DotProduct(x, x);
|
||||
double v = DotProduct(y, y);
|
||||
|
||||
double result = 360;
|
||||
if (u > 0 and v > 0)
|
||||
{
|
||||
u = DotProduct(p, x) / sqrt(u);
|
||||
v = DotProduct(p, y) / sqrt(v);
|
||||
if (u != 0 or v != 0)
|
||||
result = atan2(v, u) * 180 / kPI;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double CosinusAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
|
||||
{
|
||||
PointF<F> v12 = p1 - p2;
|
||||
PointF<F> v34 = p3 - p4;
|
||||
|
||||
double result = 0;
|
||||
|
||||
double x = DotProduct(v12, v12) * DotProduct(v34, v34);
|
||||
if (x > 0)
|
||||
result = DotProduct(v12, v34) / sqrt(x);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
auto DistancePointToLine(const PointF<F> &l1, const PointF<F> &l2, const PointF<F> &p)
|
||||
{
|
||||
auto line = l2 - l1;
|
||||
auto p_to_l1 = p - l1;
|
||||
auto p_to_l2 = p - l2;
|
||||
auto cross = CrossProduct(p_to_l1, p_to_l2);
|
||||
return cross.length() / line.length();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// For e.g. simulated annealing, returns a new point that is moved in
|
||||
// a random direction with a distance randomly chosen from a normal
|
||||
// distribution with a stddev of offset.
|
||||
|
||||
template<typename F>
|
||||
PointF<F> Nudge(PointF<F> p, F offset);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// We use quaternions to do rotations in 3d space
|
||||
|
||||
Quaternion Normalize(Quaternion q);
|
||||
|
||||
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q);
|
||||
Point Centroid(std::vector<Point>& Points);
|
||||
Point CenterPoints(std::vector<Point>& Points);
|
||||
Quaternion AlignPoints(const std::vector<Point>& a, const std::vector<Point>& b);
|
||||
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Helper class to generate evenly divided Points on a sphere
|
||||
// we use a fibonacci sphere to calculate even distribution of the dots
|
||||
|
||||
template<int N>
|
||||
class SphericalDots
|
||||
{
|
||||
public:
|
||||
enum { P = 2 * N + 1 };
|
||||
typedef typename std::array<Point,P> array_type;
|
||||
typedef typename array_type::const_iterator iterator;
|
||||
|
||||
static SphericalDots& instance()
|
||||
{
|
||||
static SphericalDots sInstance;
|
||||
return sInstance;
|
||||
}
|
||||
|
||||
size_t size() const { return mPoints.size(); }
|
||||
const Point operator[](uint32_t inIx) const { return mPoints[inIx]; }
|
||||
iterator begin() const { return mPoints.begin(); }
|
||||
iterator end() const { return mPoints.end(); }
|
||||
|
||||
double weight() const { return mWeight; }
|
||||
|
||||
SphericalDots()
|
||||
{
|
||||
|
||||
const double
|
||||
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
|
||||
|
||||
mWeight = (4 * kPI) / P;
|
||||
|
||||
auto p = mPoints.begin();
|
||||
|
||||
for (int32_t i = -N; i <= N; ++i)
|
||||
{
|
||||
double lat = std::asin((2.0 * i) / P);
|
||||
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
|
||||
|
||||
p->mX = sin(lon) * cos(lat);
|
||||
p->mY = cos(lon) * cos(lat);
|
||||
p->mZ = sin(lat);
|
||||
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
array_type mPoints;
|
||||
double mWeight;
|
||||
};
|
||||
|
||||
typedef SphericalDots<50> SphericalDots_50;
|
||||
|
||||
}
|
||||
@@ -1,218 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Calculate DSSP-like secondary structure information
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class Structure;
|
||||
class Monomer;
|
||||
|
||||
struct Res;
|
||||
|
||||
extern const float
|
||||
kCouplingConstant, kMinHBondEnergy, kMaxHBondEnergy;
|
||||
|
||||
/// Secondary structure classes; each enumerator's value is a single
/// character code.
enum SecondaryStructureType : char
{
    ssLoop       = ' ',
    ssAlphahelix = 'H',
    ssBetabridge = 'B',
    ssStrand     = 'E',
    ssHelix_3    = 'G',
    ssHelix_5    = 'I',
    ssHelix_PPII = 'P',
    ssTurn       = 'T',
    ssBend       = 'S'
};
|
||||
|
||||
/// The kinds of helix that can be queried per residue.
enum class HelixType
{
    rh_3_10,
    rh_alpha,
    rh_pi,
    rh_pp
};
|
||||
|
||||
/// Position of a residue relative to a helix.
enum class Helix
{
    None,
    Start,
    End,
    StartAndEnd,
    Middle
};
|
||||
|
||||
//struct HBond
|
||||
//{
|
||||
// std::string labelAsymID;
|
||||
// int labelSeqID;
|
||||
// double energy;
|
||||
//};
|
||||
//
|
||||
//struct BridgePartner
|
||||
//{
|
||||
// std::string labelAsymID;
|
||||
// int labelSeqID;
|
||||
// int ladder;
|
||||
// bool parallel;
|
||||
//};
|
||||
|
||||
struct SecondaryStructure
|
||||
{
|
||||
SecondaryStructureType type;
|
||||
// HBond donor[2], acceptor[2];
|
||||
// BridgePartner beta[2];
|
||||
// int sheet;
|
||||
// bool bend;
|
||||
};
|
||||
|
||||
//void CalculateSecondaryStructure(Structure& s);
|
||||
|
||||
/// Number of bins in each of the histograms of DSSP_Statistics.
const size_t kHistogramSize = 30;

/// Counters and histograms returned by DSSP::GetStatistics().
/// Every member has a default initializer, so a default-constructed object
/// starts out with all counts at zero.
struct DSSP_Statistics
{
    uint32_t nrOfResidues = 0;
    uint32_t nrOfChains = 0;
    uint32_t nrOfSSBridges = 0;
    uint32_t nrOfIntraChainSSBridges = 0;
    uint32_t nrOfHBonds = 0;
    uint32_t nrOfHBondsInAntiparallelBridges = 0;
    uint32_t nrOfHBondsInParallelBridges = 0;
    uint32_t nrOfHBondsPerDistance[11] = {};
    double accessibleSurface = 0;

    uint32_t residuesPerAlphaHelixHistogram[kHistogramSize] = {};
    uint32_t parallelBridgesPerLadderHistogram[kHistogramSize] = {};
    uint32_t antiparallelBridgesPerLadderHistogram[kHistogramSize] = {};
    uint32_t laddersPerSheetHistogram[kHistogramSize] = {};
};
|
||||
|
||||
/// Kind of chain break reported for a residue.
enum class ChainBreak
{
    None,
    NewChain,
    Gap
};
|
||||
|
||||
class DSSP
|
||||
{
|
||||
public:
|
||||
DSSP(const Structure& s, int min_poly_proline_stretch_length, bool calculateSurfaceAccessibility);
|
||||
~DSSP();
|
||||
|
||||
DSSP(const DSSP&) = delete;
|
||||
DSSP& operator=(const DSSP&) = delete;
|
||||
|
||||
SecondaryStructureType operator()(const std::string& inAsymID, int inSeqID) const;
|
||||
SecondaryStructureType operator()(const Monomer& m) const;
|
||||
|
||||
double accessibility(const std::string& inAsymID, int inSeqID) const;
|
||||
double accessibility(const Monomer& m) const;
|
||||
|
||||
bool isAlphaHelixEndBeforeStart(const Monomer& m) const;
|
||||
bool isAlphaHelixEndBeforeStart(const std::string& inAsymID, int inSeqID) const;
|
||||
|
||||
DSSP_Statistics GetStatistics() const;
|
||||
|
||||
class iterator;
|
||||
using res_iterator = typename std::vector<Res>::iterator;
|
||||
|
||||
class ResidueInfo
|
||||
{
|
||||
public:
|
||||
friend class iterator;
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
bool empty() const { return mImpl == nullptr; }
|
||||
|
||||
const Monomer& residue() const;
|
||||
std::string alt_id() const;
|
||||
|
||||
/// \brief return 0 if not a break, ' ' in case of a new chain and '*' in case of a broken chain
|
||||
ChainBreak chainBreak() const;
|
||||
|
||||
/// \brief the internal number in DSSP
|
||||
int nr() const;
|
||||
|
||||
SecondaryStructureType ss() const;
|
||||
|
||||
int ssBridgeNr() const;
|
||||
|
||||
Helix helix(HelixType helixType) const;
|
||||
|
||||
bool bend() const;
|
||||
|
||||
double accessibility() const;
|
||||
|
||||
/// \brief returns resinfo, ladder and parallel
|
||||
std::tuple<ResidueInfo,int,bool> bridgePartner(int i) const;
|
||||
|
||||
int sheet() const;
|
||||
|
||||
/// \brief return resinfo and the energy of the bond
|
||||
std::tuple<ResidueInfo,double> acceptor(int i) const;
|
||||
std::tuple<ResidueInfo,double> donor(int i) const;
|
||||
|
||||
private:
|
||||
ResidueInfo(Res* res) : mImpl(res) {}
|
||||
|
||||
Res* mImpl;
|
||||
};
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = ResidueInfo;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type*;
|
||||
using reference = value_type&;
|
||||
|
||||
iterator(const iterator& i);
|
||||
iterator(Res* res);
|
||||
iterator& operator=(const iterator& i);
|
||||
|
||||
reference operator*() { return mCurrent; }
|
||||
pointer operator->() { return &mCurrent; }
|
||||
|
||||
iterator& operator++();
|
||||
iterator operator++(int)
|
||||
{
|
||||
auto tmp(*this);
|
||||
this->operator++();
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool operator==(const iterator& rhs) const { return mCurrent.mImpl == rhs.mCurrent.mImpl; }
|
||||
bool operator!=(const iterator& rhs) const { return mCurrent.mImpl != rhs.mCurrent.mImpl; }
|
||||
|
||||
private:
|
||||
ResidueInfo mCurrent;
|
||||
};
|
||||
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
|
||||
bool empty() const { return begin() == end(); }
|
||||
|
||||
private:
|
||||
struct DSSPImpl* mImpl;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
@@ -1,546 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "cif++/AtomType.hpp"
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/Point.hpp"
|
||||
|
||||
/*
|
||||
To modify a structure, you will have to use actions.
|
||||
|
||||
The currently supported actions are:
|
||||
|
||||
// - Move atom to new location
|
||||
- Remove atom
|
||||
// - Add new atom that was formerly missing
|
||||
// - Add alternate Residue
|
||||
-
|
||||
|
||||
*/
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class Atom;
|
||||
class Residue;
|
||||
class Monomer;
|
||||
class Polymer;
|
||||
class Structure;
|
||||
class File;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Atom
|
||||
{
|
||||
public:
|
||||
Atom();
|
||||
Atom(struct AtomImpl *impl);
|
||||
Atom(const Atom &rhs);
|
||||
|
||||
Atom(cif::Datablock &db, cif::Row &row);
|
||||
|
||||
// a special constructor to create symmetry copies
|
||||
Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);
|
||||
|
||||
~Atom();
|
||||
|
||||
explicit operator bool() const { return mImpl_ != nullptr; }
|
||||
|
||||
// return a copy of this atom, with data copied instead of referenced
|
||||
Atom clone() const;
|
||||
|
||||
Atom &operator=(const Atom &rhs);
|
||||
|
||||
const std::string &id() const;
|
||||
AtomType type() const;
|
||||
|
||||
Point location() const;
|
||||
void location(Point p);
|
||||
|
||||
/// \brief Translate the position of this atom by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the position of this atom by \a q
|
||||
void rotate(Quaternion q);
|
||||
|
||||
// for direct access to underlying data, be careful!
|
||||
const cif::Row getRow() const;
|
||||
const cif::Row getRowAniso() const;
|
||||
|
||||
// Atom symmetryCopy(const Point& d, const clipper::RTop_orth& rt);
|
||||
bool isSymmetryCopy() const;
|
||||
std::string symmetry() const;
|
||||
// const clipper::RTop_orth& symop() const;
|
||||
|
||||
const Compound &comp() const;
|
||||
bool isWater() const;
|
||||
int charge() const;
|
||||
|
||||
float uIso() const;
|
||||
bool getAnisoU(float anisou[6]) const;
|
||||
float occupancy() const;
|
||||
|
||||
template <typename T>
|
||||
T property(const std::string &name) const;
|
||||
|
||||
void property(const std::string &name, const std::string &value);
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
|
||||
void property(const std::string &name, const T &value)
|
||||
{
|
||||
property(name, std::to_string(value));
|
||||
}
|
||||
|
||||
// specifications
|
||||
std::string labelAtomID() const;
|
||||
std::string labelCompID() const;
|
||||
std::string labelAsymID() const;
|
||||
std::string labelEntityID() const;
|
||||
int labelSeqID() const;
|
||||
std::string labelAltID() const;
|
||||
bool isAlternate() const;
|
||||
|
||||
std::string authAtomID() const;
|
||||
std::string authCompID() const;
|
||||
std::string authAsymID() const;
|
||||
std::string authSeqID() const;
|
||||
std::string pdbxAuthInsCode() const;
|
||||
std::string pdbxAuthAltID() const;
|
||||
|
||||
std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
|
||||
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
|
||||
|
||||
bool operator==(const Atom &rhs) const;
|
||||
|
||||
// // get clipper format Atom
|
||||
// clipper::Atom toClipper() const;
|
||||
|
||||
// Radius calculation based on integrating the density until perc of electrons is found
|
||||
void calculateRadius(float resHigh, float resLow, float perc);
|
||||
float radius() const;
|
||||
|
||||
// access data in compound for this atom
|
||||
|
||||
// convenience routine
|
||||
bool isBackBone() const
|
||||
{
|
||||
auto atomID = labelAtomID();
|
||||
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
|
||||
}
|
||||
|
||||
void swap(Atom &b)
|
||||
{
|
||||
std::swap(mImpl_, b.mImpl_);
|
||||
}
|
||||
|
||||
int compare(const Atom &b) const;
|
||||
|
||||
bool operator<(const Atom &rhs) const
|
||||
{
|
||||
return compare(rhs) < 0;
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
|
||||
|
||||
private:
|
||||
friend class Structure;
|
||||
void setID(int id);
|
||||
|
||||
AtomImpl *impl();
|
||||
const AtomImpl *impl() const;
|
||||
|
||||
struct AtomImpl *mImpl_;
|
||||
};
|
||||
|
||||
/// Free swap for Atom, forwarding to the member swap.
inline void swap(mmcif::Atom &a, mmcif::Atom &b) { a.swap(b); }
|
||||
|
||||
inline double Distance(const Atom &a, const Atom &b)
|
||||
{
|
||||
return Distance(a.location(), b.location());
|
||||
}
|
||||
|
||||
inline double DistanceSquared(const Atom &a, const Atom &b)
|
||||
{
|
||||
return DistanceSquared(a.location(), b.location());
|
||||
}
|
||||
|
||||
typedef std::vector<Atom> AtomView;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Residue
|
||||
{
|
||||
public:
|
||||
// constructors should be private, but that's not possible for now (needed in emplace)
|
||||
|
||||
// constructor for waters
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, const std::string &authSeqID);
|
||||
|
||||
// constructor for a residue without a sequence number
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID);
|
||||
|
||||
// constructor for a residue with a sequence number
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, int seqID, const std::string &authSeqID);
|
||||
|
||||
Residue(const Residue &rhs) = delete;
|
||||
Residue &operator=(const Residue &rhs) = delete;
|
||||
|
||||
Residue(Residue &&rhs);
|
||||
Residue &operator=(Residue &&rhs);
|
||||
|
||||
virtual ~Residue();
|
||||
|
||||
const Compound &compound() const;
|
||||
const AtomView &atoms() const;
|
||||
|
||||
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
|
||||
AtomView unique_atoms() const;
|
||||
|
||||
/// \brief The alt ID used for the unique atoms
|
||||
std::string unique_alt_id() const;
|
||||
|
||||
Atom atomByID(const std::string &atomID) const;
|
||||
|
||||
const std::string &compoundID() const { return mCompoundID; }
|
||||
const std::string &asymID() const { return mAsymID; }
|
||||
int seqID() const { return mSeqID; }
|
||||
std::string entityID() const;
|
||||
|
||||
std::string authAsymID() const;
|
||||
std::string authSeqID() const;
|
||||
std::string authInsCode() const;
|
||||
|
||||
// return a human readable PDB-like auth id (chain+seqnr+iCode)
|
||||
std::string authID() const;
|
||||
|
||||
// similar for mmCIF space
|
||||
std::string labelID() const;
|
||||
|
||||
// Is this residue a single entity?
|
||||
bool isEntity() const;
|
||||
|
||||
bool isWater() const { return mCompoundID == "HOH"; }
|
||||
|
||||
const Structure &structure() const { return *mStructure; }
|
||||
|
||||
bool empty() const { return mStructure == nullptr; }
|
||||
|
||||
bool hasAlternateAtoms() const;
|
||||
|
||||
/// \brief Return the list of unique alt ID's present in this residue
|
||||
std::set<std::string> getAlternateIDs() const;
|
||||
|
||||
/// \brief Return the list of unique atom ID's
|
||||
std::set<std::string> getAtomIDs() const;
|
||||
|
||||
/// \brief Return the list of atoms having ID \a atomID
|
||||
AtomView getAtomsByID(const std::string &atomID) const;
|
||||
|
||||
// some routines for 3d work
|
||||
std::tuple<Point, float> centerAndRadius() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Residue &res);
|
||||
|
||||
protected:
|
||||
Residue() {}
|
||||
|
||||
friend class Polymer;
|
||||
|
||||
const Structure *mStructure = nullptr;
|
||||
std::string mCompoundID, mAsymID;
|
||||
int mSeqID = 0;
|
||||
|
||||
// Watch out, this is used only to label waters... The rest of the code relies on
|
||||
// MapLabelToAuth to get this info. Perhaps we should rename this member field.
|
||||
std::string mAuthSeqID;
|
||||
AtomView mAtoms;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a monomer models a single Residue in a protein chain
|
||||
|
||||
class Monomer : public Residue
|
||||
{
|
||||
public:
|
||||
// Monomer();
|
||||
Monomer(const Monomer &rhs) = delete;
|
||||
Monomer &operator=(const Monomer &rhs) = delete;
|
||||
|
||||
Monomer(Monomer &&rhs);
|
||||
Monomer &operator=(Monomer &&rhs);
|
||||
|
||||
Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
|
||||
const std::string &compoundID);
|
||||
|
||||
bool is_first_in_chain() const;
|
||||
bool is_last_in_chain() const;
|
||||
|
||||
// convenience
|
||||
bool has_alpha() const;
|
||||
bool has_kappa() const;
|
||||
|
||||
// Assuming this is really an amino acid...
|
||||
|
||||
float phi() const;
|
||||
float psi() const;
|
||||
float alpha() const;
|
||||
float kappa() const;
|
||||
float tco() const;
|
||||
float omega() const;
|
||||
|
||||
// torsion angles
|
||||
size_t nrOfChis() const;
|
||||
float chi(size_t i) const;
|
||||
|
||||
bool isCis() const;
|
||||
|
||||
/// \brief Returns true if the four atoms C, CA, N and O are present
|
||||
bool isComplete() const;
|
||||
|
||||
/// \brief Returns true if any of the backbone atoms has an alternate
|
||||
bool hasAlternateBackboneAtoms() const;
|
||||
|
||||
Atom CAlpha() const { return atomByID("CA"); }
|
||||
Atom C() const { return atomByID("C"); }
|
||||
Atom N() const { return atomByID("N"); }
|
||||
Atom O() const { return atomByID("O"); }
|
||||
Atom H() const { return atomByID("H"); }
|
||||
|
||||
bool isBondedTo(const Monomer &rhs) const
|
||||
{
|
||||
return this != &rhs and areBonded(*this, rhs);
|
||||
}
|
||||
|
||||
static bool areBonded(const Monomer &a, const Monomer &b, float errorMargin = 0.5f);
|
||||
static bool isCis(const Monomer &a, const Monomer &b);
|
||||
static float omega(const Monomer &a, const Monomer &b);
|
||||
|
||||
// for LEU and VAL
|
||||
float chiralVolume() const;
|
||||
|
||||
private:
|
||||
const Polymer *mPolymer;
|
||||
size_t mIndex;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Polymer : public std::vector<Monomer>
|
||||
{
|
||||
public:
|
||||
Polymer(const Structure &s, const std::string &entityID, const std::string &asymID);
|
||||
|
||||
Polymer(const Polymer &) = delete;
|
||||
Polymer &operator=(const Polymer &) = delete;
|
||||
|
||||
// Polymer(Polymer&& rhs) = delete;
|
||||
// Polymer& operator=(Polymer&& rhs) = de;
|
||||
|
||||
Monomer &getBySeqID(int seqID);
|
||||
const Monomer &getBySeqID(int seqID) const;
|
||||
|
||||
Structure *structure() const { return mStructure; }
|
||||
|
||||
std::string asymID() const { return mAsymID; }
|
||||
std::string entityID() const { return mEntityID; }
|
||||
|
||||
std::string chainID() const;
|
||||
|
||||
int Distance(const Monomer &a, const Monomer &b) const;
|
||||
|
||||
private:
|
||||
Structure *mStructure;
|
||||
std::string mEntityID;
|
||||
std::string mAsymID;
|
||||
cif::RowSet mPolySeq;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// file is a reference to the data stored in e.g. the cif file.
|
||||
// This object is not copyable.
|
||||
|
||||
class File : public std::enable_shared_from_this<File>
|
||||
{
|
||||
public:
|
||||
File();
|
||||
File(const std::filesystem::path &path);
|
||||
File(const char *data, size_t length); // good luck trying to find out what it is...
|
||||
~File();
|
||||
|
||||
File(const File &) = delete;
|
||||
File &operator=(const File &) = delete;
|
||||
|
||||
cif::Datablock& createDatablock(const std::string &name);
|
||||
|
||||
void load(const std::filesystem::path &path);
|
||||
void save(const std::filesystem::path &path);
|
||||
|
||||
Structure *model(size_t nr = 1);
|
||||
|
||||
struct FileImpl &impl() const { return *mImpl; }
|
||||
|
||||
cif::Datablock &data();
|
||||
cif::File &file();
|
||||
|
||||
private:
|
||||
struct FileImpl *mImpl;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Bit flags that can be passed when opening a Structure.
enum class StructureOpenOptions
{
    SkipHydrogen = 1 << 0
};

/// Returns true when \a a and \a b have at least one flag bit in common.
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
    const int lhsBits = static_cast<int>(a);
    const int rhsBits = static_cast<int>(b);

    return (lhsBits bitand rhsBits) != 0;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Structure
|
||||
{
|
||||
public:
|
||||
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
Structure &operator=(const Structure &) = delete;
|
||||
~Structure();
|
||||
|
||||
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
|
||||
Structure(const Structure &);
|
||||
|
||||
File &getFile() const;
|
||||
|
||||
const AtomView &atoms() const { return mAtoms; }
|
||||
AtomView waters() const;
|
||||
|
||||
const std::list<Polymer> &polymers() const { return mPolymers; }
|
||||
std::list<Polymer> &polymers() { return mPolymers; }
|
||||
|
||||
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
|
||||
const std::vector<Residue> &branchResidues() const { return mBranchResidues; }
|
||||
|
||||
Atom getAtomByID(std::string id) const;
|
||||
// Atom getAtomByLocation(Point pt, float maxDistance) const;
|
||||
|
||||
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
|
||||
const std::string &compID, int seqID, const std::string &altID = "");
|
||||
|
||||
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
|
||||
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0) const;
|
||||
|
||||
// map between auth and label locations
|
||||
|
||||
std::tuple<std::string, int, std::string> MapAuthToLabel(const std::string &asymID,
|
||||
const std::string &seqID, const std::string &compID, const std::string &insCode = "");
|
||||
|
||||
std::tuple<std::string, std::string, std::string, std::string> MapLabelToAuth(
|
||||
const std::string &asymID, int seqID, const std::string &compID);
|
||||
|
||||
// returns chain, seqnr, icode
|
||||
std::tuple<char, int, char> MapLabelToAuth(
|
||||
const std::string &asymID, int seqID) const;
|
||||
|
||||
// returns chain,seqnr,comp,iCode
|
||||
std::tuple<std::string, int, std::string, std::string> MapLabelToPDB(
|
||||
const std::string &asymID, int seqID, const std::string &compID,
|
||||
const std::string &authSeqID) const;
|
||||
|
||||
std::tuple<std::string, int, std::string> MapPDBToLabel(
|
||||
const std::string &asymID, int seqID, const std::string &compID, const std::string &iCode) const;
|
||||
|
||||
// Actions
|
||||
void removeAtom(Atom &a);
|
||||
void swapAtoms(Atom &a1, Atom &a2); // swap the labels for these atoms
|
||||
void moveAtom(Atom &a, Point p); // move atom to a new location
|
||||
void changeResidue(const Residue &res, const std::string &newCompound,
|
||||
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
|
||||
|
||||
/// \brief Create a new non-polymer entity, returns new ID
|
||||
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
|
||||
/// \return The ID of the created entity
|
||||
std::string createNonPolyEntity(const std::string &mon_id);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
|
||||
/// This method assumes you are copying data from one cif file to another.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of atom_site rows containing the data.
|
||||
/// \return The newly created asym ID
|
||||
std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);
|
||||
|
||||
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
|
||||
/// Will assign new atom_id's to all atoms. Be careful.
|
||||
void sortAtoms();
|
||||
|
||||
/// \brief Translate the coordinates of all atoms in the structure by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the coordinates of all atoms in the structure by the quaternion \a t
|
||||
void rotate(Quaternion t);
|
||||
|
||||
const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }
|
||||
const std::vector<Residue> &getBranchResidues() const { return mBranchResidues; }
|
||||
|
||||
void cleanupEmptyCategories();
|
||||
|
||||
private:
|
||||
friend Polymer;
|
||||
friend Residue;
|
||||
// friend residue_view;
|
||||
// friend residue_iterator;
|
||||
|
||||
cif::Category &category(const char *name) const;
|
||||
cif::Datablock &datablock() const;
|
||||
|
||||
std::string insertCompound(const std::string &compoundID, bool isEntity);
|
||||
|
||||
void loadData();
|
||||
void updateAtomIndex();
|
||||
|
||||
void loadAtomsForModel(StructureOpenOptions options);
|
||||
|
||||
File &mFile;
|
||||
size_t mModelNr;
|
||||
AtomView mAtoms;
|
||||
std::vector<size_t> mAtomIndex;
|
||||
std::list<Polymer> mPolymers;
|
||||
std::vector<Residue> mNonPolymers, mBranchResidues;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
277
include/cif++/atom_type.hpp
Normal file
277
include/cif++/atom_type.hpp
Normal file
@@ -0,0 +1,277 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Lib for working with structures as contained in mmCIF and PDB files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
enum atom_type : uint8_t
|
||||
{
|
||||
Nn = 0, // Unknown
|
||||
|
||||
H = 1, // Hydrogen
|
||||
He = 2, // Helium
|
||||
|
||||
Li = 3, // Lithium
|
||||
Be = 4, // Beryllium
|
||||
B = 5, // Boron
|
||||
C = 6, // Carbon
|
||||
N = 7, // Nitrogen
|
||||
O = 8, // Oxygen
|
||||
F = 9, // Fluorine
|
||||
Ne = 10, // Neon
|
||||
|
||||
Na = 11, // Sodium
|
||||
Mg = 12, // Magnesium
|
||||
Al = 13, // Aluminium
|
||||
Si = 14, // Silicon
|
||||
P = 15, // Phosphorus
|
||||
S = 16, // Sulfur
|
||||
Cl = 17, // Chlorine
|
||||
Ar = 18, // Argon
|
||||
|
||||
K = 19, // Potassium
|
||||
Ca = 20, // Calcium
|
||||
Sc = 21, // Scandium
|
||||
Ti = 22, // Titanium
|
||||
V = 23, // Vanadium
|
||||
Cr = 24, // Chromium
|
||||
Mn = 25, // Manganese
|
||||
Fe = 26, // Iron
|
||||
Co = 27, // Cobalt
|
||||
Ni = 28, // Nickel
|
||||
Cu = 29, // Copper
|
||||
Zn = 30, // Zinc
|
||||
Ga = 31, // Gallium
|
||||
Ge = 32, // Germanium
|
||||
As = 33, // Arsenic
|
||||
Se = 34, // Selenium
|
||||
Br = 35, // Bromine
|
||||
Kr = 36, // Krypton
|
||||
|
||||
Rb = 37, // Rubidium
|
||||
Sr = 38, // Strontium
|
||||
Y = 39, // Yttrium
|
||||
Zr = 40, // Zirconium
|
||||
Nb = 41, // Niobium
|
||||
Mo = 42, // Molybdenum
|
||||
Tc = 43, // Technetium
|
||||
Ru = 44, // Ruthenium
|
||||
Rh = 45, // Rhodium
|
||||
Pd = 46, // Palladium
|
||||
Ag = 47, // Silver
|
||||
Cd = 48, // Cadmium
|
||||
In = 49, // Indium
|
||||
Sn = 50, // Tin
|
||||
Sb = 51, // Antimony
|
||||
Te = 52, // Tellurium
|
||||
I = 53, // Iodine
|
||||
Xe = 54, // Xenon
|
||||
Cs = 55, // Caesium
|
||||
Ba = 56, // Barium
|
||||
La = 57, // Lanthanum
|
||||
|
||||
Hf = 72, // Hafnium
|
||||
Ta = 73, // Tantalum
|
||||
W = 74, // Tungsten
|
||||
Re = 75, // Rhenium
|
||||
Os = 76, // Osmium
|
||||
Ir = 77, // Iridium
|
||||
Pt = 78, // Platinum
|
||||
Au = 79, // Gold
|
||||
Hg = 80, // Mercury
|
||||
Tl = 81, // Thallium
|
||||
Pb = 82, // Lead
|
||||
Bi = 83, // Bismuth
|
||||
Po = 84, // Polonium
|
||||
At = 85, // Astatine
|
||||
Rn = 86, // Radon
|
||||
Fr = 87, // Francium
|
||||
Ra = 88, // Radium
|
||||
Ac = 89, // Actinium
|
||||
|
||||
Rf = 104, // Rutherfordium
|
||||
Db = 105, // Dubnium
|
||||
Sg = 106, // Seaborgium
|
||||
Bh = 107, // Bohrium
|
||||
Hs = 108, // Hassium
|
||||
Mt = 109, // Meitnerium
|
||||
Ds = 110, // Darmstadtium
|
||||
Rg = 111, // Roentgenium
|
||||
Cn = 112, // Copernicium
|
||||
Nh = 113, // Nihonium
|
||||
Fl = 114, // Flerovium
|
||||
Mc = 115, // Moscovium
|
||||
Lv = 116, // Livermorium
|
||||
Ts = 117, // Tennessine
|
||||
Og = 118, // Oganesson
|
||||
|
||||
Ce = 58, // Cerium
|
||||
Pr = 59, // Praseodymium
|
||||
Nd = 60, // Neodymium
|
||||
Pm = 61, // Promethium
|
||||
Sm = 62, // Samarium
|
||||
Eu = 63, // Europium
|
||||
Gd = 64, // Gadolinium
|
||||
Tb = 65, // Terbium
|
||||
Dy = 66, // Dysprosium
|
||||
Ho = 67, // Holmium
|
||||
Er = 68, // Erbium
|
||||
Tm = 69, // Thulium
|
||||
Yb = 70, // Ytterbium
|
||||
Lu = 71, // Lutetium
|
||||
|
||||
Th = 90, // Thorium
|
||||
Pa = 91, // Protactinium
|
||||
U = 92, // Uranium
|
||||
Np = 93, // Neptunium
|
||||
Pu = 94, // Plutonium
|
||||
Am = 95, // Americium
|
||||
Cm = 96, // Curium
|
||||
Bk = 97, // Berkelium
|
||||
Cf = 98, // Californium
|
||||
Es = 99, // Einsteinium
|
||||
Fm = 100, // Fermium
|
||||
Md = 101, // Mendelevium
|
||||
No = 102, // Nobelium
|
||||
Lr = 103, // Lawrencium
|
||||
|
||||
D = 129, // Deuterium
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// atom_type_info
|
||||
|
||||
enum class radius_type
|
||||
{
|
||||
calculated,
|
||||
empirical,
|
||||
covalent_empirical,
|
||||
|
||||
single_bond,
|
||||
double_bond,
|
||||
triple_bond,
|
||||
|
||||
van_der_waals,
|
||||
|
||||
type_count
|
||||
};
|
||||
|
||||
constexpr size_t kRadiusTypeCount = static_cast<size_t>(radius_type::type_count);
|
||||
|
||||
enum class ionic_radius_type
|
||||
{
|
||||
effective, crystal
|
||||
};
|
||||
|
||||
struct atom_type_info
|
||||
{
|
||||
atom_type type;
|
||||
std::string name;
|
||||
std::string symbol;
|
||||
float weight;
|
||||
bool metal;
|
||||
float radii[kRadiusTypeCount];
|
||||
};
|
||||
|
||||
extern const atom_type_info kKnownAtoms[];
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// atom_type_traits
|
||||
|
||||
class atom_type_traits
|
||||
{
|
||||
public:
|
||||
atom_type_traits(atom_type a);
|
||||
atom_type_traits(const std::string &symbol);
|
||||
|
||||
atom_type type() const { return m_info->type; }
|
||||
std::string name() const { return m_info->name; }
|
||||
std::string symbol() const { return m_info->symbol; }
|
||||
float weight() const { return m_info->weight; }
|
||||
|
||||
bool is_metal() const { return m_info->metal; }
|
||||
|
||||
static bool is_element(const std::string &symbol);
|
||||
static bool is_metal(const std::string &symbol);
|
||||
|
||||
/// \brief Return the radius of this atom for the requested \a type
///
/// The value stored in the atom info table is divided by 100 before it
/// is returned.
///
/// \param type Which of the stored radii to return, single_bond by default
/// \return The stored radius divided by 100
/// \throws std::invalid_argument if \a type does not index one of the stored radii
float radius(radius_type type = radius_type::single_bond) const
{
	// Compare the unsigned index instead of the enum value: an enum value
	// below zero wraps to a very large index and is rejected too, instead
	// of slipping past the check and reading outside the radii array.
	const auto ix = static_cast<size_t>(type);

	if (ix >= kRadiusTypeCount)
		throw std::invalid_argument("invalid radius requested");

	return m_info->radii[ix] / 100.f;
}
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a solid crystal
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float crystal_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a non-solid environment
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float effective_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
|
||||
{
|
||||
return type == ionic_radius_type::effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
|
||||
}
|
||||
|
||||
// data type encapsulating the Waasmaier & Kirfel scattering factors
|
||||
// in a simplified form (only a and b).
|
||||
// Added the electron scattering factors as well
|
||||
struct SFData
|
||||
{
|
||||
double a[6], b[6];
|
||||
};
|
||||
|
||||
// to get the Cval and Siva values, use this constant as charge:
|
||||
enum
|
||||
{
|
||||
kWKSFVal = -99
|
||||
};
|
||||
|
||||
const SFData &wksf(int charge = 0) const;
|
||||
const SFData &elsf() const;
|
||||
|
||||
private:
|
||||
const struct atom_type_info *m_info;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
587
include/cif++/category.hpp
Normal file
587
include/cif++/category.hpp
Normal file
@@ -0,0 +1,587 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/iterator.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
// TODO: implement all of:
|
||||
// https://en.cppreference.com/w/cpp/named_req/Container
|
||||
// https://en.cppreference.com/w/cpp/named_req/SequenceContainer
|
||||
// and more?
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class category
|
||||
{
|
||||
public:
|
||||
friend class row_handle;
|
||||
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
using value_type = row_handle;
|
||||
using reference = value_type;
|
||||
using const_reference = const value_type;
|
||||
using iterator = iterator_impl<category>;
|
||||
using const_iterator = iterator_impl<const category>;
|
||||
|
||||
category() = default;
|
||||
|
||||
category(std::string_view name);
|
||||
|
||||
category(const category &rhs);
|
||||
|
||||
category(category &&rhs);
|
||||
|
||||
category &operator=(const category &rhs);
|
||||
|
||||
category &operator=(category &&rhs);
|
||||
|
||||
~category();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
const std::string &name() const { return m_name; }
|
||||
|
||||
iset fields() const;
|
||||
|
||||
std::set<uint16_t> key_field_indices() const;
|
||||
|
||||
void set_validator(const validator *v, datablock &db);
|
||||
void update_links(datablock &db);
|
||||
|
||||
const validator *get_validator() const { return m_validator; }
|
||||
const category_validator *get_cat_validator() const { return m_cat_validator; }
|
||||
|
||||
bool is_valid() const;
|
||||
bool validate_links() const;
|
||||
|
||||
bool operator==(const category &rhs) const;
|
||||
bool operator!=(const category &rhs) const
|
||||
{
|
||||
return not operator==(rhs);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
reference front()
|
||||
{
|
||||
return {*this, *m_head};
|
||||
}
|
||||
|
||||
const_reference front() const
|
||||
{
|
||||
return {const_cast<category &>(*this), const_cast<row &>(*m_head)};
|
||||
}
|
||||
|
||||
reference back()
|
||||
{
|
||||
return {*this, *m_tail};
|
||||
}
|
||||
|
||||
const_reference back() const
|
||||
{
|
||||
return {const_cast<category &>(*this), const_cast<row &>(*m_tail)};
|
||||
}
|
||||
|
||||
iterator begin()
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
iterator end()
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
const_iterator begin() const
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
const_iterator end() const
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
const_iterator cbegin() const
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
const_iterator cend() const
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return std::distance(cbegin(), cend());
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_head == nullptr;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// A category can have a key, as defined by the validator/dictionary
|
||||
|
||||
/// @brief The key type
|
||||
using key_type = row_initializer;
|
||||
|
||||
/// @brief Return a row_handle for the row specified by \a key
|
||||
/// @param key The value for the key, fields specified in the dictionary should have a value
|
||||
/// @return The row found in the index, or an undefined row_handle
|
||||
row_handle operator[](const key_type &key);
|
||||
|
||||
const row_handle operator[](const key_type &key) const
|
||||
{
|
||||
return const_cast<category *>(this)->operator[](key);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
iterator_proxy<const category, Ts...> rows(Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return iterator_proxy<const category, Ts...>(*this, begin(), {names...});
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
iterator_proxy<category, Ts...> rows(Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return iterator_proxy<category, Ts...>(*this, begin(), {names...});
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
conditional_iterator_proxy<category> find(condition &&cond)
|
||||
{
|
||||
return find(begin(), std::forward<condition>(cond));
|
||||
}
|
||||
|
||||
conditional_iterator_proxy<category> find(iterator pos, condition &&cond)
|
||||
{
|
||||
return {*this, pos, std::forward<condition>(cond)};
|
||||
}
|
||||
|
||||
conditional_iterator_proxy<const category> find(condition &&cond) const
|
||||
{
|
||||
return find(cbegin(), std::forward<condition>(cond));
|
||||
}
|
||||
|
||||
conditional_iterator_proxy<const category> find(const_iterator pos, condition &&cond) const
|
||||
{
|
||||
return conditional_iterator_proxy<const category>{*this, pos, std::forward<condition>(cond)};
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<category, Ts...> find(condition &&cond, Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<const category, Ts...> find(condition &&cond, Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<category, Ts...> find(const_iterator pos, condition &&cond, Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<const category, Ts...> find(const_iterator pos, condition &&cond, Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// if you only expect a single row
|
||||
|
||||
row_handle find1(condition &&cond)
|
||||
{
|
||||
return find1(begin(), std::forward<condition>(cond));
|
||||
}
|
||||
|
||||
row_handle find1(iterator pos, condition &&cond)
|
||||
{
|
||||
auto h = find(pos, std::forward<condition>(cond));
|
||||
|
||||
return h.size() != 1 ? row_handle{} : *h.begin();
|
||||
}
|
||||
|
||||
const row_handle find1(condition &&cond) const
|
||||
{
|
||||
return find1(cbegin(), std::forward<condition>(cond));
|
||||
}
|
||||
|
||||
const row_handle find1(const_iterator pos, condition &&cond) const
|
||||
{
|
||||
auto h = find(pos, std::forward<condition>(cond));
|
||||
|
||||
return h.size() != 1 ? row_handle{} : *h.begin();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T find1(condition &&cond, const char *column) const
|
||||
{
|
||||
return find1<T>(cbegin(), std::forward<condition>(cond), column);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T find1(const_iterator pos, condition &&cond, const char *column) const
|
||||
{
|
||||
auto h = find<T>(pos, std::forward<condition>(cond), column);
|
||||
|
||||
return h.size() == 1 ? *h.begin() : T{};
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
|
||||
std::tuple<Ts...> find1(condition &&cond, Cs... columns) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
|
||||
// static_assert(std::is_same_v<Cs, const char*>..., "The column names should be const char");
|
||||
return find1<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Cs>(columns)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
|
||||
std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... columns) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
|
||||
auto h = find<Ts...>(pos, std::forward<condition>(cond), std::forward<Cs>(columns)...);
|
||||
|
||||
return h.size() == 1 ? *h.begin() : std::tuple<Ts...>{};
|
||||
}
|
||||
|
||||
/// \brief Return whether at least one row in this category matches \a cond
///
/// A condition that converts to false never matches.
///
/// \param cond The condition to test, it is prepared for this category first
/// \return true if a matching row was found
bool exists(condition &&cond) const
{
	if (cond)
	{
		cond.prepare(*this);

		// Use the result of single() when it holds a valid row,
		// otherwise fall back to testing every row in turn.
		auto single_hit = cond.single();
		if (single_hit.has_value() and *single_hit)
			return true;

		for (auto candidate : *this)
		{
			if (cond(candidate))
				return true;
		}
	}

	return false;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool has_children(row_handle r) const;
|
||||
bool has_parents(row_handle r) const;
|
||||
|
||||
std::vector<row_handle> get_children(row_handle r, const category &childCat) const;
|
||||
std::vector<row_handle> get_parents(row_handle r, const category &parentCat) const;
|
||||
std::vector<row_handle> get_linked(row_handle r, const category &cat) const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// void insert(const_iterator pos, const row_initializer &row)
|
||||
// {
|
||||
// insert_impl(pos, row);
|
||||
// }
|
||||
|
||||
// void insert(const_iterator pos, row_initializer &&row)
|
||||
// {
|
||||
// insert_impl(pos, std::move(row));
|
||||
// }
|
||||
|
||||
iterator erase(iterator pos);
|
||||
void erase(row_handle rh)
|
||||
{
|
||||
erase(iterator(*this, rh.m_row));
|
||||
}
|
||||
|
||||
size_t erase(condition &&cond);
|
||||
size_t erase(condition &&cond, std::function<void(row_handle)> &&visit);
|
||||
|
||||
iterator emplace(row_initializer &&ri)
|
||||
{
|
||||
return this->emplace(ri.begin(), ri.end());
|
||||
}
|
||||
|
||||
template <typename ItemIter>
|
||||
iterator emplace(ItemIter b, ItemIter e)
|
||||
{
|
||||
row *r = this->create_row();
|
||||
|
||||
try
|
||||
{
|
||||
for (auto i = b; i != e; ++i)
|
||||
{
|
||||
// item_value *new_item = this->create_item(*i);
|
||||
r->append(add_column(i->name()), { i->value() });
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (r != nullptr)
|
||||
this->delete_row(r);
|
||||
throw;
|
||||
}
|
||||
|
||||
return insert_impl(cend(), r);
|
||||
}
|
||||
|
||||
void clear();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief generate a new, unique ID. Pass it an ID generating function
|
||||
/// based on a sequence number. This function will be called until the
|
||||
/// result is unique in the context of this category
|
||||
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
|
||||
std::string get_unique_id(const std::string &prefix)
|
||||
{
|
||||
return get_unique_id([prefix](int nr)
|
||||
{ return prefix + std::to_string(nr + 1); });
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Update the value of column \a tag in the rows that match \a cond to \a value
|
||||
/// making sure the linked categories are updated according to the link.
|
||||
/// That means, child categories are updated if the links are absolute
|
||||
/// and unique. If they are not, the child category rows are split.
|
||||
|
||||
void update_value(condition &&cond, std::string_view tag, std::string_view value)
|
||||
{
|
||||
auto rs = find(std::move(cond));
|
||||
std::vector<row_handle> rows;
|
||||
std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
|
||||
update_value(rows, tag, value);
|
||||
}
|
||||
|
||||
void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief Return the index number for \a column_name
|
||||
|
||||
uint16_t get_column_ix(std::string_view column_name) const
|
||||
{
|
||||
uint16_t result;
|
||||
|
||||
for (result = 0; result < m_columns.size(); ++result)
|
||||
{
|
||||
if (iequals(column_name, m_columns[result].m_name))
|
||||
break;
|
||||
}
|
||||
|
||||
if (VERBOSE > 0 and result == m_columns.size() and m_cat_validator != nullptr) // validate the name, if it is known at all (since it was not found)
|
||||
{
|
||||
auto iv = m_cat_validator->get_validator_for_item(column_name);
|
||||
if (iv == nullptr)
|
||||
std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << std::endl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string_view get_column_name(uint16_t ix) const
|
||||
{
|
||||
if (ix >= m_columns.size())
|
||||
throw std::out_of_range("column index is out of range");
|
||||
|
||||
return m_columns[ix].m_name;
|
||||
}
|
||||
|
||||
uint16_t add_column(std::string_view column_name)
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
size_t result = get_column_ix(column_name);
|
||||
|
||||
if (result == m_columns.size())
|
||||
{
|
||||
const item_validator *item_validator = nullptr;
|
||||
|
||||
if (m_cat_validator != nullptr)
|
||||
{
|
||||
item_validator = m_cat_validator->get_validator_for_item(column_name);
|
||||
if (item_validator == nullptr)
|
||||
m_validator->report_error("tag " + std::string(column_name) + " not allowed in category " + m_name, false);
|
||||
}
|
||||
|
||||
m_columns.emplace_back(column_name, item_validator);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool has_column(std::string_view name) const
|
||||
{
|
||||
return get_column_ix(name) < m_columns.size();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void sort(std::function<int(row_handle,row_handle)> f);
|
||||
void reorder_by_index();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::vector<std::string> get_tag_order() const;
|
||||
|
||||
void write(std::ostream &os) const;
|
||||
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingColumns = true);
|
||||
|
||||
private:
|
||||
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
|
||||
|
||||
public:
|
||||
friend std::ostream &operator<<(std::ostream &os, const category &cat)
|
||||
{
|
||||
cat.write(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true);
|
||||
|
||||
private:
|
||||
void erase_orphans(condition &&cond, category &parent);
|
||||
|
||||
using allocator_type = std::allocator<void>;
|
||||
|
||||
constexpr allocator_type get_allocator() const
|
||||
{
|
||||
return {};
|
||||
}
|
||||
|
||||
using char_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<char>;
|
||||
using char_allocator_traits = std::allocator_traits<char_allocator_type>;
|
||||
|
||||
using row_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<row>;
|
||||
using row_allocator_traits = std::allocator_traits<row_allocator_type>;
|
||||
|
||||
row_allocator_traits::pointer get_row()
|
||||
{
|
||||
row_allocator_type ra(get_allocator());
|
||||
return row_allocator_traits::allocate(ra, 1);
|
||||
}
|
||||
|
||||
row *create_row()
|
||||
{
|
||||
auto p = this->get_row();
|
||||
row_allocator_type ra(get_allocator());
|
||||
row_allocator_traits::construct(ra, p);
|
||||
return p;
|
||||
}
|
||||
|
||||
row *clone_row(const row &r);
|
||||
|
||||
void delete_row(row *r);
|
||||
|
||||
row_handle create_copy(row_handle r);
|
||||
|
||||
struct item_column
|
||||
{
|
||||
std::string m_name;
|
||||
const item_validator *m_validator;
|
||||
|
||||
item_column(std::string_view name, const item_validator *validator)
|
||||
: m_name(name)
|
||||
, m_validator(validator)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct link
|
||||
{
|
||||
link(category *linked, const link_validator *v)
|
||||
: linked(linked)
|
||||
, v(v)
|
||||
{
|
||||
}
|
||||
|
||||
category *linked;
|
||||
const link_validator *v;
|
||||
};
|
||||
|
||||
// proxy methods for every insertion
|
||||
iterator insert_impl(const_iterator pos, row *n);
|
||||
iterator erase_impl(const_iterator pos);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
condition get_parents_condition(row_handle rh, const category &parentCat) const;
|
||||
condition get_children_condition(row_handle rh, const category &childCat) const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void swap_item(size_t column_ix, row_handle &a, row_handle &b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string m_name;
|
||||
std::vector<item_column> m_columns;
|
||||
const validator *m_validator = nullptr;
|
||||
const category_validator *m_cat_validator = nullptr;
|
||||
std::vector<link> m_parent_links, m_child_links;
|
||||
bool m_cascade = true;
|
||||
uint32_t m_last_unique_num = 0;
|
||||
class category_index *m_index = nullptr;
|
||||
row *m_head = nullptr, *m_tail = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
/// \file This file contains the definition for the class Compound, encapsulating
|
||||
/// \file This file contains the definition for the class compound, encapsulating
|
||||
/// the information found for compounds in the CCD.
|
||||
|
||||
#include <map>
|
||||
@@ -34,20 +34,20 @@
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "cif++/AtomType.hpp"
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
#include <cif++/atom_type.hpp>
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Compound;
|
||||
struct CompoundAtom;
|
||||
class CompoundFactoryImpl;
|
||||
class compound;
|
||||
struct compound_atom;
|
||||
class compound_factory_impl;
|
||||
|
||||
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
|
||||
enum class BondType
|
||||
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
|
||||
enum class bond_type
|
||||
{
|
||||
sing, // 'single bond'
|
||||
doub, // 'double bond'
|
||||
@@ -59,32 +59,32 @@ enum class BondType
|
||||
pi, // 'pi bond'
|
||||
};
|
||||
|
||||
std::string to_string(BondType bondType);
|
||||
BondType from_string(const std::string& bondType);
|
||||
std::string to_string(bond_type bondType);
|
||||
bond_type from_string(const std::string &bondType);
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about an atom in a chemical compound.
|
||||
/// This is a subset of the available information. Contact the author if you need more fields.
|
||||
|
||||
struct CompoundAtom
|
||||
struct compound_atom
|
||||
{
|
||||
std::string id;
|
||||
AtomType typeSymbol;
|
||||
atom_type type_symbol;
|
||||
int charge = 0;
|
||||
bool aromatic = false;
|
||||
bool leavingAtom = false;
|
||||
bool stereoConfig = false;
|
||||
bool leaving_atom = false;
|
||||
bool stereo_config = false;
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about the bonds
|
||||
|
||||
struct CompoundBond
|
||||
struct compound_bond
|
||||
{
|
||||
std::string atomID[2];
|
||||
BondType type;
|
||||
bool aromatic = false, stereoConfig = false;
|
||||
std::string atom_id[2];
|
||||
bond_type type;
|
||||
bool aromatic = false, stereo_config = false;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
@@ -95,62 +95,56 @@ struct CompoundBond
|
||||
/// compound definitions by calling the addExtraComponents function and
|
||||
/// pass it a valid CCD formatted file.
|
||||
|
||||
class Compound
|
||||
class compound
|
||||
{
|
||||
public:
|
||||
|
||||
// accessors
|
||||
|
||||
std::string id() const { return mID; }
|
||||
std::string name() const { return mName; }
|
||||
std::string type() const { return mType; }
|
||||
std::string formula() const { return mFormula; }
|
||||
float formulaWeight() const { return mFormulaWeight; }
|
||||
int formalCharge() const { return mFormalCharge; }
|
||||
std::string id() const { return m_id; }
|
||||
std::string name() const { return m_name; }
|
||||
std::string type() const { return m_type; }
|
||||
std::string group() const { return m_group; }
|
||||
std::string formula() const { return m_formula; }
|
||||
float formula_weight() const { return m_formula_weight; }
|
||||
int formal_charge() const { return m_formal_charge; }
|
||||
|
||||
const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
|
||||
const std::vector<CompoundBond> &bonds() const { return mBonds; }
|
||||
const std::vector<compound_atom> &atoms() const { return m_atoms; }
|
||||
const std::vector<compound_bond> &bonds() const { return m_bonds; }
|
||||
|
||||
CompoundAtom getAtomByID(const std::string &atomID) const;
|
||||
compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
|
||||
|
||||
bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
|
||||
// float chiralVolume(const std::string &centreID) const;
|
||||
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
|
||||
bool isWater() const
|
||||
bool is_water() const
|
||||
{
|
||||
return mID == "HOH" or mID == "H2O" or mID == "WAT";
|
||||
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
|
||||
}
|
||||
|
||||
private:
|
||||
friend class compound_factory_impl;
|
||||
friend class CCD_compound_factory_impl;
|
||||
friend class CCP4_compound_factory_impl;
|
||||
|
||||
friend class CompoundFactoryImpl;
|
||||
friend class CCDCompoundFactoryImpl;
|
||||
friend class CCP4CompoundFactoryImpl;
|
||||
compound(cif::datablock &db);
|
||||
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
|
||||
|
||||
Compound(cif::Datablock &db);
|
||||
Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type);
|
||||
|
||||
std::string mID;
|
||||
std::string mName;
|
||||
std::string mType;
|
||||
std::string mFormula;
|
||||
float mFormulaWeight = 0;
|
||||
int mFormalCharge = 0;
|
||||
std::vector<CompoundAtom> mAtoms;
|
||||
std::vector<CompoundBond> mBonds;
|
||||
std::string m_id;
|
||||
std::string m_name;
|
||||
std::string m_type;
|
||||
std::string m_group;
|
||||
std::string m_formula;
|
||||
float m_formula_weight = 0;
|
||||
int m_formal_charge = 0;
|
||||
std::vector<compound_atom> m_atoms;
|
||||
std::vector<compound_bond> m_bonds;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Factory class for Compound and Link objects
|
||||
// Factory class for compound and Link objects
|
||||
|
||||
CIFPP_EXPORT extern const std::map<std::string, char> kAAMap, kBaseMap;
|
||||
|
||||
class CompoundFactory
|
||||
class compound_factory
|
||||
{
|
||||
public:
|
||||
|
||||
/// \brief Initialise a singleton instance.
|
||||
///
|
||||
/// If you have a multithreaded application and want to have different
|
||||
@@ -159,37 +153,39 @@ class CompoundFactory
|
||||
/// flag to true.
|
||||
|
||||
static void init(bool useThreadLocalInstanceOnly);
|
||||
static CompoundFactory &instance();
|
||||
static compound_factory &instance();
|
||||
static void clear();
|
||||
|
||||
void setDefaultDictionary(const std::filesystem::path &inDictFile);
|
||||
void pushDictionary(const std::filesystem::path &inDictFile);
|
||||
void popDictionary();
|
||||
void set_default_dictionary(const std::filesystem::path &inDictFile);
|
||||
void push_dictionary(const std::filesystem::path &inDictFile);
|
||||
void pop_dictionary();
|
||||
|
||||
bool isKnownPeptide(const std::string &res_name) const;
|
||||
bool isKnownBase(const std::string &res_name) const;
|
||||
bool is_known_peptide(const std::string &res_name) const;
|
||||
bool is_known_base(const std::string &res_name) const;
|
||||
|
||||
/// \brief Create the Compound object for \a id
|
||||
/// \brief Create the compound object for \a id
|
||||
///
|
||||
/// This will create the Compound instance for \a id if it doesn't exist already.
|
||||
/// This will create the compound instance for \a id if it doesn't exist already.
|
||||
/// The result is owned by this factory and should not be deleted by the user.
|
||||
/// \param id The Compound ID, a three letter code usually
|
||||
/// \param id The compound ID, a three letter code usually
|
||||
/// \result The compound, or nullptr if it could not be created (missing info)
|
||||
const Compound *create(std::string id);
|
||||
const compound *create(std::string id);
|
||||
|
||||
~CompoundFactory();
|
||||
~compound_factory();
|
||||
|
||||
static const std::map<std::string, char> kAAMap, kBaseMap;
|
||||
|
||||
private:
|
||||
CompoundFactory();
|
||||
compound_factory();
|
||||
|
||||
CompoundFactory(const CompoundFactory &) = delete;
|
||||
CompoundFactory &operator=(const CompoundFactory &) = delete;
|
||||
compound_factory(const compound_factory &) = delete;
|
||||
compound_factory &operator=(const compound_factory &) = delete;
|
||||
|
||||
static std::unique_ptr<CompoundFactory> sInstance;
|
||||
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
|
||||
static bool sUseThreadLocalInstance;
|
||||
static std::unique_ptr<compound_factory> s_instance;
|
||||
static thread_local std::unique_ptr<compound_factory> tl_instance;
|
||||
static bool s_use_thread_local_instance;
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> mImpl;
|
||||
std::shared_ptr<compound_factory_impl> m_impl;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
} // namespace cif
|
||||
745
include/cif++/condition.hpp
Normal file
745
include/cif++/condition.hpp
Normal file
@@ -0,0 +1,745 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <utility>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// let's make life easier
|
||||
|
||||
iset get_category_fields(const category &cat);
|
||||
uint16_t get_column_ix(const category &cat, std::string_view col);
|
||||
bool is_column_type_uchar(const category &cat, std::string_view col);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// some more templates to be able to do querying
|
||||
|
||||
namespace detail
|
||||
{
|
||||
struct condition_impl
|
||||
{
|
||||
virtual ~condition_impl() {}
|
||||
|
||||
virtual condition_impl *prepare(const category &c) { return this; }
|
||||
virtual bool test(row_handle r) const = 0;
|
||||
virtual void str(std::ostream &os) const = 0;
|
||||
virtual std::optional<row_handle> single() const { return {}; };
|
||||
};
|
||||
|
||||
struct all_condition_impl : public condition_impl
|
||||
{
|
||||
bool test(row_handle r) const override { return true; }
|
||||
void str(std::ostream &os) const override { os << "*"; }
|
||||
};
|
||||
|
||||
struct or_condition_impl;
|
||||
struct and_condition_impl;
|
||||
struct not_condition_impl;
|
||||
} // namespace detail
|
||||
|
||||
class condition
|
||||
{
|
||||
public:
|
||||
using condition_impl = detail::condition_impl;
|
||||
|
||||
condition()
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
explicit condition(condition_impl *impl)
|
||||
: m_impl(impl)
|
||||
{
|
||||
}
|
||||
|
||||
condition(const condition &) = delete;
|
||||
|
||||
condition(condition &&rhs) noexcept
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
}
|
||||
|
||||
condition &operator=(const condition &) = delete;
|
||||
|
||||
condition &operator=(condition &&rhs) noexcept
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~condition()
|
||||
{
|
||||
delete m_impl;
|
||||
m_impl = nullptr;
|
||||
}
|
||||
|
||||
void prepare(const category &c);
|
||||
|
||||
bool operator()(row_handle r) const
|
||||
{
|
||||
assert(this->m_impl != nullptr);
|
||||
assert(this->m_prepared);
|
||||
return m_impl ? m_impl->test(r) : false;
|
||||
}
|
||||
|
||||
explicit operator bool() { return not empty(); }
|
||||
bool empty() const { return m_impl == nullptr; }
|
||||
|
||||
std::optional<row_handle> single() const
|
||||
{
|
||||
return m_impl ? m_impl->single() : std::optional<row_handle>();
|
||||
}
|
||||
|
||||
friend condition operator||(condition &&a, condition &&b);
|
||||
friend condition operator&&(condition &&a, condition &&b);
|
||||
|
||||
friend struct detail::or_condition_impl;
|
||||
friend struct detail::and_condition_impl;
|
||||
friend struct detail::not_condition_impl;
|
||||
|
||||
void swap(condition &rhs)
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
std::swap(m_prepared, rhs.m_prepared);
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const condition &cond)
|
||||
{
|
||||
if (cond.m_impl)
|
||||
cond.m_impl->str(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void optimise(condition_impl *&impl);
|
||||
|
||||
condition_impl *m_impl;
|
||||
bool m_prepared = false;
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
struct key_is_empty_condition_impl : public condition_impl
|
||||
{
|
||||
key_is_empty_condition_impl(const std::string &item_tag)
|
||||
: m_item_tag(item_tag)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return r[m_item_ix].empty();
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << " IS NULL";
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
};
|
||||
|
||||
struct key_equals_condition_impl : public condition_impl
|
||||
{
|
||||
key_equals_condition_impl(item &&i)
|
||||
: m_item_tag(i.name())
|
||||
, m_value(i.value())
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return m_single_hit.has_value() ?
|
||||
*m_single_hit == r :
|
||||
r[m_item_ix].compare(m_value, m_icase) == 0;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value;
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
return m_single_hit;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
bool m_icase = false;
|
||||
std::string m_value;
|
||||
std::optional<row_handle> m_single_hit;
|
||||
};
|
||||
|
||||
struct key_equals_or_empty_condition_impl : public condition_impl
|
||||
{
|
||||
key_equals_or_empty_condition_impl(key_equals_condition_impl *equals, key_is_empty_condition_impl *empty)
|
||||
: m_item_tag(equals->m_item_tag)
|
||||
, m_value(equals->m_value)
|
||||
, m_icase(equals->m_icase)
|
||||
, m_single_hit(equals->m_single_hit)
|
||||
{
|
||||
assert(empty->m_item_ix == equals->m_item_ix);
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
m_icase = is_column_type_uchar(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
bool result = false;
|
||||
if (m_single_hit.has_value())
|
||||
result = *m_single_hit == r;
|
||||
else
|
||||
result = r[m_item_ix].empty() or r[m_item_ix].compare(m_value, m_icase) == 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_tag << " IS NULL";
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
return m_single_hit;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
std::string m_value;
|
||||
bool m_icase = false;
|
||||
std::optional<row_handle> m_single_hit;
|
||||
};
|
||||
|
||||
struct key_compare_condition_impl : public condition_impl
|
||||
{
|
||||
template <typename COMP>
|
||||
key_compare_condition_impl(const std::string &item_tag, COMP &&comp, const std::string &s)
|
||||
: m_item_tag(item_tag)
|
||||
, m_compare(std::move(comp))
|
||||
, m_str(s)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
m_icase = is_column_type_uchar(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return m_compare(r, m_icase);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << m_str;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
bool m_icase = false;
|
||||
std::function<bool(row_handle, bool)> m_compare;
|
||||
std::string m_str;
|
||||
};
|
||||
|
||||
struct key_matches_condition_impl : public condition_impl
|
||||
{
|
||||
key_matches_condition_impl(const std::string &item_tag, const std::regex &rx)
|
||||
: m_item_tag(item_tag)
|
||||
, m_item_ix(0)
|
||||
, mRx(rx)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
std::string_view txt = r[m_item_ix].text();
|
||||
return std::regex_match(txt.begin(), txt.end(), mRx);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << " =~ expression";
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix;
|
||||
std::regex mRx;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct any_is_condition_impl : public condition_impl
|
||||
{
|
||||
typedef T valueType;
|
||||
|
||||
any_is_condition_impl(const valueType &value)
|
||||
: mValue(value)
|
||||
{
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
auto &c = r.get_category();
|
||||
|
||||
bool result = false;
|
||||
for (auto &f : get_category_fields(c))
|
||||
{
|
||||
try
|
||||
{
|
||||
if (r[f].compare(mValue) == 0)
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "<any> == " << mValue;
|
||||
}
|
||||
|
||||
valueType mValue;
|
||||
};
|
||||
|
||||
struct any_matches_condition_impl : public condition_impl
|
||||
{
|
||||
any_matches_condition_impl(const std::regex &rx)
|
||||
: mRx(rx)
|
||||
{
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
auto &c = r.get_category();
|
||||
|
||||
bool result = false;
|
||||
for (auto &f : get_category_fields(c))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string_view txt = r[f].text();
|
||||
if (std::regex_match(txt.begin(), txt.end(), mRx))
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "<any> =~ expression";
|
||||
}
|
||||
|
||||
std::regex mRx;
|
||||
};
|
||||
|
||||
// TODO: Optimize and_condition by having a list of sub items.
|
||||
// That way you can also collapse multiple _is_ conditions in
|
||||
// case they make up an indexed tuple.
|
||||
struct and_condition_impl : public condition_impl
|
||||
{
|
||||
and_condition_impl(condition &&a, condition &&b)
|
||||
{
|
||||
mSub.emplace_back(std::exchange(a.m_impl, nullptr));
|
||||
mSub.emplace_back(std::exchange(b.m_impl, nullptr));
|
||||
}
|
||||
|
||||
~and_condition_impl()
|
||||
{
|
||||
for (auto sub : mSub)
|
||||
delete sub;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
if (sub->test(r))
|
||||
continue;
|
||||
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << '(';
|
||||
|
||||
bool first = true;
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
os << " AND ";
|
||||
|
||||
sub->str(os);
|
||||
}
|
||||
|
||||
os << ')';
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
std::optional<row_handle> result;
|
||||
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
auto s = sub->single();
|
||||
|
||||
if (not result.has_value())
|
||||
{
|
||||
result = s;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (s == result)
|
||||
continue;
|
||||
|
||||
result.reset();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<condition_impl *> mSub;
|
||||
};
|
||||
|
||||
struct or_condition_impl : public condition_impl
|
||||
{
|
||||
or_condition_impl(condition &&a, condition &&b)
|
||||
: mA(nullptr)
|
||||
, mB(nullptr)
|
||||
{
|
||||
std::swap(mA, a.m_impl);
|
||||
std::swap(mB, b.m_impl);
|
||||
}
|
||||
|
||||
~or_condition_impl()
|
||||
{
|
||||
delete mA;
|
||||
delete mB;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return mA->test(r) or mB->test(r);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << '(';
|
||||
mA->str(os);
|
||||
os << ") OR (";
|
||||
mB->str(os);
|
||||
os << ')';
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
auto sa = mA->single();
|
||||
auto sb = mB->single();
|
||||
|
||||
if (sa.has_value() and sb.has_value() and sa != sb)
|
||||
sa.reset();
|
||||
else if (not sa.has_value())
|
||||
sa = sb;
|
||||
|
||||
return sa;
|
||||
}
|
||||
|
||||
condition_impl *mA;
|
||||
condition_impl *mB;
|
||||
};
|
||||
|
||||
struct not_condition_impl : public condition_impl
|
||||
{
|
||||
not_condition_impl(condition &&a)
|
||||
: mA(nullptr)
|
||||
{
|
||||
std::swap(mA, a.m_impl);
|
||||
}
|
||||
|
||||
~not_condition_impl()
|
||||
{
|
||||
delete mA;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
mA = mA->prepare(c);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return not mA->test(r);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "NOT (";
|
||||
mA->str(os);
|
||||
os << ')';
|
||||
}
|
||||
|
||||
condition_impl *mA;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// \brief Combine two conditions into one that requires both.
///
/// When one of the operands is empty the result is simply the other operand;
/// an and_condition_impl is only created when both have an implementation.
inline condition operator&&(condition &&a, condition &&b)
{
	const bool have_a = a.m_impl != nullptr;
	const bool have_b = b.m_impl != nullptr;

	if (have_a and have_b)
		return condition(new detail::and_condition_impl(std::move(a), std::move(b)));

	return condition(std::move(have_a ? a : b));
}
|
||||
|
||||
/// \brief Combine two conditions into one that requires at least one of them.
///
/// When one of the operands is empty the result is simply the other operand;
/// an or_condition_impl is only created when both have an implementation.
inline condition operator||(condition &&a, condition &&b)
{
	const bool have_a = a.m_impl != nullptr;
	const bool have_b = b.m_impl != nullptr;

	if (have_a and have_b)
		return condition(new detail::or_condition_impl(std::move(a), std::move(b)));

	return condition(std::move(have_a ? a : b));
}
|
||||
|
||||
/// \brief Tag type, its only purpose is to select the overloads taking an empty_type
struct empty_type {};

/// \brief The tag value that allows writing conditions like ("id"_key == cif::null)

inline constexpr empty_type null{};
|
||||
|
||||
/// \brief Wrapper around an item name, the left hand side of the condition operators.
///
/// Construction is explicit and copying is disabled.
struct key
{
	std::string m_item_tag; ///< the wrapped item name

	/// \brief Wrap the item name \a itemTag
	explicit key(const std::string &itemTag)
		: m_item_tag(itemTag)
	{
	}

	/// \brief Wrap the zero terminated item name \a itemTag
	explicit key(const char *itemTag)
		: m_item_tag(itemTag)
	{
	}

	key(const key &) = delete;
	key &operator=(const key &) = delete;
};
|
||||
|
||||
template <typename T>
|
||||
condition operator==(const key &key, const T &v)
|
||||
{
|
||||
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, v }));
|
||||
}
|
||||
|
||||
/// \brief Create a condition comparing the item named by \a key with the string \a value
///
/// A null pointer or an empty string results in a test for an empty item,
/// anything else in a test for equality.
inline condition operator==(const key &key, const char *value)
{
	const bool no_value = value == nullptr or *value == 0;

	if (no_value)
		return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));

	return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
}
|
||||
|
||||
// inline condition_t operator==(const key& key, const detail::ItemReference& v)
|
||||
// {
|
||||
// if (v.empty())
|
||||
// return condition_t(new detail::key_is_empty_condition_impl(key.m_item_tag));
|
||||
// else
|
||||
// return condition_t(new detail::key_compare_condition_impl(key.m_item_tag, [tag = key.m_item_tag, v](const category& c, const row& r, bool icase)
|
||||
// { return r[tag].template compare<(v, icase) == 0; }));
|
||||
// }
|
||||
|
||||
template <typename T>
|
||||
condition operator!=(const key &key, const T &v)
|
||||
{
|
||||
return condition(new detail::not_condition_impl(operator==(key, v)));
|
||||
}
|
||||
|
||||
/// \brief Create a condition that is the negation of (\a key == \a v) for a C string
///
/// A null pointer is treated as an empty string. The comparison is made
/// with a std::string, so the generic equality operator is used.
inline condition operator!=(const key &key, const char *v)
{
	std::string value;
	if (v != nullptr)
		value = v;

	condition equal = operator==(key, value);
	return condition(new detail::not_condition_impl(std::move(equal)));
}
|
||||
|
||||
template <typename T>
|
||||
condition operator>(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " > " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) > 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator>=(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " >= " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) >= 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator<(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " < " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) < 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator<=(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " <= " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) <= 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
/// \brief Create a condition testing whether the item named by \a key matches the expression \a rx
inline condition operator==(const key &key, const std::regex &rx)
{
	auto *impl = new detail::key_matches_condition_impl(key.m_item_tag, rx);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create a condition testing whether the item named by \a key is empty
///
/// The second argument only selects this overload, its value is not used.
inline condition operator==(const key &key, const empty_type &)
{
	auto *impl = new detail::key_is_empty_condition_impl(key.m_item_tag);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create the negation of \a rhs, whose implementation is taken over by the result
inline condition operator!(condition &&rhs)
{
	auto *negated = new detail::not_condition_impl(std::move(rhs));
	return condition(negated);
}
|
||||
|
||||
/// \brief Tag type, its only purpose is to select the overloads taking an any_type
struct any_type {};

/// \brief The tag value used as left hand side in conditions that consider all fields
inline constexpr any_type any{};
|
||||
|
||||
template <typename T>
|
||||
condition operator==(const any_type &, const T &v)
|
||||
{
|
||||
return condition(new detail::any_is_condition_impl<T>(v));
|
||||
}
|
||||
|
||||
/// \brief Create a condition testing whether any field matches the expression \a rx
///
/// The first argument only selects this overload, its value is not used.
inline condition operator==(const any_type &, const std::regex &rx)
{
	auto *impl = new detail::any_matches_condition_impl(rx);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create a condition that accepts every row
inline condition all()
{
	auto *impl = new detail::all_condition_impl();
	return condition(impl);
}
|
||||
|
||||
namespace literals
|
||||
{
|
||||
inline key operator""_key(const char *text, size_t length)
|
||||
{
|
||||
return key(std::string(text, length));
|
||||
}
|
||||
} // namespace literals
|
||||
|
||||
} // namespace cif
|
||||
100
include/cif++/datablock.hpp
Normal file
100
include/cif++/datablock.hpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class datablock : public std::list<category>
|
||||
{
|
||||
public:
|
||||
datablock() = default;
|
||||
|
||||
datablock(std::string_view name)
|
||||
: m_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
datablock(const datablock &) = default;
|
||||
|
||||
datablock(datablock &&) = default;
|
||||
|
||||
datablock &operator=(const datablock &) = default;
|
||||
datablock &operator=(datablock &&) = default;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
const std::string &name() const { return m_name; }
|
||||
|
||||
void set_name(std::string_view name)
|
||||
{
|
||||
m_name = name;
|
||||
}
|
||||
|
||||
void set_validator(const validator *v);
|
||||
|
||||
const validator *get_validator() const;
|
||||
|
||||
bool is_valid() const;
|
||||
bool validate_links() const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
category &operator[](std::string_view name);
|
||||
const category &operator[](std::string_view name) const;
|
||||
|
||||
category *get(std::string_view name);
|
||||
const category *get(std::string_view name) const;
|
||||
|
||||
std::tuple<iterator, bool> emplace(std::string_view name);
|
||||
|
||||
std::vector<std::string> get_tag_order() const;
|
||||
void write(std::ostream &os) const;
|
||||
void write(std::ostream &os, const std::vector<std::string> &tag_order);
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const datablock &db)
|
||||
{
|
||||
db.write(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool operator==(const datablock &rhs) const;
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
const validator *m_validator = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
36
include/cif++/dictionary_parser.hpp
Normal file
36
include/cif++/dictionary_parser.hpp
Normal file
@@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
validator parse_dictionary(std::string_view name, std::istream &is);
|
||||
|
||||
} // namespace cif
|
||||
122
include/cif++/file.hpp
Normal file
122
include/cif++/file.hpp
Normal file
@@ -0,0 +1,122 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <list>
|
||||
|
||||
#include <cif++/datablock.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class file : public std::list<datablock>
|
||||
{
|
||||
public:
|
||||
file() = default;
|
||||
|
||||
explicit file(const std::filesystem::path &p)
|
||||
{
|
||||
load(p);
|
||||
}
|
||||
|
||||
explicit file(std::istream &is)
|
||||
{
|
||||
load(is);
|
||||
}
|
||||
|
||||
explicit file(const char *data, size_t length)
|
||||
{
|
||||
struct membuf : public std::streambuf
|
||||
{
|
||||
membuf(char *text, size_t length)
|
||||
{
|
||||
this->setg(text, text, text + length);
|
||||
}
|
||||
} buffer(const_cast<char *>(data), length);
|
||||
|
||||
std::istream is(&buffer);
|
||||
load(is);
|
||||
}
|
||||
|
||||
file(const file &) = default;
|
||||
file(file &&) = default;
|
||||
file &operator=(const file &) = default;
|
||||
file &operator=(file &&) = default;
|
||||
|
||||
void set_validator(const validator *v);
|
||||
|
||||
const validator *get_validator() const
|
||||
{
|
||||
return m_validator;
|
||||
}
|
||||
|
||||
bool is_valid() const;
|
||||
bool is_valid();
|
||||
bool validate_links() const;
|
||||
|
||||
void load_dictionary();
|
||||
void load_dictionary(std::string_view name);
|
||||
|
||||
bool contains(std::string_view name) const;
|
||||
|
||||
datablock &front()
|
||||
{
|
||||
assert(not empty());
|
||||
return std::list<datablock>::front();
|
||||
}
|
||||
|
||||
const datablock &front() const
|
||||
{
|
||||
assert(not empty());
|
||||
return std::list<datablock>::front();
|
||||
}
|
||||
|
||||
datablock &operator[](std::string_view name);
|
||||
const datablock &operator[](std::string_view name) const;
|
||||
|
||||
std::tuple<iterator, bool> emplace(std::string_view name);
|
||||
|
||||
void load(const std::filesystem::path &p);
|
||||
void load(std::istream &is);
|
||||
|
||||
void save(const std::filesystem::path &p) const;
|
||||
void save(std::ostream &os) const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const file &f)
|
||||
{
|
||||
f.save(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
const validator *m_validator = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
221
include/cif++/format.hpp
Normal file
221
include/cif++/format.hpp
Normal file
@@ -0,0 +1,221 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
/// \file format.hpp
|
||||
/// File containing a basic reimplementation of boost::format
|
||||
/// but then a bit more simplistic. Still this allowed me to move my code
|
||||
/// from using boost::format to something without external dependency easily.
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
namespace detail
{
	/// \brief Adapter that stores one format argument and yields, through
	/// operator*, the value that is handed to snprintf for it.
	///
	/// The generic version stores a copy of the value and passes it on
	/// unchanged.
	template <typename T>
	struct to_varg
	{
		using type = T;

		to_varg(const T &v)
			: m_value(v)
		{
		}

		type operator*() { return m_value; }

		T m_value;
	};

	// template <>
	// struct to_varg<char>
	// {
	// 	using type = const char *;

	// 	to_varg(const char &v)
	// 		: m_value({ v })
	// 	{
	// 	}

	// 	type operator*() { return m_value.c_str(); }

	// 	std::string m_value;
	// };

	/// \brief C strings are copied into a std::string, snprintf receives
	/// a pointer to that copy.
	template <>
	struct to_varg<const char *>
	{
		using type = const char *;

		to_varg(const char *v)
			: m_value(v)
		{
		}

		type operator*() { return m_value.c_str(); }

		std::string m_value;
	};

	/// \brief std::string arguments are copied, snprintf receives the
	/// c_str() of that copy.
	template <>
	struct to_varg<std::string>
	{
		using type = const char *;

		to_varg(const std::string &v)
			: m_value(v)
		{
		}

		type operator*() { return m_value.c_str(); }

		std::string m_value;
	};

} // namespace detail

/// \brief A printf style format string together with its arguments.
///
/// The formatting itself is done by snprintf when str() is called or
/// when the object is written to a std::ostream.
///
/// Objects of this class cannot be copied: m_vargs contains pointers
/// into the strings stored in m_args.
template <typename... Args>
class format_plus_arg
{
  public:
	using args_vector_type = std::tuple<detail::to_varg<Args>...>;
	using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;

	format_plus_arg(const format_plus_arg &) = delete;
	format_plus_arg &operator=(const format_plus_arg &) = delete;

	/// \brief Store the format string \a fmt and copies of \a args
	format_plus_arg(std::string_view fmt, Args... args)
		: m_fmt(fmt)
		, m_args(std::forward<Args>(args)...)
	{
		auto ix = std::make_index_sequence<sizeof...(Args)>();
		copy_vargs(ix);
	}

	/// \brief Return the formatted text.
	///
	/// The result is complete regardless of its length. When snprintf
	/// reports an error an empty string is returned.
	std::string str() const
	{
		return render();
	}

	/// \brief Write the formatted text to \a os
	friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f)
	{
		const std::string text = f.render();
		os.write(text.data(), static_cast<std::streamsize>(text.size()));
		return os;
	}

  private:
	template <size_t... I>
	void copy_vargs(std::index_sequence<I...>)
	{
		((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
	}

	/// Call snprintf with the stored format and arguments, writing at
	/// most \a size bytes to \a buffer. Returns whatever snprintf returns.
	int print_into(char *buffer, size_t size) const
	{
		return std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, size, m_fmt.c_str()), m_vargs));
	}

	/// Format into a stack buffer first and fall back to a buffer of
	/// the exact size when the text does not fit.
	std::string render() const
	{
		char buffer[1024];

		const int needed = print_into(buffer, sizeof(buffer));

		// A negative value signals an error, there is nothing to return
		if (needed < 0)
			return {};

		// snprintf returns the length the complete text would have, not
		// the number of bytes stored. Only when this is less than the
		// buffer size did the text fit.
		const auto length = static_cast<std::string::size_type>(needed);
		if (length < sizeof(buffer))
			return { buffer, length };

		// One extra byte for the terminating zero written by snprintf
		std::string result(length + 1, '\0');
		print_into(result.data(), result.size());
		result.resize(length);

		return result;
	}

	std::string m_fmt;
	args_vector_type m_args;
	vargs_vector_type m_vargs;
};

/// \brief Create a format_plus_arg for the printf style format string
/// \a fmt and the arguments \a args. Use str() on the result, or write
/// it to a std::ostream.
template <typename... Args>
constexpr auto format(std::string_view fmt, Args... args)
{
	return format_plus_arg(fmt, std::forward<Args>(args)...);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// A streambuf that fills out lines with spaces up until a specified width
///
/// Every character is passed on to the streambuf that was installed in
/// the std::ostream at construction time. Before a newline is passed
/// on, spaces are written until the line is m_width characters wide.
/// Lines that are already wider are left as is.
///
/// The constructor does not install this object in the stream. The
/// destructor does put the original streambuf back, so the stream
/// should outlive this object.

class fill_out_streambuf : public std::streambuf
{
  public:
	using base_type = std::streambuf;
	using int_type = base_type::int_type;
	using char_type = base_type::char_type;
	using traits_type = base_type::traits_type;

	/// \brief Remember \a os and its current streambuf, lines will be
	/// filled out to \a width characters
	fill_out_streambuf(std::ostream &os, int width = 80)
		: m_os(os)
		, m_upstream(os.rdbuf())
		, m_width(width)
	{
	}

	/// \brief Restore the streambuf the stream had at construction time
	~fill_out_streambuf()
	{
		m_os.rdbuf(m_upstream);
	}

	/// \brief Write the character \a ic to the upstream streambuf,
	/// preceded by padding when it is a newline.
	///
	/// Returns eof when writing failed. When \a ic is eof nothing is
	/// written and a value other than eof is returned.
	int_type overflow(int_type ic = traits_type::eof()) override
	{
		// eof means there is no character to write, converting it to
		// a char would write a stray byte
		if (traits_type::eq_int_type(ic, traits_type::eof()))
			return traits_type::not_eof(ic);

		char ch = traits_type::to_char_type(ic);

		int_type result = ic;

		if (ch == '\n')
		{
			for (int i = m_column_count; result != traits_type::eof() and i < m_width; ++i)
				result = m_upstream->sputc(' ');
		}

		if (result != traits_type::eof())
			result = m_upstream->sputc(ch);

		// Only count what was really written
		if (result != traits_type::eof())
		{
			if (ch == '\n')
			{
				m_column_count = 0;
				++m_line_count;
			}
			else
				++m_column_count;
		}

		return result;
	}

	/// \brief The streambuf the output is passed on to
	std::streambuf *get_upstream() const { return m_upstream; }

	/// \brief The number of newlines written so far
	int get_line_count() const { return m_line_count; }

  private:
	std::ostream &m_os;
	std::streambuf *m_upstream;
	int m_width;
	int m_line_count = 0;
	int m_column_count = 0;
};
|
||||
|
||||
} // namespace cif
|
||||
46
include/cif++/forward_decl.hpp
Normal file
46
include/cif++/forward_decl.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
class category;
|
||||
class datablock;
|
||||
class file;
|
||||
class parser;
|
||||
|
||||
class row;
|
||||
class row_handle;
|
||||
|
||||
class item;
|
||||
class item_handle;
|
||||
|
||||
} // namespace cif
|
||||
566
include/cif++/item.hpp
Normal file
566
include/cif++/item.hpp
Normal file
@@ -0,0 +1,566 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <charconv>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
/// \file item.hpp
|
||||
/// This file contains the declaration of item but also the item_value and item_handle
|
||||
/// These handle the storage of and access to the data for a single data field.
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief item is a transient class that is used to pass data into rows
|
||||
/// but it also takes care of formatting data.
|
||||
class item
|
||||
{
|
||||
public:
|
||||
/// \brief Default constructor, empty item
|
||||
item() = default;
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a single character string with content \a value
|
||||
item(std::string_view name, char value)
|
||||
: m_name(name)
|
||||
, m_value({ value })
|
||||
{
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted floating point value \a value with
|
||||
/// precision \a precision
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
item(std::string_view name, const T &value, int precision)
|
||||
: m_name(name)
|
||||
{
|
||||
using namespace std;
|
||||
using namespace cif;
|
||||
|
||||
char buffer[32];
|
||||
|
||||
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::fixed, precision);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a formatted floating point value \a value with
|
||||
/// so-called general formatting
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
using namespace std;
|
||||
using namespace cif;
|
||||
|
||||
char buffer[32];
|
||||
|
||||
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::general);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted integral value \a value
|
||||
template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T,bool>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
char buffer[32];
|
||||
|
||||
auto r = std::to_chars(buffer, buffer + sizeof(buffer) - 1, value);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted boolean value \a value
|
||||
template <typename T, std::enable_if_t<std::is_same_v<T,bool>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
m_value.assign(value ? "y" : "n");
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content value \a value
|
||||
item(const std::string_view name, const std::string_view value)
|
||||
: m_name(name)
|
||||
, m_value(value)
|
||||
{
|
||||
}
|
||||
|
||||
item(const item &rhs) = default;
|
||||
|
||||
item(item &&rhs) noexcept = default;
|
||||
|
||||
item &operator=(const item &rhs) = default;
|
||||
|
||||
item &operator=(item &&rhs) noexcept = default;
|
||||
|
||||
std::string_view name() const { return m_name; }
|
||||
std::string_view value() const { return m_value; }
|
||||
|
||||
/// \brief replace the content of the stored value with \a v
|
||||
void value(std::string_view v) { m_value = v; }
|
||||
|
||||
/// \brief empty means either null or unknown
|
||||
bool empty() const { return m_value.empty(); }
|
||||
|
||||
/// \brief returns true if the field contains '.'
|
||||
bool is_null() const { return m_value == "."; }
|
||||
|
||||
/// \brief returns true if the field contains '?'
|
||||
bool is_unknown() const { return m_value == "?"; }
|
||||
|
||||
/// \brief the length of the value string
|
||||
size_t length() const { return m_value.length(); }
|
||||
|
||||
/// \brief support for structured binding
|
||||
template<size_t N>
|
||||
decltype(auto) get() const
|
||||
{
|
||||
if constexpr (N == 0) return name();
|
||||
else if constexpr (N == 1) return value();
|
||||
}
|
||||
|
||||
private:
|
||||
std::string_view m_name;
|
||||
std::string m_value;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief the internal storage for items in a category
///
/// Storage with minimal space requirements. Strings of size 7 or
/// shorter are stored internally, longer strings are stored in memory
/// allocated with new[].
/// Typically, more than 99% of the strings in an mmCIF file are less
/// than 8 bytes in length.
///
/// Which member of the union is in use follows from m_length: when
/// m_length is kBufferSize or more m_data is used, otherwise
/// m_local_data. An item_value can be moved but not copied.

struct item_value
{
	item_value() = default;

	/// \brief constructor, stores a zero terminated copy of \a text
	item_value(std::string_view text)
		: m_length(text.length())
	{
		if (m_length >= kBufferSize)
		{
			m_data = new char[m_length + 1];
			std::copy(text.begin(), text.end(), m_data);
			m_data[m_length] = 0;
		}
		else
		{
			std::copy(text.begin(), text.end(), m_local_data);
			m_local_data[m_length] = 0;
		}
	}

	/// \brief move constructor, \a rhs is empty afterwards
	item_value(item_value &&rhs) noexcept
	{
		adopt(rhs);
	}

	/// \brief move assignment, this exchanges the content of this
	/// object and \a rhs
	item_value &operator=(item_value &&rhs) noexcept
	{
		if (this != &rhs)
		{
			item_value tmp(std::move(rhs)); // rhs is empty now
			rhs.adopt(*this);               // and so are we after this
			adopt(tmp);
		}
		return *this;
	}

	~item_value()
	{
		if (m_length >= kBufferSize)
			delete[] m_data;
		m_data = nullptr;
		m_length = 0;
	}

	item_value(const item_value &) = delete;
	item_value &operator=(const item_value &) = delete;

	/// \brief returns true if the stored text is not empty
	explicit operator bool() const
	{
		return m_length != 0;
	}

	size_t m_length = 0;
	union
	{
		char m_local_data[8];
		char *m_data;
	};

	static constexpr size_t kBufferSize = sizeof(m_local_data);

	// By using std::string_view instead of c_str we obtain a
	// nice performance gain since we avoid many calls to strlen.
	constexpr inline std::string_view text() const
	{
		return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
	}

  private:
	/// Take over the content of \a from, leaving it empty. This object
	/// should not own allocated memory when this is called.
	///
	/// Only the union member that is in use in \a from is read.
	void adopt(item_value &from) noexcept
	{
		m_length = std::exchange(from.m_length, 0);

		if (m_length >= kBufferSize)
			m_data = std::exchange(from.m_data, nullptr);
		else
		{
			std::copy(from.m_local_data, from.m_local_data + m_length, m_local_data);
			m_local_data[m_length] = 0;
		}
	}
};

// static_assert(sizeof(item_value) == 24, "sizeof(item_value) should be 24 bytes");
static_assert(sizeof(item_value) == 16, "sizeof(item_value) should be 16 bytes");
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Transient object to access stored data
|
||||
|
||||
/// \brief This is item_handle, it is used to access the data stored in item_value.
|
||||
|
||||
struct item_handle
|
||||
{
|
||||
public:
|
||||
// conversion helper class
|
||||
template <typename T, typename = void>
|
||||
struct item_value_as;
|
||||
|
||||
template <typename T>
|
||||
item_handle &operator=(const T &value)
|
||||
{
|
||||
item v{ "", value };
|
||||
assign_value(v);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
void os(const Ts &...v)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
((ss << v), ...);
|
||||
this->operator=(ss.str());
|
||||
}
|
||||
|
||||
void swap(item_handle &b);
|
||||
|
||||
template <typename T = std::string>
|
||||
auto as() const -> T
|
||||
{
|
||||
using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
|
||||
return item_value_as<value_type>::convert(*this);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto value_or(const T &dv) const
|
||||
{
|
||||
return empty() ? dv : this->as<T>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int compare(const T &value, bool icase = true) const
|
||||
{
|
||||
return item_value_as<T>::compare(*this, value, icase);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool operator==(const T &value) const
|
||||
{
|
||||
// TODO: icase or not icase?
|
||||
return item_value_as<T>::compare(*this, value, true) == 0;
|
||||
}
|
||||
|
||||
// We may not have C++20 yet...
|
||||
template <typename T>
|
||||
bool operator!=(const T &value) const
|
||||
{
|
||||
return not operator==(value);
|
||||
}
|
||||
|
||||
// empty means either null or unknown
|
||||
bool empty() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.empty() or (txt.length() == 1 and (txt.front() == '.' or txt.front() == '?'));
|
||||
}
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
// is_null means the field contains '.'
|
||||
bool is_null() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.length() == 1 and txt.front() == '.';
|
||||
}
|
||||
|
||||
// is_unknown means the field contains '?'
|
||||
bool is_unknown() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.length() == 1 and txt.front() == '?';
|
||||
}
|
||||
|
||||
std::string_view text() const;
|
||||
|
||||
item_handle(uint16_t column, row_handle &row)
|
||||
: m_column(column)
|
||||
, m_row_handle(row)
|
||||
{
|
||||
}
|
||||
|
||||
static const item_handle s_null_item;
|
||||
|
||||
friend void swap(item_handle a, item_handle b)
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
private:
|
||||
item_handle();
|
||||
|
||||
uint16_t m_column;
|
||||
row_handle &m_row_handle;
|
||||
|
||||
void assign_value(const item &value);
|
||||
};
|
||||
|
||||
// So sad that older gcc implementations of from_chars did not support floats yet...
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> and not std::is_same_v<T, bool>>>
|
||||
{
|
||||
using value_type = std::remove_reference_t<std::remove_cv_t<T>>;
|
||||
|
||||
static value_type convert(const item_handle &ref)
|
||||
{
|
||||
value_type result = {};
|
||||
|
||||
if (not ref.empty())
|
||||
{
|
||||
auto txt = ref.text();
|
||||
|
||||
std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), result);
|
||||
|
||||
if (r.ec != std::errc())
|
||||
{
|
||||
result = {};
|
||||
if (cif::VERBOSE)
|
||||
{
|
||||
if (r.ec == std::errc::invalid_argument)
|
||||
std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
|
||||
else if (r.ec == std::errc::result_out_of_range)
|
||||
std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const T &value, bool icase)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
auto txt = ref.text();
|
||||
|
||||
if (txt.empty())
|
||||
result = 1;
|
||||
else
|
||||
{
|
||||
value_type v = {};
|
||||
|
||||
std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), v);
|
||||
|
||||
if (r.ec != std::errc())
|
||||
{
|
||||
if (cif::VERBOSE)
|
||||
{
|
||||
if (r.ec == std::errc::invalid_argument)
|
||||
std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
|
||||
else if (r.ec == std::errc::result_out_of_range)
|
||||
std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
|
||||
}
|
||||
result = 1;
|
||||
}
|
||||
else if (v < value)
|
||||
result = -1;
|
||||
else if (v > value)
|
||||
result = 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<std::optional<T>>
|
||||
{
|
||||
static std::optional<T> convert(const item_handle &ref)
|
||||
{
|
||||
std::optional<T> result;
|
||||
if (ref)
|
||||
result = ref.as<T>();
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, std::optional<T> value, bool icase)
|
||||
{
|
||||
if (ref.empty() and not value)
|
||||
return 0;
|
||||
|
||||
if (ref.empty())
|
||||
return -1;
|
||||
else if (not value)
|
||||
return 1;
|
||||
else
|
||||
return ref.compare(*value, icase);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
|
||||
{
|
||||
static bool convert(const item_handle &ref)
|
||||
{
|
||||
bool result = false;
|
||||
if (not ref.empty())
|
||||
result = iequals(ref.text(), "y");
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, bool value, bool icase)
|
||||
{
|
||||
bool rv = convert(ref);
|
||||
return value && rv ? 0
|
||||
: (rv < value ? -1 : 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
struct item_handle::item_value_as<char[N]>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const char (&value)[N], bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, const char *>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const char *value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string_view>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const std::string_view &value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const std::string &value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
|
||||
namespace std
|
||||
{
|
||||
|
||||
template<> struct tuple_size<::cif::item>
|
||||
: public std::integral_constant<std::size_t, 2> {};
|
||||
|
||||
template<> struct tuple_element<0, ::cif::item>
|
||||
{
|
||||
using type = decltype(std::declval<::cif::item>().name());
|
||||
};
|
||||
|
||||
template<> struct tuple_element<1, ::cif::item>
|
||||
{
|
||||
using type = decltype(std::declval<::cif::item>().value());
|
||||
};
|
||||
|
||||
}
|
||||
676
include/cif++/iterator.hpp
Normal file
676
include/cif++/iterator.hpp
Normal file
@@ -0,0 +1,676 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
class iterator_impl
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
|
||||
static constexpr size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using tuple_type = std::tuple<Ts...>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = tuple_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2, typename... T2s>
|
||||
iterator_impl(const iterator_impl<C2, T2s...> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(iterator_impl<IRowType, Ts...> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type *>(rhs.m_current))
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, N> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_column_ix(cix)
|
||||
{
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
m_column_ix = i.m_column_ix;
|
||||
m_value = i.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_value;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
template <std::size_t... Is>
|
||||
tuple_type get(std::index_sequence<Is...>) const
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
{
|
||||
row_handle rh{*m_category, *m_current};
|
||||
return tuple_type{rh[m_column_ix[Is]].template as<Ts>()...};
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
value_type m_value;
|
||||
std::array<size_t, N> m_column_ix;
|
||||
};
|
||||
|
||||
template<typename Category>
|
||||
class iterator_impl<Category>
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = row_handle;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = row_handle;
|
||||
using reference = row_handle;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2>
|
||||
iterator_impl(const iterator_impl<C2> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type*>(rhs.m_current))
|
||||
{
|
||||
}
|
||||
|
||||
iterator_impl(Category &cat, row *current)
|
||||
: m_category(const_cast<category_type *>(&cat))
|
||||
, m_current(current)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 0> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return {*m_category, *m_current};
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_current;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
};
|
||||
|
||||
|
||||
template<typename Category, typename T>
|
||||
class iterator_impl<Category, T>
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = T;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2, typename T2>
|
||||
iterator_impl(const iterator_impl<C2, T2> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(iterator_impl<IRowType, T> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type *>(rhs.m_current))
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
m_value = get(m_current);
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 1> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_column_ix(cix[0])
|
||||
{
|
||||
m_value = get();
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
m_column_ix = i.m_column_ix;
|
||||
m_value = i.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_value;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
m_value = get();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
value_type get() const
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
{
|
||||
row_handle rh{*m_category, *m_current};
|
||||
return rh[m_column_ix].template as<T>();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
value_type m_value;
|
||||
size_t m_column_ix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// iterator proxy
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
class iterator_proxy
|
||||
{
|
||||
public:
|
||||
static constexpr const size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = Category;
|
||||
using row_type = std::conditional_t<std::is_const_v<category_type>, const row, row>;
|
||||
|
||||
using iterator = iterator_impl<category_type, Ts...>;
|
||||
using row_iterator = iterator_impl<category_type>;
|
||||
|
||||
iterator_proxy(category_type &cat, row_iterator pos, char const *const columns[N]);
|
||||
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> columns);
|
||||
|
||||
iterator_proxy(iterator_proxy &&p);
|
||||
iterator_proxy &operator=(iterator_proxy &&p);
|
||||
|
||||
iterator_proxy(const iterator_proxy &) = delete;
|
||||
iterator_proxy &operator=(const iterator_proxy &) = delete;
|
||||
|
||||
iterator begin() const { return iterator(m_begin, m_column_ix); }
|
||||
iterator end() const { return iterator(m_end, m_column_ix); }
|
||||
|
||||
bool empty() const { return m_begin == m_end; }
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
size_t size() const { return std::distance(begin(), end()); }
|
||||
|
||||
// row front() { return *begin(); }
|
||||
// row back() { return *(std::prev(end())); }
|
||||
|
||||
category_type &category() const { return *m_category; }
|
||||
|
||||
void swap(iterator_proxy &rhs)
|
||||
{
|
||||
std::swap(m_category, rhs.m_category);
|
||||
std::swap(m_begin, rhs.m_begin);
|
||||
std::swap(m_end, rhs.m_end);
|
||||
std::swap(m_column_ix, rhs.m_column_ix);
|
||||
}
|
||||
|
||||
private:
|
||||
category_type *m_category;
|
||||
row_iterator m_begin, m_end;
|
||||
std::array<size_t, N> m_column_ix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// conditional iterator proxy
|
||||
|
||||
template <typename CategoryType, typename... Ts>
|
||||
class conditional_iterator_proxy
|
||||
{
|
||||
public:
|
||||
static constexpr const size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = std::remove_cv_t<CategoryType>;
|
||||
|
||||
using base_iterator = iterator_impl<CategoryType, Ts...>;
|
||||
using value_type = typename base_iterator::value_type;
|
||||
using row_type = typename base_iterator::row_type;
|
||||
using row_iterator = iterator_impl<CategoryType>;
|
||||
|
||||
class conditional_iterator_impl
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = conditional_iterator_proxy::value_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type;
|
||||
|
||||
conditional_iterator_impl(CategoryType &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix);
|
||||
conditional_iterator_impl(const conditional_iterator_impl &i) = default;
|
||||
conditional_iterator_impl &operator=(const conditional_iterator_impl &i) = default;
|
||||
|
||||
virtual ~conditional_iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return *mBegin;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &*mBegin;
|
||||
}
|
||||
|
||||
conditional_iterator_impl &operator++()
|
||||
{
|
||||
while (mBegin != mEnd)
|
||||
{
|
||||
if (++mBegin == mEnd)
|
||||
break;
|
||||
|
||||
if (m_condition->operator()(mBegin))
|
||||
break;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
conditional_iterator_impl operator++(int)
|
||||
{
|
||||
conditional_iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const conditional_iterator_impl &rhs) const { return mBegin == rhs.mBegin; }
|
||||
bool operator!=(const conditional_iterator_impl &rhs) const { return mBegin != rhs.mBegin; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin == rhs; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin != rhs; }
|
||||
|
||||
private:
|
||||
CategoryType *mCat;
|
||||
base_iterator mBegin, mEnd;
|
||||
const condition *m_condition;
|
||||
};
|
||||
|
||||
using iterator = conditional_iterator_impl;
|
||||
using reference = typename iterator::reference;
|
||||
|
||||
template <typename... Ns>
|
||||
conditional_iterator_proxy(CategoryType &cat, row_iterator pos, condition &&cond, Ns... names);
|
||||
|
||||
conditional_iterator_proxy(conditional_iterator_proxy &&p);
|
||||
conditional_iterator_proxy &operator=(conditional_iterator_proxy &&p);
|
||||
|
||||
conditional_iterator_proxy(const conditional_iterator_proxy &) = delete;
|
||||
conditional_iterator_proxy &operator=(const conditional_iterator_proxy &) = delete;
|
||||
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
|
||||
bool empty() const;
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
size_t size() const { return std::distance(begin(), end()); }
|
||||
|
||||
row_handle front() { return *begin(); }
|
||||
// row_handle back() { return *begin(); }
|
||||
|
||||
CategoryType &category() const { return *m_cat; }
|
||||
|
||||
void swap(conditional_iterator_proxy &rhs);
|
||||
|
||||
private:
|
||||
CategoryType *m_cat;
|
||||
condition m_condition;
|
||||
row_iterator mCBegin, mCEnd;
|
||||
std::array<size_t, N> mCix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N])
|
||||
: m_category(&cat)
|
||||
, m_begin(pos)
|
||||
, m_end(cat.end())
|
||||
{
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
m_column_ix[i] = m_category->get_column_ix(columns[i]);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> columns)
|
||||
: m_category(&cat)
|
||||
, m_begin(pos)
|
||||
, m_end(cat.end())
|
||||
{
|
||||
// static_assert(columns.size() == N, "The list of column names should be exactly the same as the list of requested columns");
|
||||
|
||||
std::size_t i = 0;
|
||||
for (auto column : columns)
|
||||
m_column_ix[i++] = m_category->get_column_ix(column);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditional_iterator_impl(
|
||||
Category &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix)
|
||||
: mCat(&cat)
|
||||
, mBegin(pos, cix)
|
||||
, mEnd(cat.end(), cix)
|
||||
, m_condition(&cond)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(conditional_iterator_proxy &&p)
|
||||
: m_cat(nullptr)
|
||||
, mCBegin(p.mCBegin)
|
||||
, mCEnd(p.mCEnd)
|
||||
, mCix(p.mCix)
|
||||
{
|
||||
std::swap(m_cat, p.m_cat);
|
||||
std::swap(mCix, p.mCix);
|
||||
m_condition.swap(p.m_condition);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
template <typename... Ns>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category &cat, row_iterator pos, condition &&cond, Ns... names)
|
||||
: m_cat(&cat)
|
||||
, m_condition(std::move(cond))
|
||||
, mCBegin(pos)
|
||||
, mCEnd(cat.end())
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types");
|
||||
|
||||
m_condition.prepare(cat);
|
||||
|
||||
while (mCBegin != mCEnd and not m_condition(*mCBegin))
|
||||
++mCBegin;
|
||||
|
||||
size_t i = 0;
|
||||
((mCix[i++] = m_cat->get_column_ix(names)), ...);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...> &conditional_iterator_proxy<Category, Ts...>::operator=(conditional_iterator_proxy &&p)
|
||||
{
|
||||
swap(p);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::begin() const
|
||||
{
|
||||
return iterator(*m_cat, mCBegin, m_condition, mCix);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::end() const
|
||||
{
|
||||
return iterator(*m_cat, mCEnd, m_condition, mCix);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
bool conditional_iterator_proxy<Category, Ts...>::empty() const
|
||||
{
|
||||
return mCBegin == mCEnd;
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
void conditional_iterator_proxy<Category, Ts...>::swap(conditional_iterator_proxy &rhs)
|
||||
{
|
||||
std::swap(m_cat, rhs.m_cat);
|
||||
m_condition.swap(rhs.m_condition);
|
||||
std::swap(mCBegin, rhs.mCBegin);
|
||||
std::swap(mCEnd, rhs.mCEnd);
|
||||
std::swap(mCix, rhs.mCix);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
79
include/cif++/list.hpp
Normal file
79
include/cif++/list.hpp
Normal file
@@ -0,0 +1,79 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Skeleton for an allocator aware, singly linked list. It currently only
// offers the protected helpers to allocate, construct and destroy list nodes
// plus the head/tail pointers.
//
// Fixes compared to the previous version, which could not be compiled:
//  - the template parameter is called Allocator, not Alloc
//  - the traits alias refers to list_item_allocator_type
//  - get_allocator() and the allocator it returns now exist
//  - 'typename' added for the dependent return type of get_item()
//  - constructor arguments are taken as forwarding references
template <typename Allocator = std::allocator<void>>
class list
{
  public:
	using allocator_type = Allocator;

	// Returns a copy of the allocator this list was created with.
	allocator_type get_allocator() const
	{
		return m_allocator;
	}

  protected:
	// A node of the list.
	struct list_item
	{
		list_item *m_next = nullptr;
	};

	using list_item_allocator_type = typename std::allocator_traits<Allocator>::template rebind_alloc<list_item>;
	using list_item_allocator_traits = std::allocator_traits<list_item_allocator_type>;

	// Allocate uninitialised storage for exactly one list_item.
	typename list_item_allocator_traits::pointer get_item()
	{
		list_item_allocator_type ia(get_allocator());
		return list_item_allocator_traits::allocate(ia, 1);
	}

	// Allocate a list_item and construct it from args. The column_ix
	// parameter is not used by this implementation.
	// NOTE(review): assumes the allocator's pointer type converts to
	// list_item*, as is the case for std::allocator.
	template <typename... Arguments>
	list_item *create_list_item(uint16_t /*column_ix*/, Arguments &&...args)
	{
		auto p = this->get_item();
		list_item_allocator_type ia(get_allocator());
		list_item_allocator_traits::construct(ia, p, std::forward<Arguments>(args)...);
		return p;
	}

	// Destroy a list_item created with create_list_item and release its storage.
	void delete_list_item(list_item *iv)
	{
		list_item_allocator_type ia(get_allocator());
		list_item_allocator_traits::destroy(ia, iv);
		list_item_allocator_traits::deallocate(ia, iv, 1);
	}

	list_item *m_head = nullptr, *m_tail = nullptr;

  private:
	Allocator m_allocator;
};
|
||||
|
||||
|
||||
} // namespace cif
|
||||
922
include/cif++/model.hpp
Normal file
922
include/cif++/model.hpp
Normal file
@@ -0,0 +1,922 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <cif++/atom_type.hpp>
|
||||
|
||||
#if __cpp_lib_format
|
||||
#include <format>
|
||||
#endif
|
||||
|
||||
namespace cif::mm
|
||||
{
|
||||
|
||||
class atom;
|
||||
class residue;
|
||||
class monomer;
|
||||
class polymer;
|
||||
class structure;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class atom
|
||||
{
|
||||
private:
|
||||
struct atom_impl : public std::enable_shared_from_this<atom_impl>
|
||||
{
|
||||
atom_impl(datablock &db, std::string_view id)
|
||||
: m_db(db)
|
||||
, m_cat(db["atom_site"])
|
||||
, m_id(id)
|
||||
{
|
||||
auto r = row();
|
||||
if (r)
|
||||
tie(m_location.m_x, m_location.m_y, m_location.m_z) = r.get("Cartn_x", "Cartn_y", "Cartn_z");
|
||||
}
|
||||
|
||||
// constructor for a symmetry copy of an atom
|
||||
atom_impl(const atom_impl &impl, const point &loc, const std::string &sym_op)
|
||||
: atom_impl(impl)
|
||||
{
|
||||
m_location = loc;
|
||||
m_symop = sym_op;
|
||||
}
|
||||
|
||||
atom_impl(const atom_impl &i) = default;
|
||||
|
||||
void prefetch();
|
||||
|
||||
int compare(const atom_impl &b) const;
|
||||
|
||||
// bool getAnisoU(float anisou[6]) const;
|
||||
|
||||
int get_charge() const;
|
||||
|
||||
void moveTo(const point &p);
|
||||
|
||||
// const compound *compound() const;
|
||||
|
||||
std::string get_property(std::string_view name) const;
|
||||
int get_property_int(std::string_view name) const;
|
||||
float get_property_float(std::string_view name) const;
|
||||
|
||||
void set_property(const std::string_view name, const std::string &value);
|
||||
|
||||
row_handle row()
|
||||
{
|
||||
return m_cat[{{"id", m_id}}];
|
||||
}
|
||||
|
||||
const row_handle row() const
|
||||
{
|
||||
return m_cat[{{"id", m_id}}];
|
||||
}
|
||||
|
||||
row_handle row_aniso()
|
||||
{
|
||||
auto cat = m_db.get("atom_site_anisotrop");
|
||||
return cat ? cat->find1(key("id") == m_id) : row_handle{};
|
||||
}
|
||||
|
||||
const row_handle row_aniso() const
|
||||
{
|
||||
auto cat = m_db.get("atom_site_anisotrop");
|
||||
return cat ? cat->find1(key("id") == m_id) : row_handle{};
|
||||
}
|
||||
|
||||
const datablock &m_db;
|
||||
category &m_cat;
|
||||
std::string m_id;
|
||||
point m_location;
|
||||
std::string m_symop = "1_555";
|
||||
};
|
||||
|
||||
public:
|
||||
// Construct an uninitialized atom, i.e. one without an implementation object.
atom() {}
|
||||
|
||||
atom(std::shared_ptr<atom_impl> impl)
|
||||
: m_impl(impl)
|
||||
{
|
||||
}
|
||||
|
||||
atom(const atom &rhs)
|
||||
: m_impl(rhs.m_impl)
|
||||
{
|
||||
}
|
||||
|
||||
atom(datablock &db, row_handle &row)
|
||||
: atom(std::make_shared<atom_impl>(db, row["id"].as<std::string>()))
|
||||
{
|
||||
}
|
||||
|
||||
// a special constructor to create symmetry copies
|
||||
atom(const atom &rhs, const point &symmmetry_location, const std::string &symmetry_operation)
|
||||
: atom(std::make_shared<atom_impl>(*rhs.m_impl, symmmetry_location, symmetry_operation))
|
||||
{
|
||||
}
|
||||
|
||||
// True when this atom has an implementation object.
explicit operator bool() const
{
	return static_cast<bool>(m_impl);
}
|
||||
|
||||
// // return a copy of this atom, with data copied instead of referenced
|
||||
// atom clone() const
|
||||
// {
|
||||
// auto copy = std::make_shared<atom_impl>(*m_impl);
|
||||
// copy->mClone = true;
|
||||
// return atom(copy);
|
||||
// }
|
||||
|
||||
atom &operator=(const atom &rhs) = default;
|
||||
|
||||
// template <typename T>
|
||||
// T get_property(const std::string_view name) const;
|
||||
|
||||
std::string get_property(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property(name);
|
||||
}
|
||||
|
||||
int get_property_int(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property_int(name);
|
||||
}
|
||||
|
||||
float get_property_float(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property_float(name);
|
||||
}
|
||||
|
||||
void set_property(const std::string_view name, const std::string &value)
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
m_impl->set_property(name, value);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
|
||||
void set_property(const std::string_view name, const T &value)
|
||||
{
|
||||
set_property(name, std::to_string(value));
|
||||
}
|
||||
|
||||
const std::string &id() const { return impl().m_id; }
|
||||
|
||||
cif::atom_type get_type() const { return atom_type_traits(get_property("type_symbol")).type(); }
|
||||
|
||||
point get_location() const { return impl().m_location; }
|
||||
void set_location(point p)
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
m_impl->moveTo(p);
|
||||
}
|
||||
|
||||
/// \brief Translate the position of this atom by \a t
|
||||
void translate(point t)
|
||||
{
|
||||
set_location(get_location() + t);
|
||||
}
|
||||
|
||||
/// \brief Rotate the position of this atom by \a q
|
||||
void rotate(quaternion q)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc.rotate(q);
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
/// \brief Translate and rotate the position of this atom by \a t and \a q
|
||||
void translate_and_rotate(point t, quaternion q)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc += t;
|
||||
loc.rotate(q);
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates this atom by \a t1 , \a q and \a t2
|
||||
void translate_rotate_and_translate(point t1, quaternion q, point t2)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc += t1;
|
||||
loc.rotate(q);
|
||||
loc += t2;
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
// for direct access to underlying data, be careful!
|
||||
const row_handle get_row() const { return impl().row(); }
|
||||
const row_handle get_row_aniso() const { return impl().row_aniso(); }
|
||||
|
||||
bool is_symmetry_copy() const { return impl().m_symop != "1_555"; }
|
||||
std::string symmetry() const { return impl().m_symop; }
|
||||
|
||||
// const compound &compound() const;
|
||||
|
||||
bool is_water() const
|
||||
{
|
||||
auto comp_id = get_label_comp_id();
|
||||
return comp_id == "HOH" or comp_id == "H2O" or comp_id == "WAT";
|
||||
}
|
||||
|
||||
int get_charge() const { return impl().get_charge(); }
|
||||
|
||||
// float uIso() const;
|
||||
// bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
|
||||
|
||||
float get_occupancy() const { return get_property_float("occupancy"); }
|
||||
|
||||
// specifications
|
||||
|
||||
std::string get_label_asym_id() const { return get_property("label_asym_id"); }
|
||||
int get_label_seq_id() const { return get_property_int("label_seq_id"); }
|
||||
std::string get_label_atom_id() const { return get_property("label_atom_id"); }
|
||||
std::string get_label_alt_id() const { return get_property("label_alt_id"); }
|
||||
std::string get_label_comp_id() const { return get_property("label_comp_id"); }
|
||||
std::string get_label_entity_id() const { return get_property("label_entity_id"); }
|
||||
|
||||
std::string get_auth_asym_id() const { return get_property("auth_asym_id"); }
|
||||
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); }
|
||||
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); }
|
||||
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); }
|
||||
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); }
|
||||
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); }
|
||||
|
||||
/// \brief True when property label_alt_id is not empty
bool is_alternate() const
{
	const std::string alt_id = get_label_alt_id();
	return not alt_id.empty();
}
|
||||
|
||||
// std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
|
||||
|
||||
std::string pdb_id() const
|
||||
{
|
||||
return get_label_comp_id() + '_' + get_auth_asym_id() + '_' + get_auth_seq_id() + get_pdb_ins_code();
|
||||
}
|
||||
|
||||
bool operator==(const atom &rhs) const
|
||||
{
|
||||
if (m_impl == rhs.m_impl)
|
||||
return true;
|
||||
|
||||
if (not(m_impl and rhs.m_impl))
|
||||
return false;
|
||||
|
||||
return &m_impl->m_db == &rhs.m_impl->m_db and m_impl->m_id == rhs.m_impl->m_id;
|
||||
}
|
||||
|
||||
bool operator!=(const atom &rhs) const
|
||||
{
|
||||
return not operator==(rhs);
|
||||
}
|
||||
|
||||
// // access data in compound for this atom
|
||||
|
||||
// convenience routine
|
||||
bool is_back_bone() const
|
||||
{
|
||||
auto atomID = get_label_atom_id();
|
||||
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
|
||||
}
|
||||
|
||||
/// \brief Exchange the implementation of this atom with the one of \a b
void swap(atom &b)
{
	m_impl.swap(b.m_impl);
}
|
||||
|
||||
int compare(const atom &b) const { return impl().compare(*b.m_impl); }
|
||||
|
||||
bool operator<(const atom &rhs) const
|
||||
{
|
||||
return compare(rhs) < 0;
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const atom &atom);
|
||||
|
||||
// /// \brief Synchronize data with underlying cif data
|
||||
// void sync()
|
||||
// {
|
||||
// if (m_impl)
|
||||
// m_impl->prefetch();
|
||||
// }
|
||||
|
||||
private:
|
||||
friend class structure;
|
||||
|
||||
/// \brief Checked access to the implementation
/// \throws std::runtime_error when this atom is uninitialized
const atom_impl &impl() const
{
	if (m_impl)
		return *m_impl;

	throw std::runtime_error("Uninitialized atom, not found?");
}
|
||||
|
||||
std::shared_ptr<atom_impl> m_impl;
|
||||
};
|
||||
|
||||
// template <>
|
||||
// inline std::string atom::get_property<std::string>(const std::string_view name) const
|
||||
// {
|
||||
// return get_property(name);
|
||||
// }
|
||||
|
||||
// template <>
|
||||
// inline int atom::get_property<int>(const std::string_view name) const
|
||||
// {
|
||||
// auto v = impl().get_property(name);
|
||||
// return v.empty() ? 0 : stoi(v);
|
||||
// }
|
||||
|
||||
// template <>
|
||||
// inline float atom::get_property<float>(const std::string_view name) const
|
||||
// {
|
||||
// return stof(impl().get_property(name));
|
||||
// }
|
||||
|
||||
/// \brief Free function swap for atoms, forwards to atom::swap
inline void swap(atom &a, atom &b)
{
	a.swap(b);
}
|
||||
|
||||
inline float distance(const atom &a, const atom &b)
|
||||
{
|
||||
return distance(a.get_location(), b.get_location());
|
||||
}
|
||||
|
||||
inline float distance_squared(const atom &a, const atom &b)
|
||||
{
|
||||
return distance_squared(a.get_location(), b.get_location());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief The kinds of entity distinguished by the model classes.
///
/// The enumerators have consecutive values starting at zero.
enum class EntityType
{
	polymer,    ///< value 0
	NonPolymer, ///< value 1
	Macrolide,  ///< value 2
	Water,      ///< value 3
	Branched    ///< value 4
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class residue
|
||||
{
|
||||
public:
|
||||
friend class structure;
|
||||
|
||||
// constructor
|
||||
residue(const structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, int seqID,
|
||||
const std::string &authAsymID, const std::string &authSeqID,
|
||||
const std::string &pdbInsCode)
|
||||
: m_structure(&structure)
|
||||
, m_compound_id(compoundID)
|
||||
, m_asym_id(asymID)
|
||||
, m_seq_id(seqID)
|
||||
, m_auth_asym_id(authAsymID)
|
||||
, m_auth_seq_id(authSeqID)
|
||||
, m_pdb_ins_code(pdbInsCode)
|
||||
{
|
||||
}
|
||||
|
||||
residue(const residue &rhs) = delete;
|
||||
residue &operator=(const residue &rhs) = delete;
|
||||
|
||||
residue(residue &&rhs) = default;
|
||||
residue &operator=(residue &&rhs) = default;
|
||||
|
||||
virtual ~residue() = default;
|
||||
|
||||
std::string get_entity_id() const;
|
||||
|
||||
EntityType entity_type() const;
|
||||
|
||||
const std::string &get_asym_id() const { return m_asym_id; }
|
||||
int get_seq_id() const { return m_seq_id; }
|
||||
|
||||
const std::string get_auth_asym_id() const { return m_auth_asym_id; }
|
||||
const std::string get_auth_seq_id() const { return m_auth_seq_id; }
|
||||
std::string get_pdb_ins_code() const { return m_pdb_ins_code; }
|
||||
|
||||
const std::string &get_compound_id() const { return m_compound_id; }
|
||||
void set_compound_id(const std::string &id) { m_compound_id = id; }
|
||||
|
||||
const structure *get_structure() const { return m_structure; }
|
||||
|
||||
// const compound &compound() const;
|
||||
|
||||
std::vector<atom> &atoms()
|
||||
{
|
||||
return m_atoms;
|
||||
}
|
||||
|
||||
const std::vector<atom> &atoms() const
|
||||
{
|
||||
return m_atoms;
|
||||
}
|
||||
|
||||
void add_atom(atom &atom);
|
||||
|
||||
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
|
||||
std::vector<atom> unique_atoms() const;
|
||||
|
||||
/// \brief The alt ID used for the unique atoms
|
||||
std::string unique_alt_id() const;
|
||||
|
||||
atom get_atom_by_atom_id(const std::string &atomID) const;
|
||||
|
||||
// Is this residue a single entity?
|
||||
bool is_entity() const;
|
||||
bool is_water() const { return m_compound_id == "HOH"; }
|
||||
// bool empty() const { return m_structure == nullptr; }
|
||||
|
||||
bool has_alternate_atoms() const;
|
||||
|
||||
/// \brief Return the list of unique alt ID's present in this residue
|
||||
std::set<std::string> get_alternate_ids() const;
|
||||
|
||||
/// \brief Return the list of unique atom ID's
|
||||
std::set<std::string> get_atom_ids() const;
|
||||
|
||||
/// \brief Return the list of atoms having ID \a atomID
|
||||
std::vector<atom> get_atoms_by_id(const std::string &atomID) const;
|
||||
|
||||
// some routines for 3d work
|
||||
std::tuple<point, float> center_and_radius() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const residue &res);
|
||||
|
||||
bool operator==(const residue &rhs) const
|
||||
{
|
||||
return this == &rhs or (m_structure == rhs.m_structure and
|
||||
m_seq_id == rhs.m_seq_id and
|
||||
m_asym_id == rhs.m_asym_id and
|
||||
m_compound_id == rhs.m_compound_id and
|
||||
m_auth_seq_id == rhs.m_auth_seq_id);
|
||||
}
|
||||
|
||||
protected:
|
||||
residue() {}
|
||||
|
||||
const structure *m_structure = nullptr;
|
||||
std::string m_compound_id, m_asym_id;
|
||||
int m_seq_id = 0;
|
||||
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
|
||||
std::vector<atom> m_atoms;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a monomer models a single residue in a protein chain
|
||||
|
||||
class monomer : public residue
|
||||
{
|
||||
public:
|
||||
// monomer();
|
||||
monomer(const monomer &rhs) = delete;
|
||||
monomer &operator=(const monomer &rhs) = delete;
|
||||
|
||||
monomer(monomer &&rhs);
|
||||
monomer &operator=(monomer &&rhs);
|
||||
|
||||
monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
|
||||
const std::string &pdbInsCode, const std::string &compoundID);
|
||||
|
||||
bool is_first_in_chain() const;
|
||||
bool is_last_in_chain() const;
|
||||
|
||||
// convenience
|
||||
bool has_alpha() const;
|
||||
bool has_kappa() const;
|
||||
|
||||
// Assuming this is really an amino acid...
|
||||
|
||||
float phi() const;
|
||||
float psi() const;
|
||||
float alpha() const;
|
||||
float kappa() const;
|
||||
float tco() const;
|
||||
float omega() const;
|
||||
|
||||
// torsion angles
|
||||
size_t nr_of_chis() const;
|
||||
float chi(size_t i) const;
|
||||
|
||||
bool is_cis() const;
|
||||
|
||||
/// \brief Returns true if the four atoms C, CA, N and O are present
|
||||
bool is_complete() const;
|
||||
|
||||
/// \brief Returns true if any of the backbone atoms has an alternate
|
||||
bool has_alternate_backbone_atoms() const;
|
||||
|
||||
atom CAlpha() const { return get_atom_by_atom_id("CA"); }
|
||||
atom C() const { return get_atom_by_atom_id("C"); }
|
||||
atom N() const { return get_atom_by_atom_id("N"); }
|
||||
atom O() const { return get_atom_by_atom_id("O"); }
|
||||
atom H() const { return get_atom_by_atom_id("H"); }
|
||||
|
||||
bool is_bonded_to(const monomer &rhs) const
|
||||
{
|
||||
return this != &rhs and are_bonded(*this, rhs);
|
||||
}
|
||||
|
||||
static bool are_bonded(const monomer &a, const monomer &b, float errorMargin = 0.5f);
|
||||
static bool is_cis(const monomer &a, const monomer &b);
|
||||
static float omega(const monomer &a, const monomer &b);
|
||||
|
||||
// for LEU and VAL
|
||||
float chiral_volume() const;
|
||||
|
||||
bool operator==(const monomer &rhs) const
|
||||
{
|
||||
return m_polymer == rhs.m_polymer and m_index == rhs.m_index;
|
||||
}
|
||||
|
||||
private:
|
||||
const polymer *m_polymer;
|
||||
size_t m_index;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class polymer : public std::vector<monomer>
|
||||
{
|
||||
public:
|
||||
polymer(const structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
|
||||
|
||||
polymer(const polymer &) = delete;
|
||||
polymer &operator=(const polymer &) = delete;
|
||||
|
||||
// monomer &getBySeqID(int seqID);
|
||||
// const monomer &getBySeqID(int seqID) const;
|
||||
|
||||
const structure *get_structure() const { return m_structure; }
|
||||
|
||||
std::string get_asym_id() const { return m_asym_id; }
|
||||
std::string get_auth_asym_id() const { return m_auth_asym_id; } // The PDB chain ID, actually
|
||||
std::string get_entity_id() const { return m_entity_id; }
|
||||
|
||||
// int Distance(const monomer &a, const monomer &b) const;
|
||||
|
||||
private:
|
||||
const structure *m_structure;
|
||||
std::string m_entity_id;
|
||||
std::string m_asym_id;
|
||||
std::string m_auth_asym_id;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// sugar and branch, to describe glycosylation sites
|
||||
|
||||
class branch;
|
||||
|
||||
class sugar : public residue
|
||||
{
|
||||
public:
|
||||
sugar(const branch &branch, const std::string &compoundID,
|
||||
const std::string &asymID, int authSeqID);
|
||||
|
||||
sugar(sugar &&rhs);
|
||||
sugar &operator=(sugar &&rhs);
|
||||
|
||||
int num() const { return std::stoi(m_auth_seq_id); }
|
||||
std::string name() const;
|
||||
|
||||
/// \brief Return the atom the C1 is linked to
|
||||
atom get_link() const { return m_link; }
|
||||
void set_link(atom link) { m_link = link; }
|
||||
|
||||
size_t get_link_nr() const
|
||||
{
|
||||
size_t result = 0;
|
||||
if (m_link)
|
||||
result = m_link.get_property_int("auth_seq_id");
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
const branch *m_branch;
|
||||
atom m_link;
|
||||
};
|
||||
|
||||
class branch : public std::vector<sugar>
|
||||
{
|
||||
public:
|
||||
branch(structure &structure, const std::string &asymID);
|
||||
|
||||
void link_atoms();
|
||||
|
||||
std::string name() const;
|
||||
float weight() const;
|
||||
std::string get_asym_id() const { return m_asym_id; }
|
||||
|
||||
structure &get_structure() { return *m_structure; }
|
||||
const structure &get_structure() const { return *m_structure; }
|
||||
|
||||
sugar &getSugarByNum(int nr);
|
||||
const sugar &getSugarByNum(int nr) const;
|
||||
|
||||
private:
|
||||
friend sugar;
|
||||
|
||||
std::string name(const sugar &s) const;
|
||||
|
||||
structure *m_structure;
|
||||
std::string m_asym_id;
|
||||
};
|
||||
|
||||
// // --------------------------------------------------------------------
|
||||
// // file is a reference to the data stored in e.g. the cif file.
|
||||
// // This object is not copyable.
|
||||
|
||||
// class File : public file
|
||||
// {
|
||||
// public:
|
||||
// File() {}
|
||||
|
||||
// // File(const std::filesystem::path &path)
|
||||
// // {
|
||||
// // load(path);
|
||||
// // }
|
||||
|
||||
// // File(const char *data, size_t length)
|
||||
// // {
|
||||
// // load(data, length);
|
||||
// // }
|
||||
|
||||
// File(const File &) = delete;
|
||||
// File &operator=(const File &) = delete;
|
||||
|
||||
// // void load(const std::filesystem::path &p) override;
|
||||
// // void save(const std::filesystem::path &p) override;
|
||||
|
||||
// // using file::load;
|
||||
// // using file::save;
|
||||
|
||||
// datablock &data() { return front(); }
|
||||
// };
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Bit flags that can be passed to the constructors of structure
enum class StructureOpenOptions
{
	SkipHydrogen = 1 << 0 ///< bit 0, value 1
};

/// \brief Test whether options \a a and \a b have at least one bit in common
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
	const int common = static_cast<int>(a) bitand static_cast<int>(b);
	return common != 0;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class structure
|
||||
{
|
||||
public:
|
||||
structure(file &p, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
|
||||
structure(datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
|
||||
structure(structure &&s) = default;
|
||||
|
||||
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
|
||||
// NOTE: removed, simply create a new structure for each thread
|
||||
structure(const structure &) = delete;
|
||||
|
||||
structure &operator=(const structure &) = delete;
|
||||
// Structure &operator=(Structure &&s) = default;
|
||||
|
||||
~structure() = default;
|
||||
|
||||
size_t get_model_nr() const { return m_model_nr; }
|
||||
|
||||
const std::vector<atom> &atoms() const { return m_atoms; }
|
||||
// std::vector<atom> &atoms() { return m_atoms; }
|
||||
|
||||
EntityType get_entity_type_for_entity_id(const std::string entityID) const;
|
||||
EntityType get_entity_type_for_asym_id(const std::string asymID) const;
|
||||
|
||||
// std::vector<atom> waters() const;
|
||||
|
||||
const std::list<polymer> &polymers() const { return m_polymers; }
|
||||
std::list<polymer> &polymers() { return m_polymers; }
|
||||
|
||||
polymer &get_polymer_by_asym_id(const std::string &asymID);
|
||||
|
||||
const polymer &get_polymer_by_asym_id(const std::string &asymID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_polymer_by_asym_id(asymID);
|
||||
}
|
||||
|
||||
const std::list<branch> &branches() const { return m_branches; }
|
||||
std::list<branch> &branches() { return m_branches; }
|
||||
|
||||
branch &get_branch_by_asym_id(const std::string &asymID);
|
||||
const branch &get_branch_by_asym_id(const std::string &asymID) const;
|
||||
|
||||
const std::vector<residue> &non_polymers() const { return m_non_polymers; }
|
||||
|
||||
atom get_atom_by_id(const std::string &id) const;
|
||||
// atom getAtomByLocation(point pt, float maxDistance) const;
|
||||
|
||||
atom get_atom_by_label(const std::string &atomID, const std::string &asymID,
|
||||
const std::string &compID, int seqID, const std::string &altID = "");
|
||||
|
||||
// /// \brief Return the atom closest to point \a p
|
||||
atom get_atom_by_position(point p) const;
|
||||
|
||||
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
|
||||
atom get_atom_by_position_and_type(point p, std::string_view type, std::string_view res_type) const;
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
residue &get_residue(const std::string &asymID)
|
||||
{
|
||||
return get_residue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
const residue &get_residue(const std::string &asymID) const
|
||||
{
|
||||
return get_residue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
|
||||
residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get a the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
|
||||
const residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_residue(asymID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
const residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_residue(asymID, compID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get a the residue for atom \a atom
|
||||
residue &get_residue(const atom &atom)
|
||||
{
|
||||
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
|
||||
}
|
||||
|
||||
/// \brief Get a the residue for atom \a atom
|
||||
const residue &get_residue(const atom &atom) const
|
||||
{
|
||||
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
|
||||
}
|
||||
|
||||
// Actions
|
||||
void remove_atom(atom &a)
|
||||
{
|
||||
remove_atom(a, true);
|
||||
}
|
||||
|
||||
void swap_atoms(atom a1, atom a2); // swap the labels for these atoms
|
||||
void move_atom(atom a, point p); // move atom to a new location
|
||||
void change_residue(residue &res, const std::string &newcompound,
|
||||
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
|
||||
|
||||
/// \brief Remove a residue, can be monomer or nonpoly
|
||||
///
|
||||
/// \param asym_id The asym ID
|
||||
/// \param seq_id The sequence ID
|
||||
void remove_residue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
|
||||
{
|
||||
remove_residue(get_residue(asym_id, seq_id, auth_seq_id));
|
||||
}
|
||||
|
||||
/// \brief Create a new non-polymer entity, returns new ID
|
||||
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
|
||||
/// \return The ID of the created entity
|
||||
std::string create_non_poly_entity(const std::string &mon_id);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
|
||||
/// This method assumes you are copying data from one cif file to another.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of atom_site rows containing the data.
|
||||
/// \return The newly create asym ID
|
||||
std::string create_non_poly(const std::string &entity_id, const std::vector<atom> &atoms);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
|
||||
/// This method creates new atom records filled with info from the info.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of sets of item data containing the data for the atoms.
|
||||
/// \return The newly create asym ID
|
||||
std::string create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms);
|
||||
|
||||
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a atoms
|
||||
branch &create_branch(std::vector<row_initializer> atoms);
|
||||
|
||||
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
|
||||
///
|
||||
/// \param asym_id The asym id of the branch to extend
|
||||
/// \param atom_info Array containing the info for the atoms to construct for the new sugar
|
||||
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
|
||||
/// \param link_atom The atom id of the atom linked in the sugar
|
||||
branch &extend_branch(const std::string &asym_id, std::vector<row_initializer> atom_info,
|
||||
int link_sugar, const std::string &link_atom);
|
||||
|
||||
/// \brief Remove \a branch
|
||||
void remove_branch(branch &branch);
|
||||
|
||||
/// \brief Remove residue \a res
|
||||
///
|
||||
/// \param res The residue to remove
|
||||
void remove_residue(residue &res);
|
||||
|
||||
/// \brief Translate the coordinates of all atoms in the structure by \a t
|
||||
void translate(point t);
|
||||
|
||||
/// \brief Rotate the coordinates of all atoms in the structure by \a q
|
||||
void rotate(quaternion t);
|
||||
|
||||
/// \brief Translate and rotate the coordinates of all atoms in the structure by \a t and \a q
|
||||
void translate_and_rotate(point t, quaternion q);
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
|
||||
void translate_rotate_and_translate(point t1, quaternion q, point t2);
|
||||
|
||||
void cleanup_empty_categories();
|
||||
|
||||
/// \brief Direct access to underlying data
|
||||
category &get_category(std::string_view name) const
|
||||
{
|
||||
return m_db[name];
|
||||
}
|
||||
|
||||
datablock &get_datablock() const
|
||||
{
|
||||
return m_db;
|
||||
}
|
||||
|
||||
void validate_atoms() const;
|
||||
|
||||
private:
|
||||
friend polymer;
|
||||
friend residue;
|
||||
|
||||
std::string insert_compound(const std::string &compoundID, bool is_entity);
|
||||
|
||||
std::string create_entity_for_branch(branch &branch);
|
||||
|
||||
void load_data();
|
||||
|
||||
void load_atoms_for_model(StructureOpenOptions options);
|
||||
|
||||
template <typename... Args>
|
||||
atom &emplace_atom(Args... args)
|
||||
{
|
||||
return emplace_atom(atom{ std::forward<Args>(args)... });
|
||||
}
|
||||
|
||||
atom &emplace_atom(atom &&atom);
|
||||
|
||||
void remove_atom(atom &a, bool removeFromResidue);
|
||||
void remove_sugar(sugar &sugar);
|
||||
|
||||
datablock &m_db;
|
||||
size_t m_model_nr;
|
||||
std::vector<atom> m_atoms;
|
||||
std::vector<size_t> m_atom_index;
|
||||
std::list<polymer> m_polymers;
|
||||
std::list<branch> m_branches;
|
||||
std::vector<residue> m_non_polymers;
|
||||
};
|
||||
|
||||
} // namespace cif::mm
|
||||
289
include/cif++/parser.hpp
Normal file
289
include/cif++/parser.hpp
Normal file
@@ -0,0 +1,289 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Exception carrying a message of the form
/// "parse error at line <line_nr>: <message>"
class parse_error : public std::runtime_error
{
  public:
	/// \param line_nr The line number to report
	/// \param message The text appended after the line number
	parse_error(uint32_t line_nr, const std::string &message)
		: std::runtime_error(std::string("parse error at line ")
								 .append(std::to_string(line_nr))
								 .append(": ")
								 .append(message))
	{
	}
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// TODO: Need to implement support for transformed long lines
|
||||
|
||||
class sac_parser
|
||||
{
|
||||
public:
|
||||
using datablock_index = std::map<std::string, std::size_t>;
|
||||
|
||||
sac_parser(std::istream &is, bool init = true);
|
||||
|
||||
virtual ~sac_parser() = default;
|
||||
|
||||
/// \brief Bit masks used to test the entries of kCharTraitsTable
enum CharTraitsMask : uint8_t
{
	kOrdinaryMask = 0x01, ///< bit 0, tested by is_ordinary
	kNonBlankMask = 0x02, ///< bit 1, tested by is_non_blank
	kTextLeadMask = 0x04, ///< bit 2, tested by is_text_lead
	kAnyPrintMask = 0x08  ///< bit 3, tested by is_any_print
};
|
||||
|
||||
/// \brief True when \a ch is the character '#' or is whitespace according to std::isspace
static bool is_white(int ch)
{
	if (std::isspace(ch))
		return true;

	return ch == '#';
}
|
||||
|
||||
static constexpr bool is_ordinary(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
|
||||
}
|
||||
|
||||
static constexpr bool is_non_blank(int ch)
|
||||
{
|
||||
return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
|
||||
}
|
||||
|
||||
static constexpr bool is_text_lead(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
|
||||
}
|
||||
|
||||
static constexpr bool is_any_print(int ch)
|
||||
{
|
||||
return ch == '\t' or
|
||||
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
|
||||
}
|
||||
|
||||
static bool is_unquoted_string(std::string_view text)
|
||||
{
|
||||
auto s = text.begin();
|
||||
|
||||
bool result = is_ordinary(*s++);
|
||||
while (result and s != text.end())
|
||||
{
|
||||
result = is_non_blank(*s);
|
||||
++s;
|
||||
}
|
||||
|
||||
// but be careful it does not contain e.g. stop_
|
||||
if (result)
|
||||
{
|
||||
static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
|
||||
result = not std::regex_match(text.begin(), text.end(), reservedRx);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
protected:
|
||||
/// \brief Character traits, indexed by character code minus 0x20.
///
/// Each entry is tested against the CharTraitsMask bits by the is_xxx
/// functions. Initialisers are given for the 96 characters 0x20 .. 0x7f,
/// the remaining entries of the array are zero.
static constexpr uint8_t kCharTraitsTable[128] = {
	//  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
	14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 0x20 .. 0x2f
	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 0x30 .. 0x3f
	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 0x40 .. 0x4f
	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 0x50 .. 0x5f
	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 0x60 .. 0x6f
	15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  // 0x70 .. 0x7f
};
|
||||
|
||||
enum class CIFToken
|
||||
{
|
||||
Unknown,
|
||||
|
||||
Eof,
|
||||
|
||||
DATA,
|
||||
LOOP,
|
||||
GLOBAL,
|
||||
SAVE,
|
||||
STOP,
|
||||
Tag,
|
||||
Value
|
||||
};
|
||||
|
||||
/// \brief Return a printable name for \a token.
///
/// A value that is not one of the CIFToken enumerators yields
/// "Invalid token parameter".
static constexpr const char *get_token_name(CIFToken token)
{
	// Start with the fallback text; a matching case below overrides it.
	const char *name = "Invalid token parameter";

	switch (token)
	{
		case CIFToken::Unknown: name = "Unknown"; break;
		case CIFToken::Eof:     name = "Eof"; break;
		case CIFToken::DATA:    name = "DATA"; break;
		case CIFToken::LOOP:    name = "LOOP"; break;
		case CIFToken::GLOBAL:  name = "GLOBAL"; break;
		case CIFToken::SAVE:    name = "SAVE"; break;
		case CIFToken::STOP:    name = "STOP"; break;
		case CIFToken::Tag:     name = "Tag"; break;
		case CIFToken::Value:   name = "Value"; break;
	}

	return name;
}
|
||||
|
||||
enum class CIFValue
|
||||
{
|
||||
Int,
|
||||
Float,
|
||||
Numeric,
|
||||
String,
|
||||
TextField,
|
||||
Inapplicable,
|
||||
Unknown
|
||||
};
|
||||
|
||||
/// \brief Return a printable name for the value type \a type.
///
/// A value that is not one of the CIFValue enumerators yields
/// "Invalid type parameter".
static constexpr const char *get_value_name(CIFValue type)
{
	// Start with the fallback text; a matching case below overrides it.
	const char *name = "Invalid type parameter";

	switch (type)
	{
		case CIFValue::Int:          name = "Int"; break;
		case CIFValue::Float:        name = "Float"; break;
		case CIFValue::Numeric:      name = "Numeric"; break;
		case CIFValue::String:       name = "String"; break;
		case CIFValue::TextField:    name = "TextField"; break;
		case CIFValue::Inapplicable: name = "Inapplicable"; break;
		case CIFValue::Unknown:      name = "Unknown"; break;
	}

	return name;
}
|
||||
|
||||
// get_next_char takes a char from the buffer, or if it is empty
|
||||
// from the istream. This function also does carriage/linefeed
|
||||
// translation.
|
||||
int get_next_char();
|
||||
|
||||
void retract();
|
||||
|
||||
int restart(int start);
|
||||
|
||||
CIFToken get_next_token();
|
||||
|
||||
void match(CIFToken token);
|
||||
|
||||
public:
|
||||
bool parse_single_datablock(const std::string &datablock);
|
||||
|
||||
datablock_index index_datablocks();
|
||||
|
||||
bool parse_single_datablock(const std::string &datablock, const datablock_index &index);
|
||||
|
||||
void parse_file();
|
||||
|
||||
protected:
|
||||
void parse_global();
|
||||
|
||||
void parse_datablock();
|
||||
|
||||
virtual void parse_save_frame();
|
||||
|
||||
void error(const std::string &msg)
|
||||
{
|
||||
throw parse_error(m_line_nr, msg);
|
||||
}
|
||||
|
||||
void warning(const std::string &msg)
|
||||
{
|
||||
std::cerr << "parser warning at line" << m_line_nr << ": " << msg << std::endl;
|
||||
}
|
||||
|
||||
// production methods, these are pure virtual here
|
||||
|
||||
virtual void produce_datablock(const std::string &name) = 0;
|
||||
virtual void produce_category(const std::string &name) = 0;
|
||||
virtual void produce_row() = 0;
|
||||
virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;
|
||||
|
||||
protected:
|
||||
enum State
|
||||
{
|
||||
Start,
|
||||
White,
|
||||
Esc,
|
||||
Comment,
|
||||
QuestionMark,
|
||||
Dot,
|
||||
QuotedString,
|
||||
QuotedStringQuote,
|
||||
UnquotedString,
|
||||
Tag,
|
||||
TextField,
|
||||
Float = 100,
|
||||
Int = 110,
|
||||
Value = 300,
|
||||
DATA,
|
||||
SAVE
|
||||
};
|
||||
|
||||
std::istream &m_source;
|
||||
|
||||
// Parser state
|
||||
bool m_validate;
|
||||
uint32_t m_line_nr;
|
||||
bool m_bol;
|
||||
CIFToken m_lookahead;
|
||||
std::string m_token_value;
|
||||
CIFValue mTokenType;
|
||||
std::stack<int> m_buffer;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class parser : public sac_parser
|
||||
{
|
||||
public:
|
||||
parser(std::istream &is, file &file)
|
||||
: sac_parser(is)
|
||||
, m_file(file)
|
||||
{
|
||||
}
|
||||
|
||||
void produce_datablock(const std::string &name) override;
|
||||
|
||||
void produce_category(const std::string &name) override;
|
||||
|
||||
void produce_row() override;
|
||||
|
||||
void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
|
||||
|
||||
protected:
|
||||
file &m_file;
|
||||
datablock *m_datablock = nullptr;
|
||||
category *m_category = nullptr;
|
||||
row_handle m_row;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,14 +26,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
|
||||
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile);
|
||||
void write_header_lines(std::ostream &os, const datablock &data);
|
||||
|
||||
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
|
||||
} // namespace cif::pdb
|
||||
44
include/cif++/pdb/io.hpp
Normal file
44
include/cif++/pdb/io.hpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
file read(std::istream &is);
|
||||
file read(const std::filesystem::path &file);
|
||||
|
||||
void write(std::ostream &os, const datablock &db);
|
||||
|
||||
inline void write(std::ostream &os, const file &f)
|
||||
{
|
||||
write(os, f.front());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,7 +26,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
@@ -57,4 +60,6 @@ struct PDBRecord
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void ReadPDBFile(std::istream &pdbFile, cif::File &cifFile);
|
||||
void ReadPDBFile(std::istream &pdbFile, file &cifFile);
|
||||
|
||||
} // namespace cif::pdb
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,10 +26,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/PDB2Cif.hpp"
|
||||
#include <cif++/pdb/pdb2cif.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
struct TemplateLine;
|
||||
|
||||
class Remark3Parser
|
||||
@@ -37,37 +40,36 @@ class Remark3Parser
|
||||
public:
|
||||
virtual ~Remark3Parser() {}
|
||||
|
||||
static bool parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db);
|
||||
static bool parse(const std::string &expMethod, PDBRecord *r, cif::datablock &db);
|
||||
|
||||
virtual std::string program();
|
||||
virtual std::string version();
|
||||
|
||||
protected:
|
||||
|
||||
Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
|
||||
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
|
||||
Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db,
|
||||
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
|
||||
|
||||
virtual float parse();
|
||||
std::string nextLine();
|
||||
|
||||
bool match(const char* expr, int nextState);
|
||||
void storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
|
||||
void storeRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
void updateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
bool match(const char *expr, int nextState);
|
||||
void storeCapture(const char *category, std::initializer_list<const char *> items, bool createNew = false);
|
||||
void storeRefineLsRestr(const char *type, std::initializer_list<const char *> values);
|
||||
void updateRefineLsRestr(const char *type, std::initializer_list<const char *> values);
|
||||
|
||||
virtual void fixup() {}
|
||||
|
||||
std::string mName;
|
||||
std::string mExpMethod;
|
||||
PDBRecord* mRec;
|
||||
cif::Datablock mDb;
|
||||
std::string mLine;
|
||||
std::smatch mM;
|
||||
uint32_t mState;
|
||||
std::string mName;
|
||||
std::string mExpMethod;
|
||||
PDBRecord *mRec;
|
||||
cif::datablock mDb;
|
||||
std::string mLine;
|
||||
std::smatch mM;
|
||||
uint32_t mState;
|
||||
|
||||
const TemplateLine* mTemplate;
|
||||
uint32_t mTemplateCount;
|
||||
std::regex mProgramVersion;
|
||||
const TemplateLine *mTemplate;
|
||||
uint32_t mTemplateCount;
|
||||
std::regex mProgramVersion;
|
||||
};
|
||||
|
||||
|
||||
} // namespace cif::pdb
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,32 +26,30 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
|
||||
extern const int
|
||||
kResidueNrWildcard,
|
||||
kNoSeqNum;
|
||||
|
||||
struct TLSSelection;
|
||||
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
|
||||
struct tls_selection;
|
||||
struct tls_residue;
|
||||
|
||||
struct TLSResidue;
|
||||
|
||||
struct TLSSelection
|
||||
struct tls_selection
|
||||
{
|
||||
virtual ~TLSSelection() {}
|
||||
virtual void CollectResidues(cif::Datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0;
|
||||
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::Datablock& db, bool pdbNamespace) const;
|
||||
virtual ~tls_selection() {}
|
||||
virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
|
||||
std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
|
||||
};
|
||||
|
||||
// Low level: get the selections
|
||||
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
|
||||
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
|
||||
|
||||
}
|
||||
} // namespace cif
|
||||
736
include/cif++/point.hpp
Normal file
736
include/cif++/point.hpp
Normal file
@@ -0,0 +1,736 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <functional>
|
||||
#include <valarray>
|
||||
|
||||
#if __has_include(<clipper/core/coords.h>)
|
||||
#define HAVE_LIBCLIPPER 1
|
||||
#include <clipper/core/coords.h>
|
||||
#endif
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
const double
|
||||
kPI = 3.141592653589793238462643383279502884;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// A stripped down quaternion implementation, based on boost::math::quaternion
|
||||
// We use quaternions to do rotations in 3d space
|
||||
|
||||
/// \brief A quaternion a + b*i + c*j + d*k with components of type \a T.
///
/// The four components are stored privately; other specialisations of this
/// template therefore have to go through the public get_a() .. get_d()
/// accessors, which is what all the mixed-type members below do.
template <typename T>
class quaternion_type
{
  public:
	using value_type = T;

	/// \brief Construct from four components, each defaulting to value_type().
	constexpr explicit quaternion_type(value_type const &value_a = value_type(), value_type const &value_b = value_type(), value_type const &value_c = value_type(), value_type const &value_d = value_type())
		: a(value_a)
		, b(value_b)
		, c(value_c)
		, d(value_d)
	{
	}

	/// \brief Construct from two complex numbers: \a z0 supplies a and b, \a z1 supplies c and d.
	constexpr explicit quaternion_type(std::complex<value_type> const &z0, std::complex<value_type> const &z1 = std::complex<value_type>())
		: a(z0.real())
		, b(z0.imag())
		, c(z1.real())
		, d(z1.imag())
	{
	}

	constexpr quaternion_type(quaternion_type const &) = default;
	constexpr quaternion_type(quaternion_type &&) = default;

	/// \brief Convert from a quaternion with another component type.
	template <typename X>
	constexpr explicit quaternion_type(quaternion_type<X> const &rhs)
		// rhs is a different class type, so its private members are out of reach: use the accessors
		: a(static_cast<value_type>(rhs.get_a()))
		, b(static_cast<value_type>(rhs.get_b()))
		, c(static_cast<value_type>(rhs.get_c()))
		, d(static_cast<value_type>(rhs.get_d()))
	{
	}

	// accessors
	//
	// Note: Like complex number, quaternions do have a meaningful notion of "real part",
	// but unlike them there is no meaningful notion of "imaginary part".
	// Instead there is an "unreal part" which itself is a quaternion, and usually
	// nothing simpler (as opposed to the complex number case).
	// However, for practicality, there are accessors for the other components
	// (these are necessary for the templated copy constructor, for instance).

	/// \brief The real part, i.e. component a.
	constexpr value_type real() const
	{
		return a;
	}

	/// \brief The unreal part: this quaternion with its real component set to zero.
	constexpr quaternion_type unreal() const
	{
		// The constructor is explicit, so a plain braced return list is not allowed here.
		return quaternion_type(static_cast<value_type>(0), b, c, d);
	}

	/// \brief Exchange all four components with \a o.
	constexpr void swap(quaternion_type &o)
	{
		std::swap(a, o.a);
		std::swap(b, o.b);
		std::swap(c, o.c);
		std::swap(d, o.d);
	}

	// assignment operators

	/// \brief Assign from a quaternion with another component type.
	template <typename X>
	constexpr quaternion_type &operator=(quaternion_type<X> const &rhs)
	{
		a = static_cast<value_type>(rhs.get_a());
		b = static_cast<value_type>(rhs.get_b());
		c = static_cast<value_type>(rhs.get_c());
		d = static_cast<value_type>(rhs.get_d());

		return *this;
	}

	constexpr quaternion_type &operator=(quaternion_type const &rhs)
	{
		a = rhs.a;
		b = rhs.b;
		c = rhs.c;
		d = rhs.d;

		return *this;
	}

	/// \brief Assign a scalar: a becomes \a rhs, the other components become zero.
	constexpr quaternion_type &operator=(value_type const &rhs)
	{
		a = rhs;

		b = c = d = static_cast<value_type>(0);

		return *this;
	}

	/// \brief Assign a complex number: a and b are taken from \a rhs, c and d become zero.
	constexpr quaternion_type &operator=(std::complex<value_type> const &rhs)
	{
		a = rhs.real();
		b = rhs.imag();

		c = d = static_cast<value_type>(0);

		return *this;
	}

	// other assignment-related operators
	//
	// NOTE: Quaternion multiplication is *NOT* commutative;
	// symbolically, "q *= rhs;" means "q = q * rhs;"
	// and "q /= rhs;" means "q = q * inverse_of(rhs);"

	constexpr quaternion_type &operator+=(value_type const &rhs)
	{
		a += rhs;
		return *this;
	}

	constexpr quaternion_type &operator+=(std::complex<value_type> const &rhs)
	{
		a += std::real(rhs);
		b += std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator+=(quaternion_type<X> const &rhs)
	{
		a += rhs.get_a();
		b += rhs.get_b();
		c += rhs.get_c();
		d += rhs.get_d();
		return *this;
	}

	constexpr quaternion_type &operator-=(value_type const &rhs)
	{
		a -= rhs;
		return *this;
	}

	constexpr quaternion_type &operator-=(std::complex<value_type> const &rhs)
	{
		a -= std::real(rhs);
		b -= std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator-=(quaternion_type<X> const &rhs)
	{
		a -= rhs.get_a();
		b -= rhs.get_b();
		c -= rhs.get_c();
		d -= rhs.get_d();
		return *this;
	}

	constexpr quaternion_type &operator*=(value_type const &rhs)
	{
		a *= rhs;
		b *= rhs;
		c *= rhs;
		d *= rhs;
		return *this;
	}

	constexpr quaternion_type &operator*=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();

		// Build the product in a temporary: every term needs the old a..d.
		quaternion_type result(a * ar - b * br, a * br + b * ar, c * ar + d * br, -c * br + d * ar);
		swap(result);
		return *this;
	}

	/// \brief The product a * b (in that order, multiplication is not commutative).
	constexpr friend quaternion_type operator*(const quaternion_type &a, const quaternion_type &b)
	{
		auto result = a;
		result *= b;
		return result;
	}

	template <typename X>
	constexpr quaternion_type &operator*=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		// Build the product in a temporary: every term needs the old a..d.
		quaternion_type result(a * ar - b * br - c * cr - d * dr, a * br + b * ar + c * dr - d * cr, a * cr - b * dr + c * ar + d * br, a * dr + b * cr - c * br + d * ar);
		swap(result);
		return *this;
	}

	constexpr quaternion_type &operator/=(value_type const &rhs)
	{
		a /= rhs;
		b /= rhs;
		c /= rhs;
		d /= rhs;
		return *this;
	}

	constexpr quaternion_type &operator/=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();
		value_type denominator = ar * ar + br * br;

		quaternion_type result((+a * ar + b * br) / denominator, (-a * br + b * ar) / denominator, (+c * ar - d * br) / denominator, (+c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	template <typename X>
	constexpr quaternion_type &operator/=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		value_type denominator = ar * ar + br * br + cr * cr + dr * dr;

		quaternion_type result((+a * ar + b * br + c * cr + d * dr) / denominator, (-a * br + b * ar - c * dr + d * cr) / denominator, (-a * cr + b * dr + c * ar - d * br) / denominator, (-a * dr - b * cr + c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	/// \brief Return \a q scaled to unit length.
	///
	/// When the length of \a q is not larger than 0.001 the result is
	/// (1, 0, 0, 0) instead, which avoids dividing by a (near) zero length.
	constexpr friend quaternion_type normalize(quaternion_type q)
	{
		std::valarray<value_type> t(4);

		t[0] = q.a;
		t[1] = q.b;
		t[2] = q.c;
		t[3] = q.d;

		// element-wise square, so that sum() is the squared length
		t *= t;

		value_type length = std::sqrt(t.sum());

		if (length > 0.001)
			q /= static_cast<value_type>(length);
		else
			q = quaternion_type(1, 0, 0, 0);

		return q;
	}

	/// \brief The conjugate of \a q: same real part, negated b, c and d.
	constexpr friend quaternion_type conj(quaternion_type q)
	{
		return quaternion_type{ +q.a, -q.b, -q.c, -q.d };
	}

	constexpr value_type get_a() const { return a; }
	constexpr value_type get_b() const { return b; }
	constexpr value_type get_c() const { return c; }
	constexpr value_type get_d() const { return d; }

  private:
	value_type a, b, c, d;
};
|
||||
|
||||
template <typename T>
|
||||
inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1, T const &phi2)
|
||||
{
|
||||
T cos_phi1 = std::cos(phi1);
|
||||
T cos_phi2 = std::cos(phi2);
|
||||
|
||||
T a = std::cos(theta) * cos_phi1 * cos_phi2;
|
||||
T b = std::sin(theta) * cos_phi1 * cos_phi2;
|
||||
T c = std::sin(phi1) * cos_phi2;
|
||||
T d = std::sin(phi2);
|
||||
|
||||
quaternion_type result(a, b, c, d);
|
||||
result *= rho;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
using quaternion = quaternion_type<float>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// point, a location with x, y and z coordinates as floating point.
|
||||
// It converts implicitly to a std::tuple of references to x, y and z, so
|
||||
// you can do things like:
|
||||
//
|
||||
// float x, y, z;
|
||||
// tie(x, y, z) = atom.loc();
|
||||
|
||||
template <typename F>
|
||||
struct point_type
|
||||
{
|
||||
using value_type = F;
|
||||
|
||||
value_type m_x, m_y, m_z;
|
||||
|
||||
constexpr point_type()
|
||||
: m_x(0)
|
||||
, m_y(0)
|
||||
, m_z(0)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type(value_type x, value_type y, value_type z)
|
||||
: m_x(x)
|
||||
, m_y(y)
|
||||
, m_z(z)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename PF>
|
||||
constexpr point_type(const point_type<PF> &pt)
|
||||
: m_x(static_cast<F>(pt.m_x))
|
||||
, m_y(static_cast<F>(pt.m_y))
|
||||
, m_z(static_cast<F>(pt.m_z))
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type(const std::tuple<value_type, value_type, value_type> &pt)
|
||||
: point_type(std::get<0>(pt), std::get<1>(pt), std::get<2>(pt))
|
||||
{
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
constexpr point_type(const clipper::Coord_orth &pt)
|
||||
: m_x(pt[0])
|
||||
, m_y(pt[1])
|
||||
, m_z(pt[2])
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type &operator=(const clipper::Coord_orth &rhs)
|
||||
{
|
||||
m_x = rhs[0];
|
||||
m_y = rhs[1];
|
||||
m_z = rhs[2];
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename PF>
|
||||
constexpr point_type &operator=(const point_type<PF> &rhs)
|
||||
{
|
||||
m_x = static_cast<F>(rhs.m_x);
|
||||
m_y = static_cast<F>(rhs.m_y);
|
||||
m_z = static_cast<F>(rhs.m_z);
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr value_type &get_x() { return m_x; }
|
||||
constexpr value_type get_x() const { return m_x; }
|
||||
constexpr void set_x(value_type x) { m_x = x; }
|
||||
|
||||
constexpr value_type &get_y() { return m_y; }
|
||||
constexpr value_type get_y() const { return m_y; }
|
||||
constexpr void set_y(value_type y) { m_y = y; }
|
||||
|
||||
constexpr value_type &get_z() { return m_z; }
|
||||
constexpr value_type get_z() const { return m_z; }
|
||||
constexpr void set_z(value_type z) { m_z = z; }
|
||||
|
||||
constexpr point_type &operator+=(const point_type &rhs)
|
||||
{
|
||||
m_x += rhs.m_x;
|
||||
m_y += rhs.m_y;
|
||||
m_z += rhs.m_z;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator+=(value_type d)
|
||||
{
|
||||
m_x += d;
|
||||
m_y += d;
|
||||
m_z += d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator-=(const point_type &rhs)
|
||||
{
|
||||
m_x -= rhs.m_x;
|
||||
m_y -= rhs.m_y;
|
||||
m_z -= rhs.m_z;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator-=(value_type d)
|
||||
{
|
||||
m_x -= d;
|
||||
m_y -= d;
|
||||
m_z -= d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator*=(value_type rhs)
|
||||
{
|
||||
m_x *= rhs;
|
||||
m_y *= rhs;
|
||||
m_z *= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator/=(value_type rhs)
|
||||
{
|
||||
m_x /= rhs;
|
||||
m_y /= rhs;
|
||||
m_z /= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr value_type normalize()
|
||||
{
|
||||
auto length = m_x * m_x + m_y * m_y + m_z * m_z;
|
||||
if (length > 0)
|
||||
{
|
||||
length = std::sqrt(length);
|
||||
operator/=(length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
constexpr void rotate(const quaternion &q)
|
||||
{
|
||||
quaternion_type<value_type> p(0, m_x, m_y, m_z);
|
||||
|
||||
p = q * p * conj(q);
|
||||
|
||||
m_x = p.get_b();
|
||||
m_y = p.get_c();
|
||||
m_z = p.get_d();
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
operator clipper::Coord_orth() const
|
||||
{
|
||||
return clipper::Coord_orth(m_x, m_y, m_z);
|
||||
}
|
||||
#endif
|
||||
|
||||
constexpr operator std::tuple<const value_type &, const value_type &, const value_type &>() const
|
||||
{
|
||||
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
|
||||
}
|
||||
|
||||
constexpr operator std::tuple<value_type &, value_type &, value_type &>()
|
||||
{
|
||||
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
|
||||
}
|
||||
|
||||
constexpr bool operator==(const point_type &rhs) const
|
||||
{
|
||||
return m_x == rhs.m_x and m_y == rhs.m_y and m_z == rhs.m_z;
|
||||
}
|
||||
|
||||
// consider point as a vector... perhaps I should rename point?
|
||||
/// \brief The squared length of this point seen as a vector: x*x + y*y + z*z
constexpr value_type length_sq() const
{
    const auto sum_of_squares = m_x * m_x + m_y * m_y + m_z * m_z;
    return sum_of_squares;
}
|
||||
|
||||
/// \brief The length of this point seen as a vector: sqrt(x*x + y*y + z*z)
constexpr value_type length() const
{
    const auto sum_of_squares = m_x * m_x + m_y * m_y + m_z * m_z;
    return std::sqrt(sum_of_squares);
}
|
||||
};
|
||||
|
||||
using point = point_type<float>;
|
||||
|
||||
template <typename F>
|
||||
inline constexpr std::ostream &operator<<(std::ostream &os, const point_type<F> &pt)
|
||||
{
|
||||
os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator+(const point_type<F> &lhs, const point_type<F> &rhs)
|
||||
{
|
||||
return point_type<F>(lhs.m_x + rhs.m_x, lhs.m_y + rhs.m_y, lhs.m_z + rhs.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator-(const point_type<F> &lhs, const point_type<F> &rhs)
|
||||
{
|
||||
return point_type<F>(lhs.m_x - rhs.m_x, lhs.m_y - rhs.m_y, lhs.m_z - rhs.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator-(const point_type<F> &pt)
|
||||
{
|
||||
return point_type<F>(-pt.m_x, -pt.m_y, -pt.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator*(const point_type<F> &pt, F f)
|
||||
{
|
||||
return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator*(F f, const point_type<F> &pt)
|
||||
{
|
||||
return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator/(const point_type<F> &pt, F f)
|
||||
{
|
||||
return point_type<F>(pt.m_x / f, pt.m_y / f, pt.m_z / f);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// several standard 3d operations
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto distance_squared(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return (a.m_x - b.m_x) * (a.m_x - b.m_x) +
|
||||
(a.m_y - b.m_y) * (a.m_y - b.m_y) +
|
||||
(a.m_z - b.m_z) * (a.m_z - b.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto distance(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return std::sqrt(
|
||||
(a.m_x - b.m_x) * (a.m_x - b.m_x) +
|
||||
(a.m_y - b.m_y) * (a.m_y - b.m_y) +
|
||||
(a.m_z - b.m_z) * (a.m_z - b.m_z));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto dot_product(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return a.m_x * b.m_x + a.m_y * b.m_y + a.m_z * b.m_z;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> cross_product(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return point_type<F>(a.m_y * b.m_z - b.m_y * a.m_z,
|
||||
a.m_z * b.m_x - b.m_z * a.m_x,
|
||||
a.m_x * b.m_y - b.m_x * a.m_y);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
|
||||
{
|
||||
point_type<F> v1 = p1 - p2;
|
||||
point_type<F> v2 = p3 - p2;
|
||||
|
||||
return std::acos(dot_product(v1, v2) / (v1.length() * v2.length())) * 180 / kPI;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
|
||||
{
|
||||
point_type<F> v12 = p1 - p2; // vector from p2 to p1
|
||||
point_type<F> v43 = p4 - p3; // vector from p3 to p4
|
||||
|
||||
point_type<F> z = p2 - p3; // vector from p3 to p2
|
||||
|
||||
point_type<F> p = cross_product(z, v12);
|
||||
point_type<F> x = cross_product(z, v43);
|
||||
point_type<F> y = cross_product(z, x);
|
||||
|
||||
auto u = dot_product(x, x);
|
||||
auto v = dot_product(y, y);
|
||||
|
||||
F result = 360;
|
||||
if (u > 0 and v > 0)
|
||||
{
|
||||
u = dot_product(p, x) / std::sqrt(u);
|
||||
v = dot_product(p, y) / std::sqrt(v);
|
||||
if (u != 0 or v != 0)
|
||||
result = std::atan2(v, u) * static_cast<F>(180 / kPI);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
|
||||
{
|
||||
point_type<F> v12 = p1 - p2;
|
||||
point_type<F> v34 = p3 - p4;
|
||||
|
||||
auto x = dot_product(v12, v12) * dot_product(v34, v34);
|
||||
|
||||
return x > 0 ? dot_product(v12, v34) / std::sqrt(x) : 0;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<F> &l2, const point_type<F> &p)
|
||||
{
|
||||
auto line = l2 - l1;
|
||||
auto p_to_l1 = p - l1;
|
||||
auto p_to_l2 = p - l2;
|
||||
auto cross = cross_product(p_to_l1, p_to_l2);
|
||||
return cross.length() / line.length();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// For e.g. simulated annealing, returns a new point that is moved in
|
||||
// a random direction with a distance randomly chosen from a normal
|
||||
// distribution with a stddev of offset.
|
||||
|
||||
point nudge(point p, float offset);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
quaternion construct_from_angle_axis(float angle, point axis);
|
||||
std::tuple<double, point> quaternion_to_angle_axis(quaternion q);
|
||||
|
||||
point centroid(const std::vector<point> &Points);
|
||||
point center_points(std::vector<point> &Points);
|
||||
|
||||
/// \brief Returns how the two sets of points \a a and \a b can be aligned
|
||||
///
|
||||
/// \param a The first set of points
|
||||
/// \param b The second set of points
|
||||
/// \result The quaternion which should be applied to the points in \a a to
|
||||
/// obtain the best superposition.
|
||||
quaternion align_points(const std::vector<point> &a, const std::vector<point> &b);
|
||||
|
||||
/// \brief The RMSd for the points in \a a and \a b
|
||||
double RMSd(const std::vector<point> &a, const std::vector<point> &b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Helper class to generate evenly divided points on a sphere
|
||||
// we use a fibonacci sphere to calculate even distribution of the dots
|
||||
|
||||
template <int N>
|
||||
class spherical_dots
|
||||
{
|
||||
public:
|
||||
|
||||
constexpr static int P = 2 * N * 1;
|
||||
|
||||
using array_type = typename std::array<point, P>;
|
||||
using iterator = typename array_type::const_iterator;
|
||||
|
||||
static spherical_dots &instance()
|
||||
{
|
||||
static spherical_dots sInstance;
|
||||
return sInstance;
|
||||
}
|
||||
|
||||
size_t size() const { return m_points.size(); }
|
||||
const point operator[](uint32_t inIx) const { return m_points[inIx]; }
|
||||
iterator begin() const { return m_points.begin(); }
|
||||
iterator end() const { return m_points.end(); }
|
||||
|
||||
double weight() const { return m_weight; }
|
||||
|
||||
spherical_dots()
|
||||
{
|
||||
const double
|
||||
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
|
||||
|
||||
m_weight = (4 * kPI) / P;
|
||||
|
||||
auto p = m_points.begin();
|
||||
|
||||
for (int32_t i = -N; i <= N; ++i)
|
||||
{
|
||||
double lat = std::asin((2.0 * i) / P);
|
||||
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
|
||||
|
||||
p->m_x = std::sin(lon) * std::cos(lat);
|
||||
p->m_y = std::cos(lon) * std::cos(lat);
|
||||
p->m_z = std::sin(lat);
|
||||
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
array_type m_points;
|
||||
double m_weight;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
305
include/cif++/row.hpp
Normal file
305
include/cif++/row.hpp
Normal file
@@ -0,0 +1,305 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/item.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// some helper classes to help create tuple result types
|
||||
template <typename... C>
|
||||
struct get_row_result
|
||||
{
|
||||
static constexpr size_t N = sizeof...(C);
|
||||
|
||||
get_row_result(const row_handle &r, std::array<size_t, N> &&columns)
|
||||
: m_row(r)
|
||||
, m_columns(std::move(columns))
|
||||
{
|
||||
}
|
||||
|
||||
const item_handle operator[](size_t ix) const
|
||||
{
|
||||
return m_row[m_columns[ix]];
|
||||
}
|
||||
|
||||
template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0>
|
||||
operator std::tuple<Ts...>() const
|
||||
{
|
||||
return get<Ts...>(std::index_sequence_for<Ts...>{});
|
||||
}
|
||||
|
||||
template <typename... Ts, std::size_t... Is>
|
||||
std::tuple<Ts...> get(std::index_sequence<Is...>) const
|
||||
{
|
||||
return std::tuple<Ts...>{ m_row[m_columns[Is]].template as<Ts>()... };
|
||||
}
|
||||
|
||||
const row_handle &m_row;
|
||||
std::array<size_t, N> m_columns;
|
||||
};
|
||||
|
||||
// we want to be able to tie some variables to a get_row_result, for this we use tiewraps
|
||||
/// \brief Wrapper around a tuple, of references when created by cif::tie,
/// that can be assigned from anything that can be cast to a tuple of
/// the corresponding value types.
template <typename... Ts>
struct tie_wrap
{
    tie_wrap(Ts... args)
        : m_value(args...)
    {
    }

    /// \brief Cast \a rr to a tuple of values and assign that to the
    /// wrapped tuple.
    ///
    /// The conversion is left to \a RR, e.g. a get_row_result. It will
    /// only compile when the types are compatible, meaning the number
    /// of values offered equals the number of items in the tuple that
    /// is being tied.
    template <typename RR>
    void operator=(const RR &&rr)
    {
        using value_tuple = std::tuple<std::remove_reference_t<Ts>...>;

        m_value = static_cast<value_tuple>(rr);
    }

    std::tuple<Ts...> m_value;
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename... Ts>
|
||||
auto tie(Ts &...v)
|
||||
{
|
||||
return detail::tie_wrap<Ts &...>(std::forward<Ts &>(v)...);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief the row class, this one is not directly accessible from the outside
|
||||
|
||||
class row : public std::vector<item_value>
|
||||
{
|
||||
public:
|
||||
row() = default;
|
||||
|
||||
item_value* get(size_t ix)
|
||||
{
|
||||
return ix < size() ? &at(ix) : nullptr;
|
||||
}
|
||||
|
||||
const item_value* get(size_t ix) const
|
||||
{
|
||||
return ix < size() ? &at(ix) : nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class category;
|
||||
friend class category_index;
|
||||
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
void append(size_t ix, item_value &&iv)
|
||||
{
|
||||
if (ix >= size())
|
||||
resize(ix + 1);
|
||||
|
||||
at(ix) = std::move(iv);
|
||||
}
|
||||
|
||||
void remove(size_t ix)
|
||||
{
|
||||
if (ix < size())
|
||||
at(ix) = item_value{};
|
||||
}
|
||||
|
||||
row *m_next = nullptr;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief row_handle is the way to access data stored in rows
|
||||
|
||||
class row_handle
|
||||
{
|
||||
public:
|
||||
friend class item_handle;
|
||||
friend class category;
|
||||
friend class category_index;
|
||||
friend class row_initializer;
|
||||
|
||||
row_handle() = default;
|
||||
|
||||
row_handle(const row_handle &) = default;
|
||||
row_handle(row_handle &&) = default;
|
||||
|
||||
row_handle &operator=(const row_handle &) = default;
|
||||
row_handle &operator=(row_handle &&) = default;
|
||||
|
||||
row_handle(const category &cat, const row &r)
|
||||
: m_category(const_cast<category *>(&cat))
|
||||
, m_row(const_cast<row *>(&r))
|
||||
{
|
||||
}
|
||||
|
||||
const category &get_category() const
|
||||
{
|
||||
return *m_category;
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_category == nullptr or m_row == nullptr;
|
||||
}
|
||||
|
||||
explicit operator bool() const
|
||||
{
|
||||
return not empty();
|
||||
}
|
||||
|
||||
item_handle operator[](uint32_t column_ix)
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(column_ix, *this);
|
||||
}
|
||||
|
||||
const item_handle operator[](uint32_t column_ix) const
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this));
|
||||
}
|
||||
|
||||
item_handle operator[](std::string_view column_name)
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this);
|
||||
}
|
||||
|
||||
const item_handle operator[](std::string_view column_name) const
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this));
|
||||
}
|
||||
|
||||
template <typename... C>
|
||||
auto get(C... columns) const
|
||||
{
|
||||
return detail::get_row_result<C...>(*this, { get_column_ix(columns)... });
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C), int> = 0>
|
||||
std::tuple<Ts...> get(C... columns) const
|
||||
{
|
||||
return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T get(const char *column)
|
||||
{
|
||||
return operator[](get_column_ix(column)).template as<T>();
|
||||
}
|
||||
|
||||
void assign(const std::vector<item> &values)
|
||||
{
|
||||
for (auto &value : values)
|
||||
assign(value, true);
|
||||
}
|
||||
|
||||
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
|
||||
{
|
||||
assign(add_column(name), value, updateLinked, validate);
|
||||
}
|
||||
|
||||
void assign(size_t column, std::string_view value, bool updateLinked, bool validate = true);
|
||||
|
||||
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
|
||||
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
|
||||
|
||||
private:
|
||||
uint16_t get_column_ix(std::string_view name) const;
|
||||
std::string_view get_column_name(uint16_t ix) const;
|
||||
|
||||
uint16_t add_column(std::string_view name);
|
||||
|
||||
row *get_row()
|
||||
{
|
||||
return m_row;
|
||||
}
|
||||
|
||||
const row *get_row() const
|
||||
{
|
||||
return m_row;
|
||||
}
|
||||
|
||||
void assign(const item &i, bool updateLinked)
|
||||
{
|
||||
assign(i.name(), i.value(), updateLinked);
|
||||
}
|
||||
|
||||
void swap(size_t column, row_handle &r);
|
||||
|
||||
category *m_category = nullptr;
|
||||
row *m_row = nullptr;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class row_initializer : public std::vector<item>
|
||||
{
|
||||
public:
|
||||
friend class category;
|
||||
|
||||
row_initializer() = default;
|
||||
row_initializer(const row_initializer &) = default;
|
||||
row_initializer(row_initializer &&) = default;
|
||||
row_initializer &operator=(const row_initializer &) = default;
|
||||
row_initializer &operator=(row_initializer &&) = default;
|
||||
|
||||
row_initializer(std::initializer_list<item> items)
|
||||
: std::vector<item>(items)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ItemIter, std::enable_if_t<std::is_same_v<typename ItemIter::value_type, item>, int> = 0>
|
||||
row_initializer(ItemIter b, ItemIter e)
|
||||
: std::vector<item>(b, e)
|
||||
{
|
||||
}
|
||||
|
||||
row_initializer(row_handle rh);
|
||||
|
||||
void set_value(std::string_view name, std::string_view value);
|
||||
void set_value(const item &i)
|
||||
{
|
||||
set_value(i.name(), i.value());
|
||||
}
|
||||
|
||||
void set_value_if_empty(std::string_view name, std::string_view value);
|
||||
void set_value_if_empty(const item &i)
|
||||
{
|
||||
set_value_if_empty(i.name(), i.value());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,57 +26,62 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "CifUtils.hpp"
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct Spacegroup
|
||||
enum class space_group_name
|
||||
{
|
||||
const char* name;
|
||||
const char* xHM;
|
||||
const char* Hall;
|
||||
full,
|
||||
xHM,
|
||||
Hall
|
||||
};
|
||||
|
||||
struct space_group
|
||||
{
|
||||
const char *name;
|
||||
const char *xHM;
|
||||
const char *Hall;
|
||||
int nr;
|
||||
};
|
||||
|
||||
CIFPP_EXPORT extern const Spacegroup kSpaceGroups[];
|
||||
CIFPP_EXPORT extern const std::size_t kNrOfSpaceGroups;
|
||||
extern const space_group kSpaceGroups[];
|
||||
extern const std::size_t kNrOfSpaceGroups;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct SymopData
|
||||
struct symop_data
|
||||
{
|
||||
constexpr SymopData(const std::array<int,15>& data)
|
||||
: m_packed((data[ 0] & 0x03ULL) << 34 bitor
|
||||
(data[ 1] & 0x03ULL) << 32 bitor
|
||||
(data[ 2] & 0x03ULL) << 30 bitor
|
||||
(data[ 3] & 0x03ULL) << 28 bitor
|
||||
(data[ 4] & 0x03ULL) << 26 bitor
|
||||
(data[ 5] & 0x03ULL) << 24 bitor
|
||||
(data[ 6] & 0x03ULL) << 22 bitor
|
||||
(data[ 7] & 0x03ULL) << 20 bitor
|
||||
(data[ 8] & 0x03ULL) << 18 bitor
|
||||
(data[ 9] & 0x07ULL) << 15 bitor
|
||||
constexpr symop_data(const std::array<int, 15> &data)
|
||||
: m_packed((data[0] & 0x03ULL) << 34 bitor
|
||||
(data[1] & 0x03ULL) << 32 bitor
|
||||
(data[2] & 0x03ULL) << 30 bitor
|
||||
(data[3] & 0x03ULL) << 28 bitor
|
||||
(data[4] & 0x03ULL) << 26 bitor
|
||||
(data[5] & 0x03ULL) << 24 bitor
|
||||
(data[6] & 0x03ULL) << 22 bitor
|
||||
(data[7] & 0x03ULL) << 20 bitor
|
||||
(data[8] & 0x03ULL) << 18 bitor
|
||||
(data[9] & 0x07ULL) << 15 bitor
|
||||
(data[10] & 0x07ULL) << 12 bitor
|
||||
(data[11] & 0x07ULL) << 9 bitor
|
||||
(data[12] & 0x07ULL) << 6 bitor
|
||||
(data[13] & 0x07ULL) << 3 bitor
|
||||
(data[14] & 0x07ULL) << 0)
|
||||
(data[11] & 0x07ULL) << 9 bitor
|
||||
(data[12] & 0x07ULL) << 6 bitor
|
||||
(data[13] & 0x07ULL) << 3 bitor
|
||||
(data[14] & 0x07ULL) << 0)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator==(const SymopData& rhs) const
|
||||
bool operator==(const symop_data &rhs) const
|
||||
{
|
||||
return m_packed == rhs.m_packed;
|
||||
}
|
||||
|
||||
std::array<int,15> data() const
|
||||
std::array<int, 15> data() const
|
||||
{
|
||||
return {
|
||||
static_cast<int>(m_packed >> 34) bitand 0x03,
|
||||
@@ -90,49 +95,51 @@ struct SymopData
|
||||
static_cast<int>(m_packed >> 18) bitand 0x03,
|
||||
static_cast<int>(m_packed >> 15) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 12) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 9) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 6) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 3) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 0) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 9) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 6) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 3) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 0) bitand 0x07,
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
friend struct symop_datablock;
|
||||
|
||||
friend struct SymopDataBlock;
|
||||
const uint64_t kPackMask = (~0ULL >> (64 - 36));
|
||||
|
||||
const uint64_t kPackMask = (~0ULL >> (64-36));
|
||||
|
||||
SymopData(uint64_t v)
|
||||
: m_packed(v bitand kPackMask) {}
|
||||
symop_data(uint64_t v)
|
||||
: m_packed(v bitand kPackMask)
|
||||
{
|
||||
}
|
||||
|
||||
uint64_t m_packed;
|
||||
};
|
||||
|
||||
struct SymopDataBlock
|
||||
struct symop_datablock
|
||||
{
|
||||
constexpr SymopDataBlock(int spacegroup, int rotational_number, const std::array<int,15>& rt_data)
|
||||
constexpr symop_datablock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
|
||||
: m_v((spacegroup & 0xffffULL) << 48 bitor
|
||||
(rotational_number & 0xffULL) << 40 bitor
|
||||
SymopData(rt_data).m_packed)
|
||||
symop_data(rt_data).m_packed)
|
||||
{
|
||||
}
|
||||
|
||||
uint16_t spacegroup() const { return m_v >> 48; }
|
||||
SymopData symop() const { return SymopData(m_v); }
|
||||
uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
|
||||
uint16_t spacegroup() const { return m_v >> 48; }
|
||||
symop_data symop() const { return symop_data(m_v); }
|
||||
uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
|
||||
|
||||
private:
|
||||
uint64_t m_v;
|
||||
};
|
||||
|
||||
static_assert(sizeof(SymopDataBlock) == sizeof(uint64_t), "Size of SymopData is wrong");
|
||||
static_assert(sizeof(symop_datablock) == sizeof(uint64_t), "Size of symop_data is wrong");
|
||||
|
||||
CIFPP_EXPORT extern const SymopDataBlock kSymopNrTable[];
|
||||
CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
|
||||
extern const symop_datablock kSymopNrTable[];
|
||||
extern const std::size_t kSymopNrTableSize;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code
|
||||
int get_space_group_number(std::string spacegroup); // alternative for clipper's parsing code, using space_group_name::full
|
||||
int get_space_group_number(std::string spacegroup, space_group_name type); // alternative for clipper's parsing code
|
||||
|
||||
}
|
||||
} // namespace cif
|
||||
458
include/cif++/text.hpp
Normal file
458
include/cif++/text.hpp
Normal file
@@ -0,0 +1,458 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <charconv>
|
||||
#include <cmath>
|
||||
#include <set>
|
||||
#include <sstream>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#if __has_include(<experimental/type_traits>)
|
||||
#include <experimental/type_traits>
|
||||
#endif
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// some basic utilities: Since we're using ASCII input only, we define for optimisation
|
||||
// our own case conversion routines.
|
||||
|
||||
bool iequals(std::string_view a, std::string_view b);
|
||||
int icompare(std::string_view a, std::string_view b);
|
||||
|
||||
bool iequals(const char *a, const char *b);
|
||||
int icompare(const char *a, const char *b);
|
||||
|
||||
void to_lower(std::string &s);
|
||||
std::string to_lower_copy(std::string_view s);
|
||||
|
||||
void to_upper(std::string &s);
|
||||
// std::string toUpperCopy(const std::string &s);
|
||||
|
||||
/// \brief Write the elements in the range [ \a b, \a e ) to a string
/// using operator<<, with \a sep in between each pair of elements.
///
/// \return The resulting string, empty for an empty range
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
    std::ostringstream os;

    bool first = true;
    for (auto iter = b; iter != e; ++iter)
    {
        // the separator goes in front of all but the first element
        if (first)
            first = false;
        else
            os << sep;

        os << *iter;
    }

    return os.str();
}
|
||||
|
||||
/// \brief Join all elements of the container \a arr into a string
/// with \a sep in between each pair of elements.
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
    auto first = arr.begin();
    auto last = arr.end();

    return join(first, last, sep);
}
|
||||
|
||||
/// \brief Split the string \a s at each occurrence of any of the
/// characters in \a separators.
///
/// Positions are tracked as indices into \a s and each part is
/// constructed from a character pointer and a length. An iterator of
/// std::string_view is not necessarily a raw pointer, so it cannot
/// portably be passed to a (const char *, length) constructor.
///
/// \tparam StringType The type for the parts, should be constructible
///                    from a const char pointer and a length.
/// \param s              The string to split
/// \param separators     Each character in this string is a separator
/// \param suppress_empty When true, empty parts are left out of the result
/// \return The parts, in order. With \a suppress_empty false the result
///         contains one more part than there are separators in \a s.
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
    std::vector<StringType> result;

    std::size_t b = 0;

    for (;;)
    {
        const std::size_t e = s.find_first_of(separators, b);
        const std::size_t length = (e == std::string_view::npos ? s.length() : e) - b;

        if (length > 0 or not suppress_empty)
            result.emplace_back(s.data() + b, length);

        if (e == std::string_view::npos)
            break;

        b = e + 1;
    }

    return result;
}
|
||||
|
||||
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
|
||||
|
||||
#if defined(__cpp_lib_starts_ends_with)

/// \brief Return true if \a s starts with \a with
///
/// Takes a std::string_view, like ends_with does, to avoid copying
/// the string for each call.
inline bool starts_with(std::string_view s, std::string_view with)
{
    return s.starts_with(with);
}

/// \brief Return true if \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
    return s.ends_with(with);
}

#else

/// \brief Return true if \a s starts with \a with
///
/// Takes a std::string_view, like ends_with does, to avoid copying
/// the string for each call.
inline bool starts_with(std::string_view s, std::string_view with)
{
    // compare clips the length to the size of s, a shorter s never matches
    return s.compare(0, with.length(), with) == 0;
}

/// \brief Return true if \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
    return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
}

#endif
|
||||
|
||||
/// \brief Return true if the string \a q occurs somewhere in \a s
///
/// Uses the contains member of std::string_view when the standard
/// library offers it and a find otherwise.
inline bool contains(std::string_view s, std::string_view q)
{
#if defined(__cpp_lib_string_contains)
    return s.contains(q);
#else
    return s.find(q) != std::string_view::npos;
#endif
}
|
||||
|
||||
bool icontains(std::string_view s, std::string_view q);
|
||||
|
||||
void trim_left(std::string &s);
|
||||
void trim_right(std::string &s);
|
||||
void trim(std::string &s);
|
||||
|
||||
std::string trim_left_copy(std::string_view s);
|
||||
std::string trim_right_copy(std::string_view s);
|
||||
std::string trim_copy(std::string_view s);
|
||||
|
||||
// To make life easier, we also define iless and iset using iequals
|
||||
|
||||
struct iless
|
||||
{
|
||||
bool operator()(const std::string &a, const std::string &b) const
|
||||
{
|
||||
return icompare(a, b) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::set<std::string, iless> iset;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
extern const uint8_t kCharToLowerMap[256];
|
||||
|
||||
inline char tolower(int ch)
|
||||
{
|
||||
return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::tuple<std::string, std::string> split_tag_name(std::string_view tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// generate a cif name, mainly used to generate asym_id's
|
||||
|
||||
std::string cif_id_for_number(int number);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// custom wordwrapping routine
|
||||
|
||||
std::vector<std::string> word_wrap(const std::string &text, size_t width);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// std::from_chars for floating point types.
|
||||
/// These are optional, there's a selected_charconv class below that selects
|
||||
/// the best option to use based on support by the stl library
|
||||
/// I.e. that in case of GNU < 12 (or something) the cif implementation will
|
||||
/// be used, all other cases will use the stl version.
|
||||
|
||||
/// \brief Parse a decimal floating point number from the range [first, last).
///
/// Accepted syntax: an optional '+' or '-', decimal digits with an optional
/// '.', and an optional exponent ('e' or 'E', optional sign, digits).
/// Parsing stops at the first character that cannot be part of the number.
///
/// \param first  Start of the text to parse
/// \param last   One past the end of the text to parse
/// \param value  Receives the parsed number; left untouched when an error is reported
/// \return       On success ec is std::errc() and ptr points just past the parsed text.
///               ec is std::errc::invalid_argument (and ptr == first) when the text does
///               not contain a number: no mantissa digits, or an exponent marker that is
///               not followed by digits. ec is std::errc::result_out_of_range when the
///               magnitude exceeds what FloatType can hold.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::from_chars_result from_chars(const char *first, const char *last, FloatType &value)
{
	std::from_chars_result result{ first, {} };

	enum State
	{
		IntegerSign,
		Integer,
		Fraction,
		ExponentSign,
		Exponent
	} state = IntegerSign;

	// Largest mantissa that can take one more decimal digit without wrapping around
	constexpr unsigned long long kMaxMantissa = (std::numeric_limits<unsigned long long>::max() - 9) / 10;

	// Far beyond the range of any long double, keeps all int arithmetic below safe
	constexpr int kMaxExponent = 100000;

	int sign = 1;
	unsigned long long vi = 0; // mantissa digits collected as an integer
	int scale = 0;             // power of ten by which vi must be multiplied
	int exponent_sign = 1;
	int exponent = 0;
	bool have_digits = false;
	bool have_exponent_digits = false;
	bool done = false;

	const char *ptr = first;

	while (not done and result.ec == std::errc())
	{
		// The end of the input is presented to the state machine as a NUL character
		const char ch = ptr != last ? *ptr : 0;
		const bool is_digit = ch >= '0' and ch <= '9';
		bool consumed = true;

		switch (state)
		{
			case IntegerSign:
				if (ch == '-')
				{
					sign = -1;
					state = Integer;
				}
				else if (ch == '+')
					state = Integer;
				else if (is_digit)
				{
					vi = ch - '0';
					have_digits = true;
					state = Integer;
				}
				else if (ch == '.')
					state = Fraction;
				else
					result.ec = std::errc::invalid_argument;
				break;

			case Integer:
				if (is_digit)
				{
					have_digits = true;
					if (vi <= kMaxMantissa)
						vi = 10 * vi + (ch - '0');
					else if (scale < kMaxExponent)
						++scale; // digit no longer fits, only account for its magnitude
				}
				else if (ch == 'e' or ch == 'E')
					state = ExponentSign;
				else if (ch == '.')
					state = Fraction;
				else
				{
					done = true;
					consumed = false;
				}
				break;

			case Fraction:
				if (is_digit)
				{
					have_digits = true;
					// digits beyond the capacity of vi are below the precision we can keep
					if (vi <= kMaxMantissa and scale > -kMaxExponent)
					{
						vi = 10 * vi + (ch - '0');
						--scale;
					}
				}
				else if (ch == 'e' or ch == 'E')
					state = ExponentSign;
				else
				{
					done = true;
					consumed = false;
				}
				break;

			case ExponentSign:
				if (ch == '-')
				{
					exponent_sign = -1;
					state = Exponent;
				}
				else if (ch == '+')
					state = Exponent;
				else if (is_digit)
				{
					exponent = ch - '0';
					have_exponent_digits = true;
					state = Exponent;
				}
				else
					result.ec = std::errc::invalid_argument;
				break;

			case Exponent:
				if (is_digit)
				{
					have_exponent_digits = true;
					if (exponent < kMaxExponent)
						exponent = 10 * exponent + (ch - '0');
				}
				else
				{
					done = true;
					consumed = false;
				}
				break;
		}

		if (consumed and result.ec == std::errc())
			++ptr;
	}

	// A sign, a dot or an exponent marker without any digits is not a number
	if (result.ec == std::errc() and (not have_digits or (state == Exponent and not have_exponent_digits)))
		result.ec = std::errc::invalid_argument;

	if (result.ec != std::errc())
	{
		result.ptr = first;
		return result;
	}

	result.ptr = ptr;

	long double v = 0;
	if (vi != 0) // avoids 0 * inf for input like "0e99999"
	{
		const int e10 = scale + exponent_sign * exponent;

		v = static_cast<long double>(vi);
		if (e10 > 0)
			v *= std::pow(10.0L, e10);
		else if (e10 < 0)
		{
			// Divide by exact powers of ten, in steps so the divisor itself cannot overflow
			constexpr int kMaxStep = std::numeric_limits<long double>::max_exponent10;

			int n = -e10;
			while (n > kMaxStep and v != 0)
			{
				v /= std::pow(10.0L, kMaxStep);
				n -= kMaxStep;
			}
			v /= std::pow(10.0L, n);
		}
	}
	v *= sign;

	if (std::isnan(v))
		result.ec = std::errc::invalid_argument;
	else if (std::abs(v) > std::numeric_limits<FloatType>::max())
		result.ec = std::errc::result_out_of_range;
	else
		value = static_cast<FloatType>(v);

	return result;
}
|
||||
|
||||
/// Output notations supported by cif::to_chars; the enumerators combine as
/// bits, general being both fixed and scientific.
enum class chars_format
{
	scientific = 1,
	fixed = 2,
	// hex,
	general = fixed | scientific
};

/// \brief Write \a value as text into [first, last) using snprintf.
///
/// \param first      Start of the output buffer
/// \param last       One past the end of the output buffer
/// \param value      The number to format
/// \param fmt        Notation: %e for scientific, %f for fixed, %g for general
/// \param precision  Passed to printf as the '.*' precision; a negative value
///                   is treated by printf as if no precision was given
/// \return           On success ptr points just past the last character written and
///                   ec is std::errc(). Since snprintf also stores a terminating NUL,
///                   the buffer must be at least one char larger than the text. When
///                   the text does not fit, the buffer is empty or \a fmt is not a
///                   known notation, the result is { first, std::errc::value_too_large }.
/// \note snprintf formats according to the current C locale.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
{
	const auto size = last - first;
	if (size <= 0) // a negative length must never reach snprintf as a huge size_t
		return { first, std::errc::value_too_large };

	// FloatType may have been deduced as a cv-qualified type
	constexpr bool is_long_double = std::is_same_v<std::remove_cv_t<FloatType>, long double>;

	// long double requires the 'L' length modifier, 'l' is wrong for it
	int r = -1;

	switch (fmt)
	{
		case chars_format::scientific:
			if constexpr (is_long_double)
				r = snprintf(first, static_cast<size_t>(size), "%.*Le", precision, value);
			else
				r = snprintf(first, static_cast<size_t>(size), "%.*e", precision, static_cast<double>(value));
			break;

		case chars_format::fixed:
			if constexpr (is_long_double)
				r = snprintf(first, static_cast<size_t>(size), "%.*Lf", precision, value);
			else
				r = snprintf(first, static_cast<size_t>(size), "%.*f", precision, static_cast<double>(value));
			break;

		case chars_format::general:
			if constexpr (is_long_double)
				r = snprintf(first, static_cast<size_t>(size), "%.*Lg", precision, value);
			else
				r = snprintf(first, static_cast<size_t>(size), "%.*g", precision, static_cast<double>(value));
			break;
	}

	std::to_chars_result result;
	if (r < 0 or r >= size)
		result = { first, std::errc::value_too_large };
	else
		result = { first + r, std::errc() };

	return result;
}

/// \brief Write \a value as text into [first, last) using the default
/// precision of printf for the requested notation.
///
/// Same contract as the overload taking a precision.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
{
	// a negative precision makes printf behave as if the precision was omitted
	return to_chars(first, last, value, fmt, -1);
}
|
||||
|
||||
template <typename T>
|
||||
struct my_charconv
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return cif::from_chars(a, b, d);
|
||||
}
|
||||
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return cif::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct std_charconv
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return std::from_chars(a, b, d);
|
||||
}
|
||||
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return std::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
|
||||
|
||||
template <typename T>
|
||||
using selected_charconv = typename std::conditional_t<std::experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
|
||||
|
||||
} // namespace cif
|
||||
@@ -26,13 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifndef STDOUT_FILENO
|
||||
#define STDOUT_FILENO 1
|
||||
@@ -45,8 +39,6 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "cif++/Cif++Export.hpp"
|
||||
|
||||
#if _MSC_VER
|
||||
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
|
||||
#pragma warning(disable : 4068) // unknown pragma
|
||||
@@ -58,60 +50,12 @@
|
||||
namespace cif
|
||||
{
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
// the git 'build' number
|
||||
std::string get_version_nr();
|
||||
// std::string get_version_date();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// some basic utilities: Since we're using ASCII input only, we define for optimisation
|
||||
// our own case conversion routines.
|
||||
|
||||
bool iequals(const std::string &a, const std::string &b);
|
||||
int icompare(const std::string &a, const std::string &b);
|
||||
|
||||
bool iequals(const char *a, const char *b);
|
||||
int icompare(const char *a, const char *b);
|
||||
|
||||
void toLower(std::string &s);
|
||||
std::string toLowerCopy(const std::string &s);
|
||||
|
||||
// To make life easier, we also define iless and iset using iequals
|
||||
|
||||
struct iless
|
||||
{
|
||||
bool operator()(const std::string &a, const std::string &b) const
|
||||
{
|
||||
return icompare(a, b) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::set<std::string, iless> iset;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
extern const uint8_t kCharToLowerMap[256];
|
||||
|
||||
inline char tolower(int ch)
|
||||
{
|
||||
return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::tuple<std::string, std::string> splitTagName(const std::string &tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// generate a cif name, mainly used to generate asym_id's
|
||||
|
||||
std::string cifIdForNumber(int number);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// custom wordwrapping routine
|
||||
|
||||
std::vector<std::string> wordWrap(const std::string &text, size_t width);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Code helping with terminal i/o
|
||||
|
||||
@@ -226,14 +170,14 @@ class Progress
|
||||
Progress(const Progress &) = delete;
|
||||
Progress &operator=(const Progress &) = delete;
|
||||
|
||||
struct ProgressImpl *mImpl;
|
||||
struct ProgressImpl *m_impl;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Resources
|
||||
|
||||
std::unique_ptr<std::istream> loadResource(std::filesystem::path name);
|
||||
void addFileResource(const std::string &name, std::filesystem::path dataFile);
|
||||
void addDataDirectory(std::filesystem::path dataDir);
|
||||
std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
|
||||
void add_file_resource(const std::string &name, std::filesystem::path dataFile);
|
||||
void add_data_directory(std::filesystem::path dataDir);
|
||||
|
||||
} // namespace cif
|
||||
242
include/cif++/validate.hpp
Normal file
242
include/cif++/validate.hpp
Normal file
@@ -0,0 +1,242 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct category_validator;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Exception type carrying a validation message.
class validation_error : public std::exception
{
  public:
	/// Construct from a message
	validation_error(const std::string &msg);

	/// Construct from a category name, an item name and a message
	validation_error(const std::string &cat, const std::string &item,
		const std::string &msg);

	/// The stored message; marked override so a signature mismatch with
	/// std::exception::what is caught by the compiler
	const char *what() const noexcept override { return m_msg.c_str(); }

	std::string m_msg;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Primitive type codes used by type_validator.
/// NOTE(review): presumably Char is case sensitive text, UChar is case
/// insensitive text and Numb is numeric, as in the DDL - confirm.
enum class DDL_PrimitiveType
{
	Char = 0,
	UChar = 1,
	Numb = 2
};
|
||||
|
||||
DDL_PrimitiveType map_to_primitive_type(std::string_view s);
|
||||
|
||||
struct regex_impl;
|
||||
|
||||
struct type_validator
|
||||
{
|
||||
std::string m_name;
|
||||
DDL_PrimitiveType m_primitive_type;
|
||||
regex_impl *m_rx;
|
||||
|
||||
type_validator() = delete;
|
||||
type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx);
|
||||
|
||||
type_validator(const type_validator &) = delete;
|
||||
type_validator(type_validator &&rhs)
|
||||
: m_name(std::move(rhs.m_name))
|
||||
, m_primitive_type(rhs.m_primitive_type)
|
||||
{
|
||||
m_rx = std::exchange(rhs.m_rx, nullptr);
|
||||
}
|
||||
|
||||
type_validator &operator=(const type_validator &) = delete;
|
||||
type_validator &operator=(type_validator &&rhs)
|
||||
{
|
||||
m_name = std::move(rhs.m_name);
|
||||
m_primitive_type = rhs.m_primitive_type;
|
||||
m_rx = std::exchange(rhs.m_rx, nullptr);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~type_validator();
|
||||
|
||||
bool operator<(const type_validator &rhs) const
|
||||
{
|
||||
return icompare(m_name, rhs.m_name) < 0;
|
||||
}
|
||||
|
||||
int compare(std::string_view a, std::string_view b) const;
|
||||
};
|
||||
|
||||
struct item_validator
|
||||
{
|
||||
std::string m_tag;
|
||||
bool m_mandatory;
|
||||
const type_validator *m_type;
|
||||
cif::iset m_enums;
|
||||
std::string m_default;
|
||||
bool m_default_is_null;
|
||||
category_validator *m_category = nullptr;
|
||||
|
||||
// ItemLinked is used for non-key links
|
||||
struct item_link
|
||||
{
|
||||
item_validator *m_parent;
|
||||
std::string m_parent_item;
|
||||
std::string m_child_item;
|
||||
};
|
||||
|
||||
std::vector<item_link> mLinked;
|
||||
|
||||
bool operator<(const item_validator &rhs) const
|
||||
{
|
||||
return icompare(m_tag, rhs.m_tag) < 0;
|
||||
}
|
||||
|
||||
bool operator==(const item_validator &rhs) const
|
||||
{
|
||||
return iequals(m_tag, rhs.m_tag);
|
||||
}
|
||||
|
||||
void operator()(std::string_view value) const;
|
||||
};
|
||||
|
||||
struct category_validator
|
||||
{
|
||||
std::string m_name;
|
||||
std::vector<std::string> m_keys;
|
||||
cif::iset m_groups;
|
||||
cif::iset m_mandatory_fields;
|
||||
std::set<item_validator> m_item_validators;
|
||||
|
||||
bool operator<(const category_validator &rhs) const
|
||||
{
|
||||
return icompare(m_name, rhs.m_name) < 0;
|
||||
}
|
||||
|
||||
void addItemValidator(item_validator &&v);
|
||||
|
||||
const item_validator *get_validator_for_item(std::string_view tag) const;
|
||||
|
||||
const std::set<item_validator> &item_validators() const
|
||||
{
|
||||
return m_item_validators;
|
||||
}
|
||||
};
|
||||
|
||||
/// Describes a link between a parent and a child category by listing the
/// key names on both sides, together with a link group id and label.
struct link_validator
{
	int m_link_group_id = 0; // defined value for a default constructed object
	std::string m_parent_category;
	std::vector<std::string> m_parent_keys;
	std::string m_child_category;
	std::vector<std::string> m_child_keys;
	std::string m_link_group_label;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class validator
|
||||
{
|
||||
public:
|
||||
validator(std::string_view name)
|
||||
: m_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
~validator() = default;
|
||||
|
||||
validator(const validator &rhs) = delete;
|
||||
validator &operator=(const validator &rhs) = delete;
|
||||
|
||||
validator(validator &&rhs) = default;
|
||||
validator &operator=(validator &&rhs) = default;
|
||||
|
||||
friend class dictionary_parser;
|
||||
|
||||
void add_type_validator(type_validator &&v);
|
||||
const type_validator *get_validator_for_type(std::string_view type_code) const;
|
||||
|
||||
void add_category_validator(category_validator &&v);
|
||||
const category_validator *get_validator_for_category(std::string_view category) const;
|
||||
|
||||
void add_link_validator(link_validator &&v);
|
||||
std::vector<const link_validator *> get_links_for_parent(std::string_view category) const;
|
||||
std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
|
||||
|
||||
void report_error(const std::string &msg, bool fatal) const;
|
||||
|
||||
const std::string &name() const { return m_name; }
|
||||
void set_name(const std::string &name) { m_name = name; }
|
||||
|
||||
const std::string &version() const { return m_version; }
|
||||
void version(const std::string &version) { m_version = version; }
|
||||
|
||||
private:
|
||||
// name is fully qualified here:
|
||||
item_validator *get_validator_for_item(std::string_view name) const;
|
||||
|
||||
std::string m_name;
|
||||
std::string m_version;
|
||||
bool m_strict = false;
|
||||
std::set<type_validator> m_type_validators;
|
||||
std::set<category_validator> m_category_validators;
|
||||
std::vector<link_validator> m_link_validators;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
class validator_factory
|
||||
{
|
||||
public:
|
||||
static validator_factory &instance()
|
||||
{
|
||||
static validator_factory s_instance;
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
const validator &operator[](std::string_view dictionary_name);
|
||||
|
||||
private:
|
||||
void construct_validator(std::string_view name, std::istream &is);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
validator_factory() = default;
|
||||
|
||||
std::mutex m_mutex;
|
||||
std::list<validator> m_validators;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -8,5 +8,6 @@ Name: libcifpp
|
||||
Description: C++ library for the manipulation of mmCIF files.
|
||||
Version: @PACKAGE_VERSION@
|
||||
|
||||
Libs: -L${libdir} -lcifpp -lboost_regex -lboost_iostreams
|
||||
Requires.private: zlib, liblzma
|
||||
Libs: -L${libdir} -lcifpp
|
||||
Cflags: -I${includedir} -pthread
|
||||
|
||||
1
regex
Submodule
1
regex
Submodule
Submodule regex added at e5979ae1af
150867
rsrc/mmcif_ma.dic
Normal file
150867
rsrc/mmcif_ma.dic
Normal file
File diff suppressed because it is too large
Load Diff
1318
src/AtomType.cpp
1318
src/AtomType.cpp
File diff suppressed because it is too large
Load Diff
627
src/BondMap.cpp
627
src/BondMap.cpp
@@ -1,627 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include "cif++/BondMap.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
union IDType
|
||||
{
|
||||
IDType() : id_n(0){}
|
||||
IDType(const IDType& rhs) : id_n(rhs.id_n) {}
|
||||
IDType(const std::string& s)
|
||||
: IDType()
|
||||
{
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
}
|
||||
|
||||
IDType& operator=(const IDType& rhs)
|
||||
{
|
||||
id_n = rhs.id_n;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IDType& operator=(const std::string& s)
|
||||
{
|
||||
id_n = 0;
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator<(const IDType& rhs) const
|
||||
{
|
||||
return id_n < rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator<=(const IDType& rhs) const
|
||||
{
|
||||
return id_n <= rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator==(const IDType& rhs) const
|
||||
{
|
||||
return id_n == rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator!=(const IDType& rhs) const
|
||||
{
|
||||
return id_n != rhs.id_n;
|
||||
}
|
||||
|
||||
char id_s[4];
|
||||
uint32_t id_n;
|
||||
};
|
||||
|
||||
static_assert(sizeof(IDType) == 4, "atom_id_type should be 4 bytes");
|
||||
}
|
||||
|
||||
// // --------------------------------------------------------------------
|
||||
|
||||
// void createBondInfoFile(const fs::path& components, const fs::path& infofile)
|
||||
// {
|
||||
// std::ofstream outfile(infofile.string() + ".tmp", std::ios::binary);
|
||||
// if (not outfile.is_open())
|
||||
// throw BondMapException("Could not create bond info file " + infofile.string() + ".tmp");
|
||||
|
||||
// cif::File infile(components);
|
||||
|
||||
// std::set<atom_id_type> atomIDs;
|
||||
// std::vector<atom_id_type> compoundIDs;
|
||||
|
||||
// for (auto& db: infile)
|
||||
// {
|
||||
// auto chem_comp_bond = db.get("chem_comp_bond");
|
||||
// if (not chem_comp_bond)
|
||||
// {
|
||||
// if (cif::VERBOSE > 1)
|
||||
// std::cerr << "Missing chem_comp_bond category in data block " << db.getName() << std::endl;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// atomIDs.insert(atom_id_1);
|
||||
// atomIDs.insert(atom_id_2);
|
||||
// }
|
||||
|
||||
// compoundIDs.push_back({ db.getName() });
|
||||
// }
|
||||
|
||||
// if (cif::VERBOSE)
|
||||
// std::cout << "Number of unique atom names is " << atomIDs.size() << std::endl
|
||||
// << "Number of unique residue names is " << compoundIDs.size() << std::endl;
|
||||
|
||||
// CompoundBondInfoFileHeader header = {};
|
||||
// header.indexEntries = compoundIDs.size();
|
||||
// header.atomEntries = atomIDs.size();
|
||||
|
||||
// outfile << header;
|
||||
|
||||
// for (auto atomID: atomIDs)
|
||||
// outfile << atomID;
|
||||
|
||||
// auto dataOffset = outfile.tellp();
|
||||
|
||||
// std::vector<CompoundBondInfo> entries;
|
||||
// entries.reserve(compoundIDs.size());
|
||||
|
||||
// std::map<atom_id_type, uint16_t> atomIDMap;
|
||||
// for (auto& atomID: atomIDs)
|
||||
// atomIDMap[atomID] = atomIDMap.size();
|
||||
|
||||
// for (auto& db: infile)
|
||||
// {
|
||||
// auto chem_comp_bond = db.get("chem_comp_bond");
|
||||
// if (not chem_comp_bond)
|
||||
// continue;
|
||||
|
||||
// std::set<uint16_t> bondedAtoms;
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// bondedAtoms.insert(atomIDMap[atom_id_1]);
|
||||
// bondedAtoms.insert(atomIDMap[atom_id_2]);
|
||||
// }
|
||||
|
||||
// std::map<uint16_t, int32_t> bondedAtomMap;
|
||||
// for (auto id: bondedAtoms)
|
||||
// bondedAtomMap[id] = static_cast<int32_t>(bondedAtomMap.size());
|
||||
|
||||
// CompoundBondInfo info = {
|
||||
// db.getName(),
|
||||
// static_cast<uint32_t>(bondedAtomMap.size()),
|
||||
// outfile.tellp() - dataOffset
|
||||
// };
|
||||
|
||||
// entries.push_back(info);
|
||||
|
||||
// // And now first write the array of atom ID's in this compound
|
||||
// for (uint16_t id: bondedAtoms)
|
||||
// write(outfile, id);
|
||||
|
||||
// // And then the symmetric matrix with bonds
|
||||
// size_t N = bondedAtoms.size();
|
||||
// size_t M = (N * (N - 1)) / 2;
|
||||
|
||||
// size_t K = M / 8;
|
||||
// if (M % 8)
|
||||
// K += 1;
|
||||
|
||||
// std::vector<uint8_t> m(K);
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// auto a = bondedAtomMap[atomIDMap[atom_id_1]];
|
||||
// auto b = bondedAtomMap[atomIDMap[atom_id_2]];
|
||||
|
||||
// assert(a != b);
|
||||
// assert((int)b < (int)N);
|
||||
|
||||
// if (a > b)
|
||||
// std::swap(a, b);
|
||||
|
||||
// size_t ix = ((b - 1) * b) / 2 + a;
|
||||
// assert(ix < M);
|
||||
|
||||
// auto Bix = ix / 8;
|
||||
// auto bix = ix % 8;
|
||||
|
||||
// m[Bix] |= 1 << bix;
|
||||
// }
|
||||
|
||||
// outfile.write(reinterpret_cast<char*>(m.data()), m.size());
|
||||
// }
|
||||
|
||||
// header.dataSize = outfile.tellp() - dataOffset;
|
||||
|
||||
// std::sort(entries.begin(), entries.end(), [](CompoundBondInfo& a, CompoundBondInfo& b)
|
||||
// {
|
||||
// return a.id < b.id;
|
||||
// });
|
||||
|
||||
// for (auto& info: entries)
|
||||
// outfile << info;
|
||||
|
||||
// outfile.seekp(0);
|
||||
// outfile << header;
|
||||
|
||||
// // compress
|
||||
// outfile.close();
|
||||
|
||||
// std::ifstream in(infofile.string() + ".tmp", std::ios::binary);
|
||||
// std::ofstream out(infofile, std::ios::binary);
|
||||
|
||||
// {
|
||||
// io::filtering_stream<io::output> os;
|
||||
// os.push(io::gzip_compressor());
|
||||
// os.push(out);
|
||||
// io::copy(in, os);
|
||||
// }
|
||||
|
||||
// in.close();
|
||||
// out.close();
|
||||
|
||||
// fs::remove(infofile.string() + ".tmp");
|
||||
// }
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct CompoundBondInfo
|
||||
{
|
||||
IDType mID;
|
||||
std::set<std::tuple<uint32_t,uint32_t>> mBonded;
|
||||
|
||||
bool bonded(uint32_t a1, uint32_t a2) const
|
||||
{
|
||||
return mBonded.count({ a1, a2 }) > 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CompoundBondMap
|
||||
{
|
||||
public:
|
||||
|
||||
static CompoundBondMap &instance()
|
||||
{
|
||||
static std::unique_ptr<CompoundBondMap> s_instance(new CompoundBondMap);
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
bool bonded(const std::string& compoundID, const std::string& atomID1, const std::string& atomID2);
|
||||
|
||||
private:
|
||||
|
||||
CompoundBondMap() {}
|
||||
|
||||
uint32_t getAtomID(const std::string& atomID)
|
||||
{
|
||||
IDType id(atomID);
|
||||
|
||||
uint32_t result;
|
||||
|
||||
auto i = mAtomIDIndex.find(id);
|
||||
if (i == mAtomIDIndex.end())
|
||||
{
|
||||
result = uint32_t(mAtomIDIndex.size());
|
||||
mAtomIDIndex[id] = result;
|
||||
}
|
||||
else
|
||||
result = i->second;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::map<IDType,uint32_t> mAtomIDIndex;
|
||||
std::vector<CompoundBondInfo> mCompounds;
|
||||
std::mutex mMutex;
|
||||
};
|
||||
|
||||
bool CompoundBondMap::bonded(const std::string &compoundID, const std::string& atomID1, const std::string& atomID2)
|
||||
{
|
||||
std::lock_guard lock(mMutex);
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
IDType id(compoundID);
|
||||
uint32_t a1 = getAtomID(atomID1);
|
||||
uint32_t a2 = getAtomID(atomID2);
|
||||
if (a1 > a2)
|
||||
std::swap(a1, a2);
|
||||
|
||||
for (auto &bi: mCompounds)
|
||||
{
|
||||
if (bi.mID != id)
|
||||
continue;
|
||||
|
||||
return bi.bonded(a1, a2);
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
|
||||
// not found in our cache, calculate
|
||||
CompoundBondInfo bondInfo{ id };
|
||||
|
||||
auto compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
if (not compound)
|
||||
std::cerr << "Missing compound bond info for " << compoundID << std::endl;
|
||||
else
|
||||
{
|
||||
for (auto &atom: compound->bonds())
|
||||
{
|
||||
uint32_t ca1 = getAtomID(atom.atomID[0]);
|
||||
uint32_t ca2 = getAtomID(atom.atomID[1]);
|
||||
if (ca1 > ca2)
|
||||
std::swap(ca1, ca2);
|
||||
|
||||
bondInfo.mBonded.insert({ca1, ca2});
|
||||
result = result or (a1 == ca1 and a2 == ca2);
|
||||
}
|
||||
}
|
||||
|
||||
mCompounds.push_back(bondInfo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
BondMap::BondMap(const Structure& p)
|
||||
{
|
||||
auto& compoundBondInfo = CompoundBondMap::instance();
|
||||
|
||||
auto atoms = p.atoms();
|
||||
dim = uint32_t(atoms.size());
|
||||
|
||||
// bond = std::vector<bool>(dim * (dim - 1), false);
|
||||
|
||||
for (auto& atom: atoms)
|
||||
index[atom.id()] = uint32_t(index.size());
|
||||
|
||||
auto bindAtoms = [this](const std::string& a, const std::string& b)
|
||||
{
|
||||
uint32_t ixa = index[a];
|
||||
uint32_t ixb = index[b];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
};
|
||||
|
||||
auto linkAtoms = [this,&bindAtoms](const std::string& a, const std::string& b)
|
||||
{
|
||||
bindAtoms(a, b);
|
||||
|
||||
link[a].insert(b);
|
||||
link[b].insert(a);
|
||||
};
|
||||
|
||||
cif::Datablock& db = p.getFile().data();
|
||||
|
||||
// collect all compounds first
|
||||
std::set<std::string> compounds;
|
||||
for (auto c: db["chem_comp"])
|
||||
compounds.insert(c["id"].as<std::string>());
|
||||
|
||||
// make sure we also have all residues in the polyseq
|
||||
for (auto m: db["entity_poly_seq"])
|
||||
{
|
||||
std::string c = m["mon_id"].as<std::string>();
|
||||
if (compounds.count(c))
|
||||
continue;
|
||||
|
||||
if (cif::VERBOSE > 1)
|
||||
std::cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << std::endl;
|
||||
compounds.insert(c);
|
||||
}
|
||||
|
||||
cif::Progress progress(compounds.size(), "Creating bond map");
|
||||
|
||||
// some helper indices to speed things up a bit
|
||||
std::map<std::tuple<std::string,int,std::string>,std::string> atomMapByAsymSeqAndAtom;
|
||||
for (auto& a: p.atoms())
|
||||
{
|
||||
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
|
||||
atomMapByAsymSeqAndAtom[key] = a.id();
|
||||
}
|
||||
|
||||
// first link all residues in a polyseq
|
||||
|
||||
std::string lastAsymID;
|
||||
int lastSeqID = 0;
|
||||
for (auto r: db["pdbx_poly_seq_scheme"])
|
||||
{
|
||||
std::string asymID;
|
||||
int seqID;
|
||||
|
||||
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
|
||||
|
||||
if (asymID != lastAsymID) // first in a new sequece
|
||||
{
|
||||
lastAsymID = asymID;
|
||||
lastSeqID = seqID;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C")];
|
||||
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N")];
|
||||
|
||||
if (not (c.empty() or n.empty()))
|
||||
bindAtoms(c, n);
|
||||
|
||||
lastSeqID = seqID;
|
||||
}
|
||||
|
||||
for (auto l: db["struct_conn"])
|
||||
{
|
||||
std::string asym1, asym2, atomId1, atomId2;
|
||||
int seqId1 = 0, seqId2 = 0;
|
||||
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
|
||||
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
|
||||
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
|
||||
"ptnr1_label_seq_id", "ptnr2_label_seq_id");
|
||||
|
||||
std::string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1)];
|
||||
std::string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2)];
|
||||
|
||||
if (not (a.empty() or b.empty()))
|
||||
linkAtoms(a, b);
|
||||
}
|
||||
|
||||
// then link all atoms in the compounds
|
||||
|
||||
for (auto c: compounds)
|
||||
{
|
||||
if (c == "HOH" or c == "H2O" or c == "WAT")
|
||||
{
|
||||
if (cif::VERBOSE)
|
||||
std::cerr << "skipping water in bond map calculation" << std::endl;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto bonded = [c, &compoundBondInfo](const Atom& a, const Atom& b)
|
||||
{
|
||||
auto label_a = a.labelAtomID();
|
||||
auto label_b = b.labelAtomID();
|
||||
|
||||
return compoundBondInfo.bonded(c, label_a, label_b);
|
||||
};
|
||||
|
||||
// loop over poly_seq_scheme
|
||||
for (auto r: db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == c))
|
||||
{
|
||||
std::string asymID;
|
||||
int seqID;
|
||||
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
|
||||
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[&](auto& a) { return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
bindAtoms(rAtoms[i].id(), rAtoms[j].id());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loop over pdbx_nonpoly_scheme
|
||||
for (auto r: db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == c))
|
||||
{
|
||||
std::string asymID;
|
||||
cif::tie(asymID) = r.get("asym_id");
|
||||
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[&](auto& a) { return a.labelAsymID() == asymID; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
{
|
||||
uint32_t ixa = index[rAtoms[i].id()];
|
||||
uint32_t ixb = index[rAtoms[j].id()];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// loop over pdbx_branch_scheme
|
||||
for (auto r: db["pdbx_branch_scheme"].find(cif::Key("mon_id") == c))
|
||||
{
|
||||
std::string asymID;
|
||||
cif::tie(asymID) = r.get("asym_id");
|
||||
|
||||
std::vector<Atom> rAtoms;
|
||||
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
|
||||
[&](auto& a) { return a.labelAsymID() == asymID; });
|
||||
|
||||
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
|
||||
{
|
||||
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
|
||||
{
|
||||
if (bonded(rAtoms[i], rAtoms[j]))
|
||||
{
|
||||
uint32_t ixa = index[rAtoms[i].id()];
|
||||
uint32_t ixb = index[rAtoms[j].id()];
|
||||
|
||||
bond.insert(key(ixa, ixb));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// start by creating an index for single bonds
|
||||
|
||||
std::multimap<uint32_t,uint32_t> b1_2;
|
||||
for (auto& bk: bond)
|
||||
{
|
||||
uint32_t a, b;
|
||||
std::tie(a, b) = dekey(bk);
|
||||
|
||||
b1_2.insert({ a, b });
|
||||
b1_2.insert({ b, a });
|
||||
}
|
||||
|
||||
std::multimap<uint32_t,uint32_t> b1_3;
|
||||
for (uint32_t i = 0; i < dim; ++i)
|
||||
{
|
||||
auto a = b1_2.equal_range(i);
|
||||
|
||||
std::vector<uint32_t> s;
|
||||
for (auto j = a.first; j != a.second; ++j)
|
||||
s.push_back(j->second);
|
||||
|
||||
for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
|
||||
{
|
||||
for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
|
||||
{
|
||||
uint32_t x = s[si1];
|
||||
uint32_t y = s[si2];
|
||||
|
||||
if (isBonded(x, y))
|
||||
continue;
|
||||
|
||||
b1_3.insert({ x, y });
|
||||
b1_3.insert({ y, x });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < dim; ++i)
|
||||
{
|
||||
auto a1 = b1_2.equal_range(i);
|
||||
auto a2 = b1_3.equal_range(i);
|
||||
|
||||
for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
|
||||
{
|
||||
for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
|
||||
{
|
||||
uint32_t b1 = ai1->second;
|
||||
uint32_t b2 = ai2->second;
|
||||
|
||||
if (isBonded(b1, b2))
|
||||
continue;
|
||||
|
||||
bond_1_4.insert(key(b1, b2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::string> BondMap::linked(const Atom& a) const
|
||||
{
|
||||
auto i = link.find(a.id());
|
||||
|
||||
std::vector<std::string> result;
|
||||
|
||||
if (i != link.end())
|
||||
result = std::vector<std::string>(i->second.begin(), i->second.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> BondMap::atomIDsForCompound(const std::string& compoundID)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
auto* compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
|
||||
if (compound == nullptr)
|
||||
throw BondMapException("Missing bond information for compound " + compoundID);
|
||||
|
||||
for (auto& compAtom: compound->atoms())
|
||||
result.push_back(compAtom.id);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
3696
src/Cif++.cpp
3696
src/Cif++.cpp
File diff suppressed because it is too large
Load Diff
3981
src/Cif2PDB.cpp
3981
src/Cif2PDB.cpp
File diff suppressed because it is too large
Load Diff
1326
src/CifParser.cpp
1326
src/CifParser.cpp
File diff suppressed because it is too large
Load Diff
1303
src/CifUtils.cpp
1303
src/CifUtils.cpp
File diff suppressed because it is too large
Load Diff
@@ -1,351 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/CifParser.hpp"
|
||||
#include "cif++/CifValidator.hpp"
|
||||
|
||||
namespace ba = boost::algorithm;
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// Construct a validation error carrying `msg` verbatim.
ValidationError::ValidationError(const std::string& msg)
	: mMsg(msg)
{
}

// Construct a validation error for item `item` of category `cat`; the stored
// message is prefixed with the full tag name ("_<cat>.<item>").
ValidationError::ValidationError(const std::string& cat, const std::string& item, const std::string& msg)
	: ValidationError("When validating _" + cat + '.' + item + ": " + msg)
{
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Translate a primitive type name ("char", "uchar" or "numb", matched with
// iequals) into the corresponding DDL_PrimitiveType.
// Throws ValidationError for any other name.
DDL_PrimitiveType mapToPrimitiveType(const std::string& s)
{
	if (iequals(s, "char"))
		return DDL_PrimitiveType::Char;

	if (iequals(s, "uchar"))
		return DDL_PrimitiveType::UChar;

	if (iequals(s, "numb"))
		return DDL_PrimitiveType::Numb;

	throw ValidationError("Not a known primitive type");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int ValidateType::compare(const char* a, const char* b) const
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
if (*a == 0)
|
||||
result = *b == 0 ? 0 : -1;
|
||||
else if (*b == 0)
|
||||
result = *a == 0 ? 0 : +1;
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
switch (mPrimitiveType)
|
||||
{
|
||||
case DDL_PrimitiveType::Numb:
|
||||
{
|
||||
double da = strtod(a, nullptr);
|
||||
double db = strtod(b, nullptr);
|
||||
|
||||
auto d = da - db;
|
||||
if (std::abs(d) > std::numeric_limits<double>::epsilon())
|
||||
{
|
||||
if (d > 0)
|
||||
result = 1;
|
||||
else if (d < 0)
|
||||
result = -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DDL_PrimitiveType::UChar:
|
||||
case DDL_PrimitiveType::Char:
|
||||
{
|
||||
// CIF is guaranteed to have ascii only, therefore this primitive code will do
|
||||
// also, we're collapsing spaces
|
||||
|
||||
auto ai = a, bi = b;
|
||||
for (;;)
|
||||
{
|
||||
if (*ai == 0)
|
||||
{
|
||||
if (*bi != 0)
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
else if (*bi == 0)
|
||||
{
|
||||
result = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
char ca = *ai;
|
||||
char cb = *bi;
|
||||
|
||||
if (mPrimitiveType == DDL_PrimitiveType::UChar)
|
||||
{
|
||||
ca = tolower(ca);
|
||||
cb = tolower(cb);
|
||||
}
|
||||
|
||||
result = ca - cb;
|
||||
|
||||
if (result != 0)
|
||||
break;
|
||||
|
||||
if (ca == ' ')
|
||||
{
|
||||
while (ai[1] == ' ')
|
||||
++ai;
|
||||
while (bi[1] == ' ')
|
||||
++bi;
|
||||
}
|
||||
|
||||
++ai;
|
||||
++bi;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const std::invalid_argument& ex)
|
||||
{
|
||||
result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
//void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
|
||||
//{
|
||||
//// if (mParent != nullptr and VERBOSE)
|
||||
//// cerr << "replacing parent in " << mCategory->mName << " from " << mParent->mCategory->mName << " to " << parent->mCategory->mName << endl;
|
||||
//// mParent = parent;
|
||||
//
|
||||
// if (mType == nullptr and parent != nullptr)
|
||||
// mType = parent->mType;
|
||||
//
|
||||
// if (parent != nullptr)
|
||||
// {
|
||||
// mLinked.push_back({parent, parentItem, childItem});
|
||||
//
|
||||
// parent->mChildren.insert(this);
|
||||
////
|
||||
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
|
||||
//// parent->mForeignKeys.insert(this);
|
||||
// }
|
||||
//}
|
||||
|
||||
void ValidateItem::operator()(std::string value) const
|
||||
{
|
||||
if (not value.empty() and value != "?" and value != ".")
|
||||
{
|
||||
if (mType != nullptr and not regex_match(value, mType->mRx))
|
||||
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);
|
||||
|
||||
if (not mEnums.empty())
|
||||
{
|
||||
if (mEnums.count(value) == 0)
|
||||
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Register the item validator `v` with this category.
//
// A mandatory item has its tag added to mMandatoryFields, and v.mCategory is
// pointed at this category before v is moved into mItemValidators.
// When an equivalent validator is already present nothing is replaced; at
// VERBOSE >= 4 a message is written to std::cout.
void ValidateCategory::addItemValidator(ValidateItem&& v)
{
	if (v.mMandatory)
		mMandatoryFields.insert(v.mTag);

	v.mCategory = this;

	auto r = mItemValidators.insert(std::move(v));

	// v must not be read after the move above; r.first refers to the element
	// that blocked the insertion, whose tag is equivalent to the rejected one.
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << r.first->mTag << " to category " << mName << std::endl;
}
|
||||
|
||||
// Look up the validator registered for item `tag` in this category.
// Returns nullptr when none is registered (reported on std::cout at VERBOSE > 4).
const ValidateItem* ValidateCategory::getValidatorForItem(std::string tag) const
{
	const auto pos = mItemValidators.find(ValidateItem{tag});
	if (pos != mItemValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Nothing to set up beyond default construction of the members.
Validator::Validator() = default;

// Nothing to release beyond what the members clean up themselves.
Validator::~Validator() = default;
|
||||
|
||||
// Register the type validator `v`.
// When an equivalent type is already registered nothing is replaced; at
// VERBOSE > 4 a message is written to std::cout.
void Validator::addTypeValidator(ValidateType&& v)
{
	auto r = mTypeValidators.insert(std::move(v));

	// v must not be read after the move above; r.first refers to the element
	// that blocked the insertion, whose name is equivalent to the rejected one.
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for type " << r.first->mName << std::endl;
}
|
||||
|
||||
// Look up the type validator named `typeCode`.
// Returns nullptr when none is registered (reported on std::cout at VERBOSE > 4).
const ValidateType* Validator::getValidatorForType(std::string typeCode) const
{
	// The primitive type and regex of the probe are placeholders; only a
	// ValidateType object is needed to call find().
	const auto pos = mTypeValidators.find(ValidateType{ typeCode, DDL_PrimitiveType::Char, boost::regex() });
	if (pos != mTypeValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
|
||||
|
||||
// Register the category validator `v`.
// When an equivalent category is already registered nothing is replaced; at
// VERBOSE > 4 a message is written to std::cout.
void Validator::addCategoryValidator(ValidateCategory&& v)
{
	auto r = mCategoryValidators.insert(std::move(v));

	// v must not be read after the move above; r.first refers to the element
	// that blocked the insertion, whose name is equivalent to the rejected one.
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for category " << r.first->mName << std::endl;
}
|
||||
|
||||
// Look up the validator for the category named `category`.
// Returns nullptr when none is registered (reported on std::cout at VERBOSE > 4).
const ValidateCategory* Validator::getValidatorForCategory(std::string category) const
{
	const auto pos = mCategoryValidators.find(ValidateCategory{category});
	if (pos != mCategoryValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
|
||||
|
||||
// Look up the item validator for a full tag name: `tag` is split with
// splitTagName into a category and an item part, which are then looked up in
// turn. Returns nullptr when either lookup fails (reported on std::cout at
// VERBOSE > 4).
ValidateItem* Validator::getValidatorForItem(std::string tag) const
{
	std::string categoryName, itemName;
	std::tie(categoryName, itemName) = splitTagName(tag);

	const ValidateItem* found = nullptr;
	if (const auto* categoryValidator = getValidatorForCategory(categoryName))
		found = categoryValidator->getValidatorForItem(itemName);

	if (found == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << tag << std::endl;

	// The category hands out a const pointer; this accessor exposes it mutable.
	return const_cast<ValidateItem*>(found);
}
|
||||
|
||||
// Register a parent/child link between two categories.
//
// Throws std::runtime_error when the parent and child key lists differ in
// length, when either category is unknown, or when any key names an item
// without a validator. A child item that has no type yet takes over the type
// of the parent item it is linked to.
void Validator::addLinkValidator(ValidateLink&& v)
{
	assert(v.mParentKeys.size() == v.mChildKeys.size());
	if (v.mParentKeys.size() != v.mChildKeys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	// Both categories are looked up before either result is checked.
	auto parentCategory = getValidatorForCategory(v.mParentCategory);
	auto childCategory = getValidatorForCategory(v.mChildCategory);

	if (parentCategory == nullptr)
		throw std::runtime_error("unknown parent category " + v.mParentCategory);

	if (childCategory == nullptr)
		throw std::runtime_error("unknown child category " + v.mChildCategory);

	for (size_t keyIx = 0; keyIx < v.mParentKeys.size(); ++keyIx)
	{
		auto parentItem = parentCategory->getValidatorForItem(v.mParentKeys[keyIx]);
		if (parentItem == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.mParentCategory + '.' + v.mParentKeys[keyIx]);

		auto childItem = childCategory->getValidatorForItem(v.mChildKeys[keyIx]);
		if (childItem == nullptr)
			throw std::runtime_error("unknown child tag _" + v.mChildCategory + '.' + v.mChildKeys[keyIx]);

		const bool inheritType = childItem->mType == nullptr and parentItem->mType != nullptr;
		if (inheritType)
			const_cast<ValidateItem*>(childItem)->mType = parentItem->mType;
	}

	mLinkValidators.emplace_back(std::move(v));
}
|
||||
|
||||
std::vector<const ValidateLink*> Validator::getLinksForParent(const std::string& category) const
|
||||
{
|
||||
std::vector<const ValidateLink*> result;
|
||||
|
||||
for (auto& l: mLinkValidators)
|
||||
{
|
||||
if (l.mParentCategory == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<const ValidateLink*> Validator::getLinksForChild(const std::string& category) const
|
||||
{
|
||||
std::vector<const ValidateLink*> result;
|
||||
|
||||
for (auto& l: mLinkValidators)
|
||||
{
|
||||
if (l.mChildCategory == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void Validator::reportError(const std::string& msg, bool fatal)
|
||||
{
|
||||
if (mStrict or fatal)
|
||||
throw ValidationError(msg);
|
||||
else if (VERBOSE)
|
||||
std::cerr << msg << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,11 +0,0 @@
|
||||
/* Define to the name of this package. */
|
||||
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
|
||||
|
||||
/* Define the complete package string */
|
||||
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
|
||||
|
||||
/* Using resources? */
|
||||
#cmakedefine USE_RSRC @USE_RSRC@
|
||||
@@ -1,113 +0,0 @@
|
||||
/* src/Config.hpp.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* define if the Boost library is available */
|
||||
#undef HAVE_BOOST
|
||||
|
||||
/* define if the Boost::Date_Time library is available */
|
||||
#undef HAVE_BOOST_DATE_TIME
|
||||
|
||||
/* define if the Boost::IOStreams library is available */
|
||||
#undef HAVE_BOOST_IOSTREAMS
|
||||
|
||||
/* define if the Boost::Regex library is available */
|
||||
#undef HAVE_BOOST_REGEX
|
||||
|
||||
/* define if the compiler supports basic C++17 syntax */
|
||||
#undef HAVE_CXX17
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the `floor' function. */
|
||||
#undef HAVE_FLOOR
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the `pow' function. */
|
||||
#undef HAVE_POW
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if the system has the type `ptrdiff_t'. */
|
||||
#undef HAVE_PTRDIFF_T
|
||||
|
||||
/* Define to 1 if you have the `rint' function. */
|
||||
#undef HAVE_RINT
|
||||
|
||||
/* Define to 1 if you have the `sqrt' function. */
|
||||
#undef HAVE_SQRT
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strchr' function. */
|
||||
#undef HAVE_STRCHR
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/ioctl.h> header file. */
|
||||
#undef HAVE_SYS_IOCTL_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <termios.h> header file. */
|
||||
#undef HAVE_TERMIOS_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the system has the type `_Bool'. */
|
||||
#undef HAVE__BOOL
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Use mrc to store resources */
|
||||
#undef USE_RSRC
|
||||
File diff suppressed because it is too large
Load Diff
306
src/Point.cpp
306
src/Point.cpp
@@ -1,306 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <random>
|
||||
#include <valarray>
|
||||
|
||||
#include "cif++/Point.hpp"
|
||||
#include "cif++/Matrix.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Scale q to unit length.
// When the length of q is not larger than 0.001 the quaternion (1, 0, 0, 0)
// is returned instead of dividing by a near-zero length.
Quaternion Normalize(Quaternion q)
{
	std::valarray<double> squares{
		q.R_component_1(), q.R_component_2(), q.R_component_3(), q.R_component_4()
	};
	squares *= squares;

	const double norm = std::sqrt(squares.sum());

	if (norm > 0.001)
	{
		q /= static_cast<Quaternion::value_type>(norm);
		return q;
	}

	return Quaternion(1, 0, 0, 0);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Convert q into an (angle, axis) pair; the angle is returned in degrees.
// q is normalised first when its first component exceeds 1, since acos is
// only defined on [-1, 1].
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q)
{
	if (q.R_component_1() > 1)
		q = Normalize(q);

	const auto w = q.R_component_1();

	// angle: computed in radians, then converted to degrees
	const double radians = 2 * acos(w);
	const double degrees = radians * 180 / kPI;

	// axis: a near-zero divisor is replaced by 1, leaving the components unscaled
	const float root = std::sqrt(1 - w * w);
	const float s = root < 0.001 ? 1 : root;

	Point axis(q.R_component_2() / s, q.R_component_3() / s, q.R_component_4() / s);

	return std::make_tuple(degrees, axis);
}
|
||||
|
||||
// Translate `Points` in place so that their mean position becomes the origin.
// Returns the mean position that was subtracted.
// NOTE(review): an empty vector divides by zero; callers presumably never pass one.
Point CenterPoints(std::vector<Point>& Points)
{
	Point center;

	for (const Point& pt : Points)
	{
		center.mX += pt.mX;
		center.mY += pt.mY;
		center.mZ += pt.mZ;
	}

	const auto count = Points.size();
	center.mX /= count;
	center.mY /= count;
	center.mZ /= count;

	for (size_t i = 0; i < count; ++i)
	{
		Point& pt = Points[i];
		pt.mX -= center.mX;
		pt.mY -= center.mY;
		pt.mZ -= center.mZ;
	}

	return center;
}
|
||||
|
||||
// Return the mean position of `Points`; the points themselves are not modified.
Point Centroid(std::vector<Point>& Points)
{
	Point sum;

	for (size_t i = 0; i < Points.size(); ++i)
		sum += Points[i];

	sum /= static_cast<float>(Points.size());

	return sum;
}
|
||||
|
||||
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b)
|
||||
{
|
||||
double sum = 0;
|
||||
for (uint32_t i = 0; i < a.size(); ++i)
|
||||
{
|
||||
std::valarray<double> d(3);
|
||||
|
||||
d[0] = b[i].mX - a[i].mX;
|
||||
d[1] = b[i].mY - a[i].mY;
|
||||
d[2] = b[i].mZ - a[i].mZ;
|
||||
|
||||
d *= d;
|
||||
|
||||
sum += d.sum();
|
||||
}
|
||||
|
||||
return std::sqrt(sum / a.size());
|
||||
}
|
||||
|
||||
// Return the largest solution of the depressed quartic
//
//   x^4 + a x^2 + b x + c = 0
//
// using Ferrari's algorithm. The intermediate values are complex to avoid
// NaNs when taking the square root of a negative number; the real part of
// the selected solution is returned.
//
// With y a root of the resolvent cubic and W = sqrt(a + 2y), the four
// solutions are
//
//   x = ( s W ± sqrt(-(3a + 2y + s 2b/W)) ) / 2,   s = +1 or -1
//
// where the sign s in front of W and the sign of the 2b/W term are the SAME
// sign. Only the '+ sqrt' variant of each pair can be the largest, so two
// candidates are evaluated. (Combining +W with -2b/W, or -W with +2b/W, does
// not yield a solution and must not take part in the maximum.)
//
// NOTE(review): W == 0 results in a division by zero below; that case is not
// treated specially.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	std::complex<double> P = - (a * a) / 12 - c;
	std::complex<double> Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	std::complex<double> R = - Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	std::complex<double> U = std::pow(R, 1 / 3.0);

	std::complex<double> y;
	if (U == 0.0)
		y = -5.0 * a / 6.0 + U - std::pow(Q, 1.0 / 3.0);
	else
		y = -5.0 * a / 6.0 + U - P / (3.0 * U);

	std::complex<double> W = std::sqrt(a + 2.0 * y);

	const std::complex<double> base = 3.0 * a + 2.0 * y;
	const std::complex<double> shift = 2.0 * b / W;

	// s = +1: +W pairs with +2b/W
	const double viaPlusW  = (( W + std::sqrt(-(base + shift))) / 2.0).real();
	// s = -1: -W pairs with -2b/W
	const double viaMinusW = ((-W + std::sqrt(-(base - shift))) / 2.0).real();

	return viaPlusW > viaMinusW ? viaPlusW : viaMinusW;
}
|
||||
|
||||
// Derive a unit quaternion from two equally indexed point sets pa and pb.
//
// The sums of products of the coordinates are collected in a 3x3 matrix M,
// from which a symmetric 4x4 matrix N is built. The largest root of the
// characteristic polynomial of N is located, and a row of the cofactor
// matrix of (N - lambda I) supplies the four quaternion components.
//
// pb is indexed for every index of pa, so pb must hold at least pa.size() points.
Quaternion AlignPoints(const std::vector<Point>& pa, const std::vector<Point>& pb)
{
	// M: sums of products of the coordinates of A and B
	Matrix<double> M(3, 3, 0);

	for (uint32_t i = 0; i < pa.size(); ++i)
	{
		const Point& a = pa[i];
		const Point& b = pb[i];

		M(0, 0) += a.mX * b.mX;
		M(0, 1) += a.mX * b.mY;
		M(0, 2) += a.mX * b.mZ;

		M(1, 0) += a.mY * b.mX;
		M(1, 1) += a.mY * b.mY;
		M(1, 2) += a.mY * b.mZ;

		M(2, 0) += a.mZ * b.mX;
		M(2, 1) += a.mZ * b.mY;
		M(2, 2) += a.mZ * b.mZ;
	}

	// N: symmetric 4x4 matrix derived from M
	SymmetricMatrix<double> N(4);

	N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
	N(0, 1) = M(1, 2) - M(2, 1);
	N(0, 2) = M(2, 0) - M(0, 2);
	N(0, 3) = M(0, 1) - M(1, 0);

	N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
	N(1, 2) = M(0, 1) + M(1, 0);
	N(1, 3) = M(0, 2) + M(2, 0);

	N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
	N(2, 3) = M(1, 2) + M(2, 1);

	N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);

	// det(N - lambda I) = 0 expands to
	//
	//   lambda^4 + C lambda^2 + D lambda + E = 0
	//
	// There is no cubic term, so this is a depressed quartic; its largest
	// root is found with Ferrari's algorithm.

	double C = -2 * (
		M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
		M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
		M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));

	double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
					M(1, 1) * M(2, 0) * M(0, 2) +
					M(2, 2) * M(0, 1) * M(1, 0)) -
			   8 * (M(0, 0) * M(1, 1) * M(2, 2) +
					M(1, 2) * M(2, 0) * M(0, 1) +
					M(2, 1) * M(1, 0) * M(0, 2));

	double E =
		(N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
		(N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
		(N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
		(N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
		(N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
		(N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));

	const double lambdaMax = LargestDepressedQuarticSolution(C, D, E);

	// shifted = N - lambdaMax * I
	Matrix<double> scaledIdentity = IdentityMatrix<double>(4) * lambdaMax;
	Matrix<double> shifted = N - scaledIdentity;

	// For row/column k, the three indices that remain once k is struck out.
	const uint32_t remaining[4][3] =
	{
		{ 1, 2, 3 },
		{ 0, 2, 3 },
		{ 0, 1, 3 },
		{ 0, 1, 2 }
	};

	// 3x3 determinant of `shifted` restricted to rows ir[0..2] and columns ic[0..2]
	auto minor3 = [&shifted](const uint32_t* ir, const uint32_t* ic)
	{
		return
			shifted(ir[0], ic[0]) * shifted(ir[1], ic[1]) * shifted(ir[2], ic[2]) +
			shifted(ir[0], ic[1]) * shifted(ir[1], ic[2]) * shifted(ir[2], ic[0]) +
			shifted(ir[0], ic[2]) * shifted(ir[1], ic[0]) * shifted(ir[2], ic[1]) -
			shifted(ir[0], ic[2]) * shifted(ir[1], ic[1]) * shifted(ir[2], ic[0]) -
			shifted(ir[0], ic[1]) * shifted(ir[1], ic[0]) * shifted(ir[2], ic[2]) -
			shifted(ir[0], ic[0]) * shifted(ir[1], ic[2]) * shifted(ir[2], ic[1]);
	};

	Matrix<double> cofactors(4, 4);

	// bestRow tracks the row whose first entry is the largest seen so far
	uint32_t bestRow = 0;
	for (uint32_t row = 0; row < 4; ++row)
	{
		for (uint32_t col = 0; col < 4; ++col)
			cofactors(row, col) = minor3(remaining[row], remaining[col]);

		if (row > bestRow and cofactors(row, 0) > cofactors(bestRow, 0))
			bestRow = row;
	}

	// NOTE the negation of the y here, why? Maybe I swapped r/c above?
	Quaternion q(cofactors(bestRow, 0), cofactors(bestRow, 1), -cofactors(bestRow, 2), cofactors(bestRow, 3));

	return Normalize(q);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return p displaced by a random vector.
// The direction is the vector (0, 0, 1) rotated by a quaternion built from
// three random angles; the signed length is drawn from a normal distribution
// with mean 0 and standard deviation `offset`.
// The random engine is a function-local static, seeded once from
// std::random_device.
Point Nudge(Point p, float offset)
{
	static std::random_device seedSource;
	static std::mt19937_64 engine(seedSource());

	std::uniform_real_distribution<> angleDist(0, 2 * kPI);
	std::normal_distribution<> lengthDist(0, offset);

	// The three angles are drawn in this order: theta, phi1, phi2.
	const float theta = static_cast<float>(angleDist(engine));
	const float phi1 = static_cast<float>(angleDist(engine) - kPI);
	const float phi2 = static_cast<float>(angleDist(engine) - kPI);

	Quaternion rotation = boost::math::spherical(1.0f, theta, phi1, phi2);

	Point displacement{ 0, 0, 1 };
	displacement.rotate(rotation);
	displacement *= static_cast<float>(lengthDist(engine));

	return p + displacement;
}
|
||||
|
||||
}
|
||||
1528
src/Secondary.cpp
1528
src/Secondary.cpp
File diff suppressed because it is too large
Load Diff
2606
src/Structure.cpp
2606
src/Structure.cpp
File diff suppressed because it is too large
Load Diff
1892
src/TlsParser.cpp
1892
src/TlsParser.cpp
File diff suppressed because it is too large
Load Diff
1154
src/atom_type.cpp
Normal file
1154
src/atom_type.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2064
src/category.cpp
Normal file
2064
src/category.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,7 +1,7 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
@@ -29,83 +29,76 @@
|
||||
#include <numeric>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/CifParser.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/Point.hpp"
|
||||
#include <cif++/compound.hpp>
|
||||
|
||||
namespace ba = boost::algorithm;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string to_string(BondType bondType)
|
||||
std::string to_string(bond_type bondType)
|
||||
{
|
||||
switch (bondType)
|
||||
{
|
||||
case BondType::sing: return "sing";
|
||||
case BondType::doub: return "doub";
|
||||
case BondType::trip: return "trip";
|
||||
case BondType::quad: return "quad";
|
||||
case BondType::arom: return "arom";
|
||||
case BondType::poly: return "poly";
|
||||
case BondType::delo: return "delo";
|
||||
case BondType::pi: return "pi";
|
||||
case bond_type::sing: return "sing";
|
||||
case bond_type::doub: return "doub";
|
||||
case bond_type::trip: return "trip";
|
||||
case bond_type::quad: return "quad";
|
||||
case bond_type::arom: return "arom";
|
||||
case bond_type::poly: return "poly";
|
||||
case bond_type::delo: return "delo";
|
||||
case bond_type::pi: return "pi";
|
||||
}
|
||||
throw std::invalid_argument("Invalid bondType");
|
||||
}
|
||||
|
||||
BondType from_string(const std::string &bondType)
|
||||
bond_type from_string(const std::string &bondType)
|
||||
{
|
||||
if (cif::iequals(bondType, "sing"))
|
||||
return BondType::sing;
|
||||
return bond_type::sing;
|
||||
if (cif::iequals(bondType, "doub"))
|
||||
return BondType::doub;
|
||||
return bond_type::doub;
|
||||
if (cif::iequals(bondType, "trip"))
|
||||
return BondType::trip;
|
||||
return bond_type::trip;
|
||||
if (cif::iequals(bondType, "quad"))
|
||||
return BondType::quad;
|
||||
return bond_type::quad;
|
||||
if (cif::iequals(bondType, "arom"))
|
||||
return BondType::arom;
|
||||
return bond_type::arom;
|
||||
if (cif::iequals(bondType, "poly"))
|
||||
return BondType::poly;
|
||||
return bond_type::poly;
|
||||
if (cif::iequals(bondType, "delo"))
|
||||
return BondType::delo;
|
||||
return bond_type::delo;
|
||||
if (cif::iequals(bondType, "pi"))
|
||||
return BondType::pi;
|
||||
return bond_type::pi;
|
||||
throw std::invalid_argument("Invalid bondType: " + bondType);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound helper classes
|
||||
// compound helper classes
|
||||
|
||||
struct CompoundAtomLess
|
||||
struct compound_atom_less
|
||||
{
|
||||
bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
|
||||
bool operator()(const compound_atom &a, const compound_atom &b) const
|
||||
{
|
||||
int d = a.id.compare(b.id);
|
||||
if (d == 0)
|
||||
d = a.typeSymbol - b.typeSymbol;
|
||||
d = a.type_symbol - b.type_symbol;
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompoundBondLess
|
||||
struct compound_bond_less
|
||||
{
|
||||
bool operator()(const CompoundBond &a, const CompoundBond &b) const
|
||||
bool operator()(const compound_bond &a, const compound_bond &b) const
|
||||
{
|
||||
int d = a.atomID[0].compare(b.atomID[0]);
|
||||
int d = a.atom_id[0].compare(b.atom_id[0]);
|
||||
if (d == 0)
|
||||
d = a.atomID[1].compare(b.atomID[1]);
|
||||
d = a.atom_id[1].compare(b.atom_id[1]);
|
||||
if (d == 0)
|
||||
d = static_cast<int>(a.type) - static_cast<int>(b.type);
|
||||
return d < 0;
|
||||
@@ -113,120 +106,126 @@ struct CompoundBondLess
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound
|
||||
// compound
|
||||
|
||||
Compound::Compound(cif::Datablock &db)
|
||||
compound::compound(cif::datablock &db)
|
||||
{
|
||||
auto &chemComp = db["chem_comp"];
|
||||
|
||||
if (chemComp.size() != 1)
|
||||
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
|
||||
|
||||
cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
|
||||
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
|
||||
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
|
||||
|
||||
// The name should not contain newline characters since that triggers validation errors later on
|
||||
cif::replace_all(m_name, "\n", "");
|
||||
|
||||
m_group = "non-polymer";
|
||||
|
||||
auto &chemCompAtom = db["chem_comp_atom"];
|
||||
for (auto row : chemCompAtom)
|
||||
{
|
||||
CompoundAtom atom;
|
||||
std::string typeSymbol;
|
||||
cif::tie(atom.id, typeSymbol, atom.charge, atom.aromatic, atom.leavingAtom, atom.stereoConfig, atom.x, atom.y, atom.z) =
|
||||
compound_atom atom;
|
||||
std::string type_symbol;
|
||||
cif::tie(atom.id, type_symbol, atom.charge, atom.aromatic, atom.leaving_atom, atom.stereo_config, atom.x, atom.y, atom.z) =
|
||||
row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
|
||||
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
|
||||
atom.typeSymbol = AtomTypeTraits(typeSymbol).type();
|
||||
mAtoms.push_back(std::move(atom));
|
||||
atom.type_symbol = atom_type_traits(type_symbol).type();
|
||||
m_atoms.push_back(std::move(atom));
|
||||
}
|
||||
|
||||
auto &chemCompBond = db["chem_comp_bond"];
|
||||
for (auto row : chemCompBond)
|
||||
{
|
||||
CompoundBond bond;
|
||||
compound_bond bond;
|
||||
std::string valueOrder;
|
||||
cif::tie(bond.atomID[0], bond.atomID[1], valueOrder, bond.aromatic, bond.stereoConfig) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
|
||||
cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
|
||||
bond.type = from_string(valueOrder);
|
||||
mBonds.push_back(std::move(bond));
|
||||
m_bonds.push_back(std::move(bond));
|
||||
}
|
||||
}
|
||||
|
||||
Compound::Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type)
|
||||
: mID(id)
|
||||
, mName(name)
|
||||
, mType(type)
|
||||
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
|
||||
: m_id(id)
|
||||
, m_name(name)
|
||||
, m_type(type)
|
||||
, m_group(group)
|
||||
{
|
||||
auto &chemCompAtom = db["chem_comp_atom"];
|
||||
for (auto row : chemCompAtom)
|
||||
{
|
||||
CompoundAtom atom;
|
||||
std::string typeSymbol;
|
||||
cif::tie(atom.id, typeSymbol, atom.charge, atom.x, atom.y, atom.z) =
|
||||
compound_atom atom;
|
||||
std::string type_symbol;
|
||||
cif::tie(atom.id, type_symbol, atom.charge, atom.x, atom.y, atom.z) =
|
||||
row.get("atom_id", "type_symbol", "charge", "x", "y", "z");
|
||||
atom.typeSymbol = AtomTypeTraits(typeSymbol).type();
|
||||
atom.type_symbol = atom_type_traits(type_symbol).type();
|
||||
|
||||
mFormalCharge += atom.charge;
|
||||
mFormulaWeight += AtomTypeTraits(atom.typeSymbol).weight();
|
||||
m_formal_charge += atom.charge;
|
||||
m_formula_weight += atom_type_traits(atom.type_symbol).weight();
|
||||
|
||||
mAtoms.push_back(std::move(atom));
|
||||
m_atoms.push_back(std::move(atom));
|
||||
}
|
||||
|
||||
auto &chemCompBond = db["chem_comp_bond"];
|
||||
for (auto row : chemCompBond)
|
||||
{
|
||||
CompoundBond bond;
|
||||
compound_bond bond;
|
||||
std::string btype;
|
||||
cif::tie(bond.atomID[0], bond.atomID[1], btype, bond.aromatic) = row.get("atom_id_1", "atom_id_2", "type", "aromatic");
|
||||
cif::tie(bond.atom_id[0], bond.atom_id[1], btype, bond.aromatic) = row.get("atom_id_1", "atom_id_2", "type", "aromatic");
|
||||
|
||||
using cif::iequals;
|
||||
|
||||
if (iequals(btype, "single"))
|
||||
bond.type = BondType::sing;
|
||||
bond.type = bond_type::sing;
|
||||
else if (iequals(btype, "double"))
|
||||
bond.type = BondType::doub;
|
||||
bond.type = bond_type::doub;
|
||||
else if (iequals(btype, "triple"))
|
||||
bond.type = BondType::trip;
|
||||
bond.type = bond_type::trip;
|
||||
else if (iequals(btype, "deloc") or iequals(btype, "aromat") or iequals(btype, "aromatic"))
|
||||
bond.type = BondType::delo;
|
||||
bond.type = bond_type::delo;
|
||||
else
|
||||
{
|
||||
if (cif::VERBOSE)
|
||||
if (cif::VERBOSE > 0)
|
||||
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << std::endl;
|
||||
bond.type = BondType::sing;
|
||||
bond.type = bond_type::sing;
|
||||
}
|
||||
mBonds.push_back(std::move(bond));
|
||||
m_bonds.push_back(std::move(bond));
|
||||
}
|
||||
}
|
||||
|
||||
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
|
||||
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
|
||||
{
|
||||
CompoundAtom result = {};
|
||||
for (auto &a : mAtoms)
|
||||
compound_atom result = {};
|
||||
for (auto &a : m_atoms)
|
||||
{
|
||||
if (a.id == atomID)
|
||||
if (a.id == atom_id)
|
||||
{
|
||||
result = a;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result.id != atomID)
|
||||
throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
|
||||
if (result.id != atom_id)
|
||||
throw std::out_of_range("No atom " + atom_id + " in compound " + m_id);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
|
||||
bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const
|
||||
{
|
||||
auto i = find_if(mBonds.begin(), mBonds.end(),
|
||||
[&](const CompoundBond &b) {
|
||||
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
|
||||
auto i = find_if(m_bonds.begin(), m_bonds.end(),
|
||||
[&](const compound_bond &b) {
|
||||
return (b.atom_id[0] == atomId_1 and b.atom_id[1] == atomId_2) or (b.atom_id[0] == atomId_2 and b.atom_id[1] == atomId_1);
|
||||
});
|
||||
|
||||
return i != mBonds.end();
|
||||
return i != m_bonds.end();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a factory class to generate compounds
|
||||
// known amino acids and bases
|
||||
|
||||
CIFPP_EXPORT const std::map<std::string, char> kAAMap{
|
||||
const std::map<std::string, char> compound_factory::kAAMap{
|
||||
{"ALA", 'A'},
|
||||
{"ARG", 'R'},
|
||||
{"ASN", 'N'},
|
||||
@@ -250,7 +249,7 @@ CIFPP_EXPORT const std::map<std::string, char> kAAMap{
|
||||
{"GLX", 'Z'},
|
||||
{"ASX", 'B'}};
|
||||
|
||||
CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
|
||||
const std::map<std::string, char> compound_factory::kBaseMap{
|
||||
{"A", 'A'},
|
||||
{"C", 'C'},
|
||||
{"G", 'G'},
|
||||
@@ -262,32 +261,33 @@ CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
|
||||
{"DT", 'T'}};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a factory class to generate compounds
|
||||
|
||||
class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryImpl>
|
||||
class compound_factory_impl : public std::enable_shared_from_this<compound_factory_impl>
|
||||
{
|
||||
public:
|
||||
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
|
||||
compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next);
|
||||
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
virtual ~CompoundFactoryImpl()
|
||||
virtual ~compound_factory_impl()
|
||||
{
|
||||
for (auto c: mCompounds)
|
||||
for (auto c: m_compounds)
|
||||
delete c;
|
||||
}
|
||||
|
||||
Compound *get(std::string id)
|
||||
compound *get(std::string id)
|
||||
{
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
ba::to_upper(id);
|
||||
cif::to_upper(id);
|
||||
|
||||
Compound *result = nullptr;
|
||||
compound *result = nullptr;
|
||||
|
||||
// walk the list, see if any of us has the compound already
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
|
||||
{
|
||||
for (auto cmp : impl->mCompounds)
|
||||
for (auto cmp : impl->m_compounds)
|
||||
{
|
||||
if (cmp->id() == id)
|
||||
{
|
||||
@@ -300,9 +300,9 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr and mMissing.count(id) == 0)
|
||||
if (result == nullptr and m_missing.count(id) == 0)
|
||||
{
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
|
||||
{
|
||||
result = impl->create(id);
|
||||
if (result != nullptr)
|
||||
@@ -310,32 +310,32 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
|
||||
}
|
||||
|
||||
if (result == nullptr)
|
||||
mMissing.insert(id);
|
||||
m_missing.insert(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> next() const
|
||||
std::shared_ptr<compound_factory_impl> next() const
|
||||
{
|
||||
return mNext;
|
||||
return m_next;
|
||||
}
|
||||
|
||||
bool isKnownPeptide(const std::string &resName)
|
||||
bool is_known_peptide(const std::string &resName)
|
||||
{
|
||||
return mKnownPeptides.count(resName) or
|
||||
(mNext and mNext->isKnownPeptide(resName));
|
||||
return m_known_peptides.count(resName) or
|
||||
(m_next and m_next->is_known_peptide(resName));
|
||||
}
|
||||
|
||||
bool isKnownBase(const std::string &resName)
|
||||
bool is_known_base(const std::string &resName)
|
||||
{
|
||||
return mKnownBases.count(resName) or
|
||||
(mNext and mNext->isKnownBase(resName));
|
||||
return m_known_bases.count(resName) or
|
||||
(m_next and m_next->is_known_base(resName));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
virtual Compound *create(const std::string &id)
|
||||
virtual compound *create(const std::string &id)
|
||||
{
|
||||
// For the base class we assume every compound is preloaded
|
||||
return nullptr;
|
||||
@@ -343,34 +343,34 @@ class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryI
|
||||
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
std::vector<Compound *> mCompounds;
|
||||
std::set<std::string> mKnownPeptides;
|
||||
std::set<std::string> mKnownBases;
|
||||
std::set<std::string> mMissing;
|
||||
std::shared_ptr<CompoundFactoryImpl> mNext;
|
||||
std::vector<compound *> m_compounds;
|
||||
std::set<std::string> m_known_peptides;
|
||||
std::set<std::string> m_known_bases;
|
||||
std::set<std::string> m_missing;
|
||||
std::shared_ptr<compound_factory_impl> m_next;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: mNext(next)
|
||||
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
|
||||
: m_next(next)
|
||||
{
|
||||
for (const auto &[key, value] : kAAMap)
|
||||
mKnownPeptides.insert(key);
|
||||
for (const auto &[key, value] : compound_factory::kAAMap)
|
||||
m_known_peptides.insert(key);
|
||||
|
||||
for (const auto &[key, value] : kBaseMap)
|
||||
mKnownBases.insert(key);
|
||||
for (const auto &[key, value] : compound_factory::kBaseMap)
|
||||
m_known_bases.insert(key);
|
||||
}
|
||||
|
||||
CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: mNext(next)
|
||||
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
|
||||
: m_next(next)
|
||||
{
|
||||
cif::File cifFile(file);
|
||||
cif::file cifFile(file);
|
||||
|
||||
auto compList = cifFile.get("comp_list");
|
||||
if (compList) // So this is a CCP4 restraints file, special handling
|
||||
if (cifFile.contains("comp_list")) // So this is a CCP4 restraints file, special handling
|
||||
{
|
||||
auto &chemComp = (*compList)["chem_comp"];
|
||||
auto &compList = cifFile["comp_list"];
|
||||
auto &chemComp = compList["chem_comp"];
|
||||
|
||||
for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
|
||||
{
|
||||
@@ -397,7 +397,7 @@ CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std:
|
||||
|
||||
if (cif::iequals(id, "gly"))
|
||||
type = "peptide linking";
|
||||
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide"))
|
||||
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
|
||||
type = "L-peptide linking";
|
||||
else if (cif::iequals(group, "DNA"))
|
||||
type = "DNA linking";
|
||||
@@ -408,61 +408,61 @@ CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std:
|
||||
|
||||
auto &db = cifFile["comp_" + id];
|
||||
|
||||
mCompounds.push_back(new Compound(db, id, name, type));
|
||||
m_compounds.push_back(new compound(db, id, name, type, group));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// A CCD components file, validate it first
|
||||
cifFile.loadDictionary("mmcif_pdbx_v50");
|
||||
cifFile.load_dictionary("mmcif_pdbx");
|
||||
|
||||
if (not cifFile.isValid())
|
||||
if (not cifFile.is_valid())
|
||||
throw std::runtime_error("Invalid compound file");
|
||||
|
||||
for (auto &db : cifFile)
|
||||
mCompounds.push_back(new Compound(db));
|
||||
m_compounds.push_back(new compound(db));
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the cached components.cif file from CCD
|
||||
|
||||
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
|
||||
class CCD_compound_factory_impl : public compound_factory_impl
|
||||
{
|
||||
public:
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next, const fs::path& file)
|
||||
: CompoundFactoryImpl(next)
|
||||
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path& file)
|
||||
: compound_factory_impl(next)
|
||||
, mCompoundsFile(file)
|
||||
{
|
||||
}
|
||||
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: CompoundFactoryImpl(next)
|
||||
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
|
||||
: compound_factory_impl(next)
|
||||
{
|
||||
}
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
cif::DatablockIndex mIndex;
|
||||
cif::parser::datablock_index mIndex;
|
||||
fs::path mCompoundsFile;
|
||||
};
|
||||
|
||||
Compound *CCDCompoundFactoryImpl::create(const std::string &id)
|
||||
compound *CCD_compound_factory_impl::create(const std::string &id)
|
||||
{
|
||||
Compound *result = nullptr;
|
||||
compound *result = nullptr;
|
||||
|
||||
std::unique_ptr<std::istream> ccd;
|
||||
|
||||
if (mCompoundsFile.empty())
|
||||
{
|
||||
ccd = cif::loadResource("components.cif");
|
||||
ccd = cif::load_resource("components.cif");
|
||||
if (not ccd)
|
||||
throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
|
||||
}
|
||||
else
|
||||
ccd.reset(new std::ifstream(mCompoundsFile));
|
||||
|
||||
cif::File file;
|
||||
cif::file file;
|
||||
|
||||
if (mIndex.empty())
|
||||
{
|
||||
@@ -473,8 +473,8 @@ Compound *CCDCompoundFactoryImpl::create(const std::string &id)
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
cif::Parser parser(*ccd, file, false);
|
||||
mIndex = parser.indexDatablocks();
|
||||
cif::parser parser(*ccd, file);
|
||||
mIndex = parser.index_datablocks();
|
||||
|
||||
if (cif::VERBOSE > 1)
|
||||
std::cout << " done" << std::endl;
|
||||
@@ -482,7 +482,7 @@ Compound *CCDCompoundFactoryImpl::create(const std::string &id)
|
||||
// reload the resource, perhaps this should be improved...
|
||||
if (mCompoundsFile.empty())
|
||||
{
|
||||
ccd = cif::loadResource("components.cif");
|
||||
ccd = cif::load_resource("components.cif");
|
||||
if (not ccd)
|
||||
throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
|
||||
}
|
||||
@@ -496,25 +496,25 @@ Compound *CCDCompoundFactoryImpl::create(const std::string &id)
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
cif::Parser parser(*ccd, file, false);
|
||||
parser.parseSingleDatablock(id, mIndex);
|
||||
cif::parser parser(*ccd, file);
|
||||
parser.parse_single_datablock(id, mIndex);
|
||||
|
||||
if (cif::VERBOSE > 1)
|
||||
std::cout << " done" << std::endl;
|
||||
|
||||
if (not file.empty())
|
||||
{
|
||||
auto &db = file.firstDatablock();
|
||||
if (db.getName() == id)
|
||||
auto &db = file.front();
|
||||
if (db.name() == id)
|
||||
{
|
||||
result = new Compound(db);
|
||||
result = new compound(db);
|
||||
|
||||
std::shared_lock lock(mMutex);
|
||||
mCompounds.push_back(result);
|
||||
m_compounds.push_back(result);
|
||||
}
|
||||
}
|
||||
|
||||
if (result == nullptr and cif::VERBOSE)
|
||||
if (result == nullptr and cif::VERBOSE > 0)
|
||||
std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;
|
||||
|
||||
return result;
|
||||
@@ -523,43 +523,43 @@ Compound *CCDCompoundFactoryImpl::create(const std::string &id)
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the data found in CCP4's monomers lib
|
||||
|
||||
class CCP4CompoundFactoryImpl : public CompoundFactoryImpl
|
||||
class CCP4_compound_factory_impl : public compound_factory_impl
|
||||
{
|
||||
public:
|
||||
CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next = nullptr);
|
||||
CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next = nullptr);
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
cif::File mFile;
|
||||
fs::path mCLIBD_MON;
|
||||
cif::file m_file;
|
||||
fs::path m_CLIBD_MON;
|
||||
};
|
||||
|
||||
CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: CompoundFactoryImpl(next)
|
||||
, mFile((clibd_mon / "list" / "mon_lib_list.cif").string())
|
||||
, mCLIBD_MON(clibd_mon)
|
||||
CCP4_compound_factory_impl::CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next)
|
||||
: compound_factory_impl(next)
|
||||
, m_file((clibd_mon / "list" / "mon_lib_list.cif").string())
|
||||
, m_CLIBD_MON(clibd_mon)
|
||||
{
|
||||
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
|
||||
|
||||
auto &chemComps = mFile["comp_list"]["chem_comp"];
|
||||
auto &chemComps = m_file["comp_list"]["chem_comp"];
|
||||
|
||||
for (const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>("group", "three_letter_code"))
|
||||
{
|
||||
if (std::regex_match(group, peptideRx))
|
||||
mKnownPeptides.insert(threeLetterCode);
|
||||
else if (ba::iequals(group, "DNA") or ba::iequals(group, "RNA"))
|
||||
mKnownBases.insert(threeLetterCode);
|
||||
m_known_peptides.insert(threeLetterCode);
|
||||
else if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
|
||||
m_known_bases.insert(threeLetterCode);
|
||||
}
|
||||
}
|
||||
|
||||
Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
|
||||
compound *CCP4_compound_factory_impl::create(const std::string &id)
|
||||
{
|
||||
Compound *result = nullptr;
|
||||
compound *result = nullptr;
|
||||
|
||||
auto &cat = mFile["comp_list"]["chem_comp"];
|
||||
auto &cat = m_file["comp_list"]["chem_comp"];
|
||||
|
||||
auto rs = cat.find(cif::Key("three_letter_code") == id);
|
||||
auto rs = cat.find(cif::key("three_letter_code") == id);
|
||||
|
||||
if (rs.size() == 1)
|
||||
{
|
||||
@@ -570,14 +570,14 @@ Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
|
||||
cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
|
||||
row.get("name", "group", "number_atoms_all", "number_atoms_nh");
|
||||
|
||||
fs::path resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
|
||||
fs::path resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + ".cif");
|
||||
|
||||
// Compare id against each name explicitly: a bare string literal used as a
// condition is always true, which made this fallback fire for every id whose
// file was missing.
if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN")) // seriously...
|
||||
resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
|
||||
resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");
|
||||
|
||||
if (fs::exists(resFile))
|
||||
{
|
||||
cif::File cf(resFile.string());
|
||||
cif::file cf(resFile.string());
|
||||
|
||||
// locate the datablock
|
||||
auto &db = cf["comp_" + id];
|
||||
@@ -605,7 +605,7 @@ Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
|
||||
|
||||
if (cif::iequals(id, "gly"))
|
||||
type = "peptide linking";
|
||||
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide"))
|
||||
else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
|
||||
type = "L-peptide linking";
|
||||
else if (cif::iequals(group, "DNA"))
|
||||
type = "DNA linking";
|
||||
@@ -614,8 +614,8 @@ Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
|
||||
else
|
||||
type = "non-polymer";
|
||||
|
||||
mCompounds.push_back(new Compound(db, id, name, type));
|
||||
result = mCompounds.back();
|
||||
m_compounds.push_back(new compound(db, id, name, type, group));
|
||||
result = m_compounds.back();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -624,77 +624,78 @@ Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::unique_ptr<CompoundFactory> CompoundFactory::sInstance;
|
||||
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
|
||||
bool CompoundFactory::sUseThreadLocalInstance;
|
||||
std::unique_ptr<compound_factory> compound_factory::s_instance;
|
||||
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
|
||||
bool compound_factory::s_use_thread_local_instance;
|
||||
|
||||
void CompoundFactory::init(bool useThreadLocalInstanceOnly)
|
||||
void compound_factory::init(bool useThreadLocalInstanceOnly)
|
||||
{
|
||||
sUseThreadLocalInstance = useThreadLocalInstanceOnly;
|
||||
s_use_thread_local_instance = useThreadLocalInstanceOnly;
|
||||
}
|
||||
|
||||
CompoundFactory::CompoundFactory()
|
||||
: mImpl(nullptr)
|
||||
compound_factory::compound_factory()
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
auto ccd = cif::loadResource("components.cif");
|
||||
auto ccd = cif::load_resource("components.cif");
|
||||
if (ccd)
|
||||
mImpl.reset(new CCDCompoundFactoryImpl(mImpl));
|
||||
else if (cif::VERBOSE)
|
||||
m_impl.reset(new CCD_compound_factory_impl(m_impl));
|
||||
else if (cif::VERBOSE > 0)
|
||||
std::cerr << "CCD components.cif file was not found" << std::endl;
|
||||
|
||||
const char *clibd_mon = getenv("CLIBD_MON");
|
||||
if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
|
||||
mImpl.reset(new CCP4CompoundFactoryImpl(clibd_mon));
|
||||
else if (cif::VERBOSE)
|
||||
// Chain to whatever implementation was installed above (the CCD one, if
// found) so that enabling the CCP4 monomer library does not discard it.
m_impl.reset(new CCP4_compound_factory_impl(clibd_mon, m_impl));
|
||||
else if (cif::VERBOSE > 0)
|
||||
std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
|
||||
|
||||
}
|
||||
|
||||
CompoundFactory::~CompoundFactory()
|
||||
compound_factory::~compound_factory()
|
||||
{
|
||||
}
|
||||
|
||||
CompoundFactory &CompoundFactory::instance()
|
||||
compound_factory &compound_factory::instance()
|
||||
{
|
||||
if (sUseThreadLocalInstance)
|
||||
if (s_use_thread_local_instance)
|
||||
{
|
||||
if (not tlInstance)
|
||||
tlInstance.reset(new CompoundFactory());
|
||||
return *tlInstance;
|
||||
if (not tl_instance)
|
||||
tl_instance.reset(new compound_factory());
|
||||
return *tl_instance;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (not sInstance)
|
||||
sInstance.reset(new CompoundFactory());
|
||||
return *sInstance;
|
||||
if (not s_instance)
|
||||
s_instance.reset(new compound_factory());
|
||||
return *s_instance;
|
||||
}
|
||||
}
|
||||
|
||||
void CompoundFactory::clear()
|
||||
void compound_factory::clear()
|
||||
{
|
||||
if (sUseThreadLocalInstance)
|
||||
tlInstance.reset(nullptr);
|
||||
if (s_use_thread_local_instance)
|
||||
tl_instance.reset(nullptr);
|
||||
else
|
||||
sInstance.reset();
|
||||
s_instance.reset();
|
||||
}
|
||||
|
||||
void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFile)
|
||||
void compound_factory::set_default_dictionary(const fs::path &inDictFile)
|
||||
{
|
||||
if (not fs::exists(inDictFile))
|
||||
throw std::runtime_error("file not found: " + inDictFile.string());
|
||||
|
||||
try
|
||||
{
|
||||
mImpl.reset(new CCDCompoundFactoryImpl(mImpl, inDictFile));
|
||||
m_impl.reset(new CCD_compound_factory_impl(m_impl, inDictFile));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
if (cif::VERBOSE >= 0)
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
|
||||
void compound_factory::push_dictionary(const fs::path &inDictFile)
|
||||
{
|
||||
if (not fs::exists(inDictFile))
|
||||
throw std::runtime_error("file not found: " + inDictFile.string());
|
||||
@@ -705,42 +706,43 @@ void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
|
||||
|
||||
try
|
||||
{
|
||||
mImpl.reset(new CompoundFactoryImpl(inDictFile, mImpl));
|
||||
m_impl.reset(new compound_factory_impl(inDictFile, m_impl));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
if (cif::VERBOSE >= 0)
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void CompoundFactory::popDictionary()
|
||||
void compound_factory::pop_dictionary()
|
||||
{
|
||||
if (mImpl)
|
||||
mImpl = mImpl->next();
|
||||
if (m_impl)
|
||||
m_impl = m_impl->next();
|
||||
}
|
||||
|
||||
const Compound *CompoundFactory::create(std::string id)
|
||||
const compound *compound_factory::create(std::string id)
|
||||
{
|
||||
// static bool warned = false;
|
||||
|
||||
// if (mImpl and warned == false)
|
||||
// if (m_impl and warned == false)
|
||||
// {
|
||||
// std::cerr << "Warning: no compound information library was found, resulting data may be incorrect or incomplete" << std::endl;
|
||||
// warned = true;
|
||||
// }
|
||||
|
||||
return mImpl ? mImpl->get(id) : nullptr;
|
||||
return m_impl ? m_impl->get(id) : nullptr;
|
||||
}
|
||||
|
||||
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
|
||||
bool compound_factory::is_known_peptide(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownPeptide(resName) : kAAMap.count(resName) > 0;
|
||||
return m_impl ? m_impl->is_known_peptide(resName) : kAAMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
bool CompoundFactory::isKnownBase(const std::string &resName) const
|
||||
bool compound_factory::is_known_base(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownBase(resName) : kBaseMap.count(resName) > 0;
|
||||
return m_impl ? m_impl->is_known_base(resName) : kBaseMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
} // namespace mmcif
|
||||
} // namespace pdbx
|
||||
138
src/condition.cpp
Normal file
138
src/condition.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
#include <cif++/condition.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// Free-function wrapper around category::fields(): returns the set of
// field names reported by the given category.
iset get_category_fields(const category &cat)
{
	iset fields = cat.fields();
	return fields;
}
|
||||
|
||||
uint16_t get_column_ix(const category &cat, std::string_view col)
|
||||
{
|
||||
return cat.get_column_ix(col);
|
||||
}
|
||||
|
||||
bool is_column_type_uchar(const category &cat, std::string_view col)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
auto cv = cat.get_cat_validator();
|
||||
if (cv)
|
||||
{
|
||||
auto iv = cv->get_validator_for_item(col);
|
||||
if (iv != nullptr and iv->m_type != nullptr)
|
||||
{
|
||||
auto type = iv->m_type;
|
||||
result = type->m_primitive_type == DDL_PrimitiveType::UChar;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Binds this condition to category c: resolves the column index for the
// item tag and records whether the column is of the UChar primitive type.
// When the category has a validator and the item is the category's one and
// only key field, the matching row is looked up right away and stored in
// m_single_hit. Always returns this.
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
	m_item_ix = get_column_ix(c, m_item_tag);
	m_icase = is_column_type_uchar(c, m_item_tag);

	if (c.get_cat_validator() != nullptr)
	{
		const bool is_sole_key =
			c.key_field_indices().contains(m_item_ix) and
			c.key_field_indices().size() == 1;

		if (is_sole_key)
			m_single_hit = c[{ { m_item_tag, m_value } }];
	}

	return this;
}
|
||||
|
||||
// Prepares every sub condition for category c and then flattens the tree:
// each sub condition that is itself an and_condition_impl is removed, its
// own sub conditions are appended to this one, and the emptied nested
// object is deleted. Always returns this.
condition_impl *and_condition_impl::prepare(const category &c)
{
	for (auto &sub : mSub)
		sub = sub->prepare(c);

	const auto is_nested_and = [](condition_impl *sub)
	{
		return dynamic_cast<and_condition_impl *>(sub) != nullptr;
	};

	for (auto si = find_if(mSub.begin(), mSub.end(), is_nested_and);
		 si != mSub.end();
		 si = find_if(mSub.begin(), mSub.end(), is_nested_and))
	{
		auto *nested = static_cast<and_condition_impl *>(*si);

		// take the nested node out first, then adopt its children
		mSub.erase(si);
		mSub.insert(mSub.end(), nested->mSub.begin(), nested->mSub.end());

		// the children now belong to this node, so empty the nested list
		// before deleting it
		nested->mSub.clear();
		delete nested;
	}

	return this;
}
|
||||
|
||||
// Prepares both operands for category c. When one operand is a
// key_equals_condition_impl and the other a key_is_empty_condition_impl
// (in either order), the pair is replaced by a single prepared
// key_equals_or_empty_condition_impl and this object deletes itself.
// Returns the condition that should be used from now on.
//
// NOTE(review): nothing here checks that the 'equals' and 'empty' operands
// refer to the same item; confirm the merged implementation copes with that.
condition_impl *or_condition_impl::prepare(const category &c)
{
	mA = mA->prepare(c);
	mB = mB->prepare(c);

	auto *equals = dynamic_cast<key_equals_condition_impl *>(mA);
	auto *empty = dynamic_cast<key_is_empty_condition_impl *>(mB);

	// nothing matched as (A, B), try the operands the other way around
	if (equals == nullptr and empty == nullptr)
	{
		equals = dynamic_cast<key_equals_condition_impl *>(mB);
		empty = dynamic_cast<key_is_empty_condition_impl *>(mA);
	}

	if (equals == nullptr or empty == nullptr)
		return this;

	condition_impl *merged = new detail::key_equals_or_empty_condition_impl(equals, empty);
	merged = merged->prepare(c);

	// this object is replaced by 'merged'; no member may be touched below
	delete this;

	return merged;
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
void condition::prepare(const category &c)
|
||||
{
|
||||
if (m_impl)
|
||||
m_impl = m_impl->prepare(c);
|
||||
|
||||
m_prepared = true;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
319
src/datablock.cpp
Normal file
319
src/datablock.cpp
Normal file
@@ -0,0 +1,319 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/datablock.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
void datablock::set_validator(const validator *v)
|
||||
{
|
||||
m_validator = v;
|
||||
|
||||
for (auto &cat : *this)
|
||||
cat.set_validator(v, *this);
|
||||
}
|
||||
|
||||
// Returns the validator stored in this datablock; may be nullptr.
const validator *datablock::get_validator() const
{
	const validator *v = m_validator;
	return v;
}
|
||||
|
||||
bool datablock::is_valid() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
throw std::runtime_error("Validator not specified");
|
||||
|
||||
bool result = true;
|
||||
for (auto &cat : *this)
|
||||
result = cat.is_valid() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool datablock::validate_links() const
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.validate_links() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Returns the category called name (compared case-insensitively). When no
// such category exists a new one is appended at the end; it receives the
// datablock's validator if one is set.
category &datablock::operator[](std::string_view name)
{
	for (auto &cat : *this)
	{
		if (iequals(cat.name(), name))
			return cat;
	}

	category &added = emplace_back(name);

	if (m_validator)
		added.set_validator(m_validator, *this);

	return added;
}
|
||||
|
||||
// Returns the category called name (compared case-insensitively), or a
// reference to a shared, default constructed category when there is none.
const category &datablock::operator[](std::string_view name) const
{
	static const category s_empty;

	for (auto &cat : *this)
	{
		if (iequals(cat.name(), name))
			return cat;
	}

	return s_empty;
}
|
||||
|
||||
// Returns a pointer to the category called name (compared
// case-insensitively), or nullptr when this datablock has no such category.
category *datablock::get(std::string_view name)
{
	for (auto &cat : *this)
	{
		if (iequals(cat.name(), name))
			return &cat;
	}

	return nullptr;
}
|
||||
|
||||
// Const overload: forwards to the non-const get() and returns its result
// as a pointer to const.
const category *datablock::get(std::string_view name) const
{
	auto *self = const_cast<datablock *>(this);
	return self->get(name);
}
|
||||
|
||||
std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
|
||||
{
|
||||
bool is_new = true;
|
||||
|
||||
auto i = begin();
|
||||
while (i != end())
|
||||
{
|
||||
if (iequals(name, i->name()))
|
||||
{
|
||||
is_new = false;
|
||||
|
||||
if (i != begin())
|
||||
{
|
||||
auto n = std::next(i);
|
||||
splice(begin(), *this, i, n);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &c = emplace_front(name);
|
||||
c.set_validator(m_validator, *this);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
}
|
||||
|
||||
std::vector<std::string> datablock::get_tag_order() const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
// for entry and audit_conform on top
|
||||
|
||||
auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() == "entry" or cat.name() == "audit_conform")
|
||||
continue;
|
||||
auto cto = cat.get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void datablock::write(std::ostream &os) const
|
||||
{
|
||||
os << "data_" << m_name << std::endl
|
||||
<< "# " << std::endl;
|
||||
|
||||
// mmcif support, sort of. First write the 'entry' Category
|
||||
// and if it exists, _AND_ we have a Validator, write out the
|
||||
// audit_conform record.
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry")
|
||||
continue;
|
||||
|
||||
cat.write(os);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
if (get("audit_conform"))
|
||||
get("audit_conform")->write(os);
|
||||
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
|
||||
{
|
||||
category auditConform("audit_conform");
|
||||
auditConform.emplace({
|
||||
{"dict_name", m_validator->name()},
|
||||
{"dict_version", m_validator->version()}});
|
||||
auditConform.write(os);
|
||||
}
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry" and cat.name() != "audit_conform")
|
||||
cat.write(os);
|
||||
}
|
||||
}
|
||||
|
||||
void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order)
|
||||
{
|
||||
os << "data_" << m_name << std::endl
|
||||
<< "# " << std::endl;
|
||||
|
||||
std::vector<std::string> cat_order;
|
||||
for (auto &o : tag_order)
|
||||
{
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(o);
|
||||
if (find_if(cat_order.rbegin(), cat_order.rend(), [cat_name](const std::string &s) -> bool
|
||||
{ return iequals(cat_name, s); }) == cat_order.rend())
|
||||
cat_order.push_back(cat_name);
|
||||
}
|
||||
|
||||
for (auto &c : cat_order)
|
||||
{
|
||||
auto cat = get(c);
|
||||
if (cat == nullptr)
|
||||
continue;
|
||||
|
||||
std::vector<std::string> items;
|
||||
for (auto &o : tag_order)
|
||||
{
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(o);
|
||||
|
||||
if (cat_name == c)
|
||||
items.push_back(item_name);
|
||||
}
|
||||
|
||||
cat->write(os, items);
|
||||
}
|
||||
|
||||
// for any Category we missed in the catOrder
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (find_if(cat_order.begin(), cat_order.end(), [&](const std::string &s) -> bool
|
||||
{ return iequals(cat.name(), s); }) != cat_order.end())
|
||||
continue;
|
||||
|
||||
cat.write(os);
|
||||
}
|
||||
}
|
||||
|
||||
bool datablock::operator==(const datablock &rhs) const
|
||||
{
|
||||
auto &dbA = *this;
|
||||
auto &dbB = rhs;
|
||||
|
||||
std::vector<std::string> catA, catB;
|
||||
|
||||
for (auto &cat : dbA)
|
||||
{
|
||||
if (not cat.empty())
|
||||
catA.push_back(cat.name());
|
||||
}
|
||||
std::sort(catA.begin(), catA.end());
|
||||
|
||||
for (auto &cat : dbB)
|
||||
{
|
||||
if (not cat.empty())
|
||||
catB.push_back(cat.name());
|
||||
}
|
||||
std::sort(catB.begin(), catB.end());
|
||||
|
||||
// loop over categories twice, to group output
|
||||
// First iteration is to list missing categories.
|
||||
|
||||
std::vector<std::string> missingA, missingB;
|
||||
|
||||
auto catA_i = catA.begin(), catB_i = catB.begin();
|
||||
|
||||
while (catA_i != catA.end() and catB_i != catB.end())
|
||||
{
|
||||
if (not iequals(*catA_i, *catB_i))
|
||||
return false;
|
||||
|
||||
++catA_i, ++catB_i;
|
||||
}
|
||||
|
||||
if (catA_i != catA.end() or catB_i != catB.end())
|
||||
return false;
|
||||
|
||||
// Second loop, now compare category values
|
||||
catA_i = catA.begin(), catB_i = catB.begin();
|
||||
|
||||
while (catA_i != catA.end() and catB_i != catB.end())
|
||||
{
|
||||
std::string nA = *catA_i;
|
||||
to_lower(nA);
|
||||
|
||||
std::string nB = *catB_i;
|
||||
to_lower(nB);
|
||||
|
||||
int d = nA.compare(nB);
|
||||
if (d > 0)
|
||||
++catB_i;
|
||||
else if (d < 0)
|
||||
++catA_i;
|
||||
else
|
||||
{
|
||||
if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
|
||||
return false;
|
||||
++catA_i;
|
||||
++catB_i;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
490
src/dictionary_parser.cpp
Normal file
490
src/dictionary_parser.cpp
Normal file
@@ -0,0 +1,490 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
using namespace literals;
|
||||
|
||||
// Replaces every non-overlapping occurrence of pat in s with rep, scanning
// from left to right. Text inserted by a replacement is not scanned again,
// so the function also terminates when rep itself contains pat. An empty
// pattern leaves s untouched.
inline void replace_all(std::string &s, std::string_view pat, std::string_view rep)
{
	// an empty pattern matches at every position and would never finish
	if (pat.empty())
		return;

	for (std::string::size_type i = s.find(pat); i != std::string::npos; i = s.find(pat, i + rep.size()))
		s.replace(i, pat.size(), rep.data(), rep.size());
}
|
||||
|
||||
class dictionary_parser : public parser
|
||||
{
|
||||
public:
|
||||
// Constructs a parser reading from is into file f. The validator is kept
// by reference and is filled in by load_dictionary().
dictionary_parser(validator &validator, std::istream &is, file &f)
	: parser(is, f), m_validator(validator)
{
}
|
||||
|
||||
void load_dictionary()
|
||||
{
|
||||
std::unique_ptr<datablock> dict;
|
||||
auto savedDatablock = m_datablock;
|
||||
|
||||
try
|
||||
{
|
||||
while (m_lookahead != CIFToken::Eof)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::GLOBAL:
|
||||
parse_global();
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
dict.reset(new datablock(m_token_value)); // dummy datablock, for constructing the validator only
|
||||
m_datablock = dict.get();
|
||||
|
||||
match(CIFToken::DATA);
|
||||
parse_datablock();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
error(ex.what());
|
||||
}
|
||||
|
||||
// store all validators
|
||||
for (auto &ic : mCategoryValidators)
|
||||
m_validator.add_category_validator(std::move(ic));
|
||||
mCategoryValidators.clear();
|
||||
|
||||
for (auto &iv : mItemValidators)
|
||||
{
|
||||
auto cv = m_validator.get_validator_for_category(iv.first);
|
||||
if (cv == nullptr)
|
||||
error("Undefined category '" + iv.first);
|
||||
|
||||
for (auto &v : iv.second)
|
||||
const_cast<category_validator *>(cv)->addItemValidator(std::move(v));
|
||||
}
|
||||
|
||||
// check all item validators for having a typeValidator
|
||||
|
||||
if (dict)
|
||||
link_items();
|
||||
|
||||
// store meta information
|
||||
datablock::iterator info;
|
||||
bool is_new;
|
||||
std::tie(info, is_new) = m_datablock->emplace("dictionary");
|
||||
if (not is_new and not info->empty())
|
||||
{
|
||||
auto r = info->front();
|
||||
m_validator.set_name(r["title"].as<std::string>());
|
||||
m_validator.version(r["version"].as<std::string>());
|
||||
}
|
||||
|
||||
m_datablock = savedDatablock;
|
||||
|
||||
mItemValidators.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void parse_save_frame() override
|
||||
{
|
||||
if (not m_collected_item_types)
|
||||
m_collected_item_types = collect_item_types();
|
||||
|
||||
std::string saveFrameName = m_token_value;
|
||||
|
||||
if (saveFrameName.empty())
|
||||
error("Invalid save frame, should contain more than just 'save_' here");
|
||||
|
||||
bool isCategorySaveFrame = m_token_value[0] != '_';
|
||||
|
||||
datablock dict(m_token_value);
|
||||
datablock::iterator cat = dict.end();
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
if (m_lookahead == CIFToken::LOOP)
|
||||
{
|
||||
cat = dict.end(); // should start a new category
|
||||
|
||||
match(CIFToken::LOOP);
|
||||
|
||||
std::vector<std::string> tags;
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
std::string catName, item_name;
|
||||
std::tie(catName, item_name) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat == dict.end())
|
||||
std::tie(cat, std::ignore) = dict.emplace(catName);
|
||||
else if (not iequals(cat->name(), catName))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
tags.push_back(item_name);
|
||||
match(CIFToken::Tag);
|
||||
}
|
||||
|
||||
while (m_lookahead == CIFToken::Value)
|
||||
{
|
||||
cat->emplace({});
|
||||
auto row = cat->back();
|
||||
|
||||
for (auto tag : tags)
|
||||
{
|
||||
row[tag] = m_token_value;
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
cat = dict.end();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string catName, item_name;
|
||||
std::tie(catName, item_name) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat == dict.end() or not iequals(cat->name(), catName))
|
||||
std::tie(cat, std::ignore) = dict.emplace(catName);
|
||||
|
||||
match(CIFToken::Tag);
|
||||
|
||||
if (cat->empty())
|
||||
cat->emplace({});
|
||||
cat->back()[item_name] = m_token_value;
|
||||
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
|
||||
if (isCategorySaveFrame)
|
||||
{
|
||||
std::string category = dict["category"].front().get<std::string>("id");
|
||||
|
||||
std::vector<std::string> keys;
|
||||
for (auto k : dict["category_key"])
|
||||
keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>())));
|
||||
|
||||
iset groups;
|
||||
for (auto g : dict["category_group"])
|
||||
groups.insert(g["id"].as<std::string>());
|
||||
|
||||
mCategoryValidators.push_back(category_validator{ category, keys, groups });
|
||||
}
|
||||
else
|
||||
{
|
||||
// if the type code is missing, this must be a pointer, just skip it
|
||||
std::string typeCode = dict["item_type"].front().get<std::string>("code");
|
||||
|
||||
const type_validator *tv = nullptr;
|
||||
if (not(typeCode.empty() or typeCode == "?"))
|
||||
tv = m_validator.get_validator_for_type(typeCode);
|
||||
|
||||
iset ess;
|
||||
for (auto e : dict["item_enumeration"])
|
||||
ess.insert(e["value"].as<std::string>());
|
||||
|
||||
std::string defaultValue = dict["item_default"].front().get<std::string>("value");
|
||||
bool defaultIsNull = false;
|
||||
if (defaultValue.empty())
|
||||
{
|
||||
// TODO: Is this correct???
|
||||
for (auto r : dict["_item_default"])
|
||||
{
|
||||
defaultIsNull = r["value"].is_null();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// collect the dict from our dataBlock and construct validators
|
||||
for (auto i : dict["item"])
|
||||
{
|
||||
std::string tagName, category, mandatory;
|
||||
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
|
||||
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(tagName);
|
||||
|
||||
if (cat_name.empty() or item_name.empty())
|
||||
error("Invalid tag name in _item.name " + tagName);
|
||||
|
||||
if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
|
||||
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
|
||||
else
|
||||
category = cat_name;
|
||||
|
||||
auto &ivs = mItemValidators[category];
|
||||
|
||||
auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
|
||||
if (vi == ivs.end())
|
||||
ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull });
|
||||
else
|
||||
{
|
||||
// need to update the itemValidator?
|
||||
if (vi->m_mandatory != (iequals(mandatory, "yes")))
|
||||
{
|
||||
if (VERBOSE > 2)
|
||||
{
|
||||
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
|
||||
|
||||
if (iequals(tagName, saveFrameName))
|
||||
std::cerr << "choosing " << mandatory << std::endl;
|
||||
else
|
||||
std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << std::endl;
|
||||
}
|
||||
|
||||
if (iequals(tagName, saveFrameName))
|
||||
vi->m_mandatory = (iequals(mandatory, "yes"));
|
||||
}
|
||||
|
||||
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
|
||||
{
|
||||
if (VERBOSE > 1)
|
||||
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
|
||||
}
|
||||
|
||||
// vi->mMandatory = (iequals(mandatory, "yes"));
|
||||
if (vi->m_type == nullptr)
|
||||
vi->m_type = tv;
|
||||
|
||||
vi->m_enums.insert(ess.begin(), ess.end());
|
||||
|
||||
// anything else yet?
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
// collect the dict from our dataBlock and construct validators
|
||||
for (auto i : dict["item_linked"])
|
||||
{
|
||||
mLinkedItems.emplace(i.get<std::string,std::string>("child_name", "parent_name"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void link_items()
|
||||
{
|
||||
if (not m_datablock)
|
||||
error("no datablock");
|
||||
|
||||
auto &dict = *m_datablock;
|
||||
|
||||
// links are identified by a parent category, a child category and a group ID
|
||||
|
||||
using key_type = std::tuple<std::string, std::string, int>;
|
||||
|
||||
std::map<key_type, size_t> linkIndex;
|
||||
|
||||
// Each link group consists of a set of keys
|
||||
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
|
||||
|
||||
auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
|
||||
{
|
||||
auto &&[pkeys, ckeys] = linkKeys.at(ix);
|
||||
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < pkeys.size(); ++i)
|
||||
{
|
||||
if (pkeys[i] == pk and ckeys[i] == ck)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (not found)
|
||||
{
|
||||
pkeys.push_back(pk);
|
||||
ckeys.push_back(ck);
|
||||
}
|
||||
};
|
||||
|
||||
auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
|
||||
|
||||
for (auto gl : linkedGroupList)
|
||||
{
|
||||
std::string child, parent;
|
||||
int link_group_id;
|
||||
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
|
||||
|
||||
auto civ = m_validator.get_validator_for_item(child);
|
||||
if (civ == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
|
||||
|
||||
auto piv = m_validator.get_validator_for_item(parent);
|
||||
if (piv == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
|
||||
|
||||
key_type key{ piv->m_category->m_name, civ->m_category->m_name, link_group_id };
|
||||
if (not linkIndex.count(key))
|
||||
{
|
||||
linkIndex[key] = linkKeys.size();
|
||||
linkKeys.push_back({});
|
||||
}
|
||||
|
||||
size_t ix = linkIndex.at(key);
|
||||
addLink(ix, piv->m_tag, civ->m_tag);
|
||||
}
|
||||
|
||||
// Only process inline linked items if the linked group list is absent
|
||||
if (linkedGroupList.empty())
|
||||
{
|
||||
// for links recorded in categories but not in pdbx_item_linked_group_list
|
||||
for (auto li : mLinkedItems)
|
||||
{
|
||||
std::string child, parent;
|
||||
std::tie(child, parent) = li;
|
||||
|
||||
auto civ = m_validator.get_validator_for_item(child);
|
||||
if (civ == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
|
||||
|
||||
auto piv = m_validator.get_validator_for_item(parent);
|
||||
if (piv == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
|
||||
|
||||
key_type key{ piv->m_category->m_name, civ->m_category->m_name, 0 };
|
||||
if (not linkIndex.count(key))
|
||||
{
|
||||
linkIndex[key] = linkKeys.size();
|
||||
linkKeys.push_back({});
|
||||
}
|
||||
|
||||
size_t ix = linkIndex.at(key);
|
||||
addLink(ix, piv->m_tag, civ->m_tag);
|
||||
}
|
||||
}
|
||||
|
||||
auto &linkedGroup = dict["pdbx_item_linked_group"];
|
||||
|
||||
// now store the links in the validator
|
||||
for (auto &kv : linkIndex)
|
||||
{
|
||||
link_validator link = {};
|
||||
std::tie(link.m_parent_category, link.m_child_category, link.m_link_group_id) = kv.first;
|
||||
|
||||
std::tie(link.m_parent_keys, link.m_child_keys) = linkKeys[kv.second];
|
||||
|
||||
// look up the label
|
||||
for (auto r : linkedGroup.find("category_id"_key == link.m_child_category and "link_group_id"_key == link.m_link_group_id))
|
||||
{
|
||||
link.m_link_group_label = r["label"].as<std::string>();
|
||||
break;
|
||||
}
|
||||
|
||||
m_validator.add_link_validator(std::move(link));
|
||||
}
|
||||
|
||||
// now make sure the itemType is specified for all itemValidators
|
||||
|
||||
for (auto &cv : m_validator.m_category_validators)
|
||||
{
|
||||
for (auto &iv : cv.m_item_validators)
|
||||
{
|
||||
if (iv.m_type == nullptr and cif::VERBOSE >= 0)
|
||||
std::cerr << "Missing item_type for " << iv.m_tag << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool collect_item_types()
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
if (not m_datablock)
|
||||
error("no datablock");
|
||||
|
||||
auto &dict = *m_datablock;
|
||||
|
||||
for (auto t : dict["item_type_list"])
|
||||
{
|
||||
std::string code, primitiveCode, construct;
|
||||
cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
|
||||
|
||||
replace_all(construct, "\\n", "\n");
|
||||
replace_all(construct, "\\t", "\t");
|
||||
replace_all(construct, "\\\n", "");
|
||||
|
||||
try
|
||||
{
|
||||
type_validator v = {
|
||||
code, map_to_primitive_type(primitiveCode), construct
|
||||
};
|
||||
|
||||
m_validator.add_type_validator(std::move(v));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::throw_with_nested(parse_error(/*t.lineNr()*/ 0, "error in regular expression"));
|
||||
}
|
||||
|
||||
// Do not replace an already defined type validator, this won't work with pdbx_v40
|
||||
// as it has a name that is too strict for its own names :-)
|
||||
// if (mFileImpl.mTypeValidators.count(v))
|
||||
// mFileImpl.mTypeValidators.erase(v);
|
||||
|
||||
if (VERBOSE >= 5)
|
||||
std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
|
||||
|
||||
result = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
validator &m_validator;
|
||||
bool m_collected_item_types = false;
|
||||
|
||||
std::vector<category_validator> mCategoryValidators;
|
||||
std::map<std::string, std::vector<item_validator>> mItemValidators;
|
||||
std::set<std::tuple<std::string, std::string>> mLinkedItems;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Creates a validator called name and fills it by running a
// dictionary_parser over the stream is. The file object handed to the
// parser is local to this function and is discarded afterwards.
validator parse_dictionary(std::string_view name, std::istream &is)
{
	validator result(name);

	file scratch;
	dictionary_parser dict_parser(result, is, scratch);
	dict_parser.load_dictionary();

	return result;
}
|
||||
|
||||
} // namespace cif
|
||||
216
src/file.cpp
Normal file
216
src/file.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
void file::set_validator(const validator *v)
|
||||
{
|
||||
m_validator = v;
|
||||
for (auto &db : *this)
|
||||
db.set_validator(v);
|
||||
}
|
||||
|
||||
bool file::is_valid() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
std::runtime_error("No validator loaded explicitly, cannot continue");
|
||||
|
||||
bool result = true;
|
||||
for (auto &d : *this)
|
||||
result = d.is_valid() and result;
|
||||
|
||||
if (result)
|
||||
result = validate_links();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool file::is_valid()
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
{
|
||||
if (VERBOSE > 0)
|
||||
std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;
|
||||
|
||||
load_dictionary();
|
||||
}
|
||||
|
||||
bool result = not empty();
|
||||
|
||||
for (auto &d : *this)
|
||||
result = d.is_valid() and result;
|
||||
|
||||
if (result)
|
||||
result = validate_links();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool file::validate_links() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
std::runtime_error("No validator loaded explicitly, cannot continue");
|
||||
|
||||
bool result = true;
|
||||
|
||||
for (auto &db : *this)
|
||||
result = db.validate_links() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void file::load_dictionary()
|
||||
{
|
||||
if (not empty())
|
||||
{
|
||||
auto *audit_conform = front().get("audit_conform");
|
||||
if (audit_conform and not audit_conform->empty())
|
||||
{
|
||||
std::string name = audit_conform->front().get<std::string>("dict_name");
|
||||
|
||||
if (not name.empty())
|
||||
{
|
||||
try
|
||||
{
|
||||
load_dictionary(name);
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
if (VERBOSE)
|
||||
std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (not m_validator)
|
||||
load_dictionary("mmcif_ddl");
|
||||
}
|
||||
|
||||
void file::load_dictionary(std::string_view name)
|
||||
{
|
||||
set_validator(&validator_factory::instance()[name]);
|
||||
}
|
||||
|
||||
bool file::contains(std::string_view name) const
|
||||
{
|
||||
return std::find_if(begin(), end(), [name](const datablock &db) { return db.name() == name; }) != end();
|
||||
}
|
||||
|
||||
// Return the datablock called `name` (compared with iequals). When there
// is none, a new datablock with that name is appended and returned.
datablock &file::operator[](std::string_view name)
{
	for (auto pos = begin(); pos != end(); ++pos)
	{
		if (iequals(pos->name(), name))
			return *pos;
	}

	emplace_back(name);
	return back();
}
|
||||
|
||||
// Const lookup of the datablock called `name` (compared with iequals).
// A reference to a shared, default constructed datablock is returned
// when no such datablock exists.
const datablock &file::operator[](std::string_view name) const
{
	static const datablock s_empty;

	for (auto pos = begin(); pos != end(); ++pos)
	{
		if (iequals(pos->name(), name))
			return *pos;
	}

	return s_empty;
}
|
||||
|
||||
std::tuple<file::iterator, bool> file::emplace(std::string_view name)
|
||||
{
|
||||
bool is_new = true;
|
||||
|
||||
auto i = begin();
|
||||
while (i != end())
|
||||
{
|
||||
if (iequals(name, i->name()))
|
||||
{
|
||||
is_new = false;
|
||||
|
||||
if (i != begin())
|
||||
{
|
||||
auto n = std::next(i);
|
||||
splice(begin(), *this, i, n);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &db = emplace_front(name);
|
||||
db.set_validator(m_validator);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
}
|
||||
|
||||
void file::load(const std::filesystem::path &p)
|
||||
{
|
||||
gxrio::ifstream in(p);
|
||||
load(in);
|
||||
}
|
||||
|
||||
void file::load(std::istream &is)
|
||||
{
|
||||
auto saved = m_validator;
|
||||
set_validator(nullptr);
|
||||
|
||||
parser p(is, *this);
|
||||
p.parse_file();
|
||||
|
||||
if (saved != nullptr)
|
||||
set_validator(saved);
|
||||
else
|
||||
load_dictionary();
|
||||
}
|
||||
|
||||
void file::save(const std::filesystem::path &p) const
|
||||
{
|
||||
gxrio::ofstream outFile(p);
|
||||
save(outFile);
|
||||
}
|
||||
|
||||
void file::save(std::ostream &os) const
|
||||
{
|
||||
// if (not is_valid())
|
||||
// std::cout << "File is not valid!" << std::endl;
|
||||
|
||||
for (auto &db : *this)
|
||||
db.write(os);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
68
src/item.cpp
Normal file
68
src/item.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
const item_handle item_handle::s_null_item;
|
||||
row_handle s_null_row_handle;
|
||||
|
||||
item_handle::item_handle()
|
||||
: m_column(std::numeric_limits<uint16_t>::max())
|
||||
, m_row_handle(s_null_row_handle)
|
||||
{
|
||||
}
|
||||
|
||||
std::string_view item_handle::text() const
|
||||
{
|
||||
if (not m_row_handle.empty())
|
||||
{
|
||||
auto iv = m_row_handle.m_row->get(m_column);
|
||||
if (iv != nullptr)
|
||||
return iv->text();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void item_handle::assign_value(const item &v)
|
||||
{
|
||||
assert(not m_row_handle.empty());
|
||||
m_row_handle.assign(m_column, v.value(), true);
|
||||
}
|
||||
|
||||
// Swap the value in this handle's column with the one in the row of `b`.
// Both handles must refer to the same column index (asserted).
void item_handle::swap(item_handle &b)
{
	assert(m_column == b.m_column);

	auto &other_row = b.m_row_handle;
	m_row_handle.swap(m_column, other_row);
}
|
||||
|
||||
}
|
||||
2609
src/model.cpp
Normal file
2609
src/model.cpp
Normal file
File diff suppressed because it is too large
Load Diff
820
src/parser.cpp
Normal file
820
src/parser.cpp
Normal file
@@ -0,0 +1,820 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <stack>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
extern int VERBOSE;
|
||||
}
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Set up a parser reading from `is`: validation is switched on, the line
// counter starts at 1 and the parser is at the beginning of a line.
// When `init` is true the first token is read right away.
sac_parser::sac_parser(std::istream &is, bool init)
	: m_source(is)
{
	m_bol = true;
	m_line_nr = 1;
	m_validate = true;

	if (not init)
		return;

	m_lookahead = get_next_token();
}
|
||||
|
||||
// get_next_char takes a char from the buffer, or if it is empty
|
||||
// from the istream. This function also does carriage/linefeed
|
||||
// translation.
|
||||
int sac_parser::get_next_char()
|
||||
{
|
||||
int result;
|
||||
|
||||
if (m_buffer.empty())
|
||||
result = m_source.get();
|
||||
else
|
||||
{
|
||||
result = m_buffer.top();
|
||||
m_buffer.pop();
|
||||
}
|
||||
|
||||
// very simple CR/LF translation into LF
|
||||
if (result == '\r')
|
||||
{
|
||||
int lookahead = m_source.get();
|
||||
if (lookahead != '\n')
|
||||
m_buffer.push(lookahead);
|
||||
result = '\n';
|
||||
}
|
||||
|
||||
m_token_value += static_cast<char>(result);
|
||||
|
||||
if (result == '\n')
|
||||
++m_line_nr;
|
||||
|
||||
if (VERBOSE >= 6)
|
||||
{
|
||||
std::cerr << "get_next_char => ";
|
||||
if (iscntrl(result) or not isprint(result))
|
||||
std::cerr << int(result) << std::endl;
|
||||
else
|
||||
std::cerr << char(result) << std::endl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sac_parser::retract()
|
||||
{
|
||||
assert(not m_token_value.empty());
|
||||
|
||||
char ch = m_token_value.back();
|
||||
if (ch == '\n')
|
||||
--m_line_nr;
|
||||
|
||||
m_buffer.push(ch);
|
||||
m_token_value.pop_back();
|
||||
}
|
||||
|
||||
int sac_parser::restart(int start)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
while (not m_token_value.empty())
|
||||
retract();
|
||||
|
||||
switch (start)
|
||||
{
|
||||
case State::Start:
|
||||
result = State::Float;
|
||||
break;
|
||||
|
||||
case State::Float:
|
||||
result = State::Int;
|
||||
break;
|
||||
|
||||
case State::Int:
|
||||
result = State::Value;
|
||||
break;
|
||||
|
||||
default:
|
||||
error("Invalid state in SacParser");
|
||||
}
|
||||
|
||||
m_bol = false;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// The tokenizer: a state machine that consumes characters until a
// complete token has been recognised. The text of the token is left in
// m_token_value and, for values, the kind of value in mTokenType.
//
// Numbers are recognised by trial: scanning first tries to read a float,
// then an int and finally a plain value; restart() pushes the consumed
// characters back and selects the next attempt.
sac_parser::CIFToken sac_parser::get_next_token()
{
	const auto kEOF = std::char_traits<char>::eof();

	CIFToken token = CIFToken::Unknown;
	int quoteChar = 0;
	int state = State::Start, start = State::Start;
	m_bol = false;

	m_token_value.clear();
	mTokenType = CIFValue::Unknown;

	// the current attempt failed, push everything back and try the next form
	const auto try_next_form = [&]()
	{
		state = start = restart(start);
	};

	// the character just read ends a value of kind 'type'
	const auto accept_value = [&](CIFValue type)
	{
		retract();
		token = CIFToken::Value;
		mTokenType = type;
	};

	const auto ends_number = [&](int c)
	{
		return is_white(c) or c == kEOF;
	};

	while (token == CIFToken::Unknown)
	{
		auto ch = get_next_char();

		switch (state)
		{
			case State::Start:
				if (ch == kEOF)
					token = CIFToken::Eof;
				else if (ch == '\n')
				{
					m_bol = true;
					state = State::White;
				}
				else if (ch == ' ' or ch == '\t')
					state = State::White;
				else if (ch == '#')
					state = State::Comment;
				else if (ch == '_')
					state = State::Tag;
				else if (ch == ';' and m_bol)
					state = State::TextField;
				else if (ch == '\'' or ch == '"')
				{
					quoteChar = ch;
					state = State::QuotedString;
				}
				else
					try_next_form();
				break;

			case State::White:
				if (ch == kEOF)
					token = CIFToken::Eof;
				else if (not isspace(ch))
				{
					// end of white space, rescan this character as start of a token
					state = State::Start;
					retract();
					m_token_value.clear();
				}
				else
					m_bol = (ch == '\n');
				break;

			case State::Comment:
				if (ch == '\n')
				{
					state = State::Start;
					m_bol = true;
					m_token_value.clear();
				}
				else if (ch == kEOF)
					token = CIFToken::Eof;
				else if (not is_any_print(ch))
					error("invalid character in comment");
				break;

			case State::TextField:
				if (ch == '\n')
					state = State::TextField + 1;
				else if (ch == kEOF)
					error("unterminated textfield");
				else if (not is_any_print(ch))
					warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
				break;

			// inside a text field, at the start of a line
			case State::TextField + 1:
				if (is_text_lead(ch) or ch == ' ' or ch == '\t')
					state = State::TextField;
				else if (ch == ';')
				{
					// strip the leading ';' and the trailing newline + ';'
					assert(m_token_value.length() >= 2);
					m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
					mTokenType = CIFValue::TextField;
					token = CIFToken::Value;
				}
				else if (ch == kEOF)
					error("unterminated textfield");
				else if (ch != '\n')
					error("invalid character in text field");
				break;

			case State::QuotedString:
				if (ch == kEOF)
					error("unterminated quoted string");
				else if (ch == quoteChar)
					state = State::QuotedStringQuote;
				else if (not is_any_print(ch))
					warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + '\'');
				break;

			// seen a quote character inside a quoted string, it only
			// terminates the string when followed by white space
			case State::QuotedStringQuote:
				if (is_white(ch))
				{
					retract();
					token = CIFToken::Value;
					mTokenType = CIFValue::String;

					if (m_token_value.length() < 2)
						error("Invalid quoted string token");

					// strip the quotes
					m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
				}
				else if (ch == quoteChar)
					;
				else if (is_any_print(ch))
					state = State::QuotedString;
				else if (ch == kEOF)
					error("unterminated quoted string");
				else
					error("invalid character in quoted string");
				break;

			case State::Tag:
				if (not is_non_blank(ch))
				{
					retract();
					token = CIFToken::Tag;
				}
				break;

			case State::Float:
				if (ch == '+' or ch == '-')
					state = State::Float + 1;
				else if (isdigit(ch))
					state = State::Float + 1;
				else
					try_next_form();
				break;

			case State::Float + 1:
				if (ch == '.')
					state = State::Float + 2;
				else if (tolower(ch) == 'e')
					state = State::Float + 3;
				else if (ends_number(ch))
					accept_value(CIFValue::Int);
				else
					try_next_form();
				break;

			// parsed '.'
			case State::Float + 2:
				if (tolower(ch) == 'e')
					state = State::Float + 3;
				else if (ends_number(ch))
					accept_value(CIFValue::Float);
				else
					try_next_form();
				break;

			// parsed 'e'
			case State::Float + 3:
				if (ch == '-' or ch == '+')
					state = State::Float + 4;
				else if (isdigit(ch))
					state = State::Float + 5;
				else
					try_next_form();
				break;

			case State::Float + 4:
				if (isdigit(ch))
					state = State::Float + 5;
				else
					try_next_form();
				break;

			case State::Float + 5:
				if (ends_number(ch))
					accept_value(CIFValue::Float);
				else
					try_next_form();
				break;

			case State::Int:
				if (isdigit(ch) or ch == '+' or ch == '-')
					state = State::Int + 1;
				else
					try_next_form();
				break;

			case State::Int + 1:
				if (ends_number(ch))
					accept_value(CIFValue::Int);
				else
					try_next_form();
				break;

			case State::Value:
				if (ch == '_')
				{
					// this might be one of the reserved words
					std::string s = to_lower_copy(m_token_value);

					if (s == "global_")
						token = CIFToken::GLOBAL;
					else if (s == "stop_")
						token = CIFToken::STOP;
					else if (s == "loop_")
						token = CIFToken::LOOP;
					else if (s == "data_")
					{
						state = State::DATA;
						continue;
					}
					else if (s == "save_")
					{
						state = State::SAVE;
						continue;
					}
				}

				if (token == CIFToken::Unknown and not is_non_blank(ch))
				{
					retract();
					token = CIFToken::Value;

					if (m_token_value == ".")
						mTokenType = CIFValue::Inapplicable;
					else if (m_token_value == "?")
					{
						mTokenType = CIFValue::Unknown;
						m_token_value.clear();
					}
				}
				break;

			case State::DATA:
			case State::SAVE:
				if (not is_non_blank(ch))
				{
					retract();

					if (state == State::DATA)
						token = CIFToken::DATA;
					else
						token = CIFToken::SAVE;

					// remove the five characters of the data_ or save_ prefix
					m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
				}
				break;

			default:
				assert(false);
				error("Invalid state in get_next_token");
				break;
		}
	}

	if (VERBOSE >= 5)
	{
		std::cerr << get_token_name(token);
		if (mTokenType != CIFValue::Unknown)
			std::cerr << ' ' << get_value_name(mTokenType);
		if (token != CIFToken::Eof)
			std::cerr << " " << std::quoted(m_token_value);
		std::cerr << std::endl;
	}

	return token;
}
|
||||
|
||||
// Check that the lookahead token is `token`, reporting an error when it
// is not, and then read the next token into the lookahead.
void sac_parser::match(CIFToken token)
{
	if (m_lookahead != token)
	{
		std::string message("Unexpected token, expected ");
		message += get_token_name(token);
		message += " but found ";
		message += get_token_name(m_lookahead);

		error(message);
	}

	m_lookahead = get_next_token();
}
|
||||
|
||||
bool sac_parser::parse_single_datablock(const std::string &datablock)
|
||||
{
|
||||
// first locate the start, as fast as we can
|
||||
auto &sb = *m_source.rdbuf();
|
||||
|
||||
enum
|
||||
{
|
||||
start,
|
||||
comment,
|
||||
string,
|
||||
string_quote,
|
||||
qstring,
|
||||
data
|
||||
} state = start;
|
||||
|
||||
int quote = 0;
|
||||
bool bol = true;
|
||||
std::string dblk = "data_" + datablock;
|
||||
std::string::size_type si = 0;
|
||||
bool found = false;
|
||||
|
||||
for (auto ch = sb.sbumpc(); not found and ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
|
||||
{
|
||||
switch (state)
|
||||
{
|
||||
case start:
|
||||
switch (ch)
|
||||
{
|
||||
case '#': state = comment; break;
|
||||
case 'd':
|
||||
case 'D':
|
||||
state = data;
|
||||
si = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
state = string;
|
||||
quote = ch;
|
||||
break;
|
||||
case ';':
|
||||
if (bol)
|
||||
state = qstring;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case comment:
|
||||
if (ch == '\n')
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case string:
|
||||
if (ch == quote)
|
||||
state = string_quote;
|
||||
break;
|
||||
|
||||
case string_quote:
|
||||
if (std::isspace(ch))
|
||||
state = start;
|
||||
else
|
||||
state = string;
|
||||
break;
|
||||
|
||||
case qstring:
|
||||
if (ch == ';' and bol)
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case data:
|
||||
if (isspace(ch) and dblk[si] == 0)
|
||||
found = true;
|
||||
else if (dblk[si++] != ch)
|
||||
state = start;
|
||||
break;
|
||||
}
|
||||
|
||||
bol = (ch == '\n');
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
produce_datablock(datablock);
|
||||
m_lookahead = get_next_token();
|
||||
parse_datablock();
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
// Scan the entire input for "data_<name>" headers, skipping comments,
// quoted strings and text fields. For each name found, the stream
// position (as reported by tellg) just after the white space character
// following the name is stored in the returned index.
sac_parser::datablock_index sac_parser::index_datablocks()
{
	datablock_index index;

	// first locate the start, as fast as we can
	auto &sb = *m_source.rdbuf();

	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data,
		data_name
	} state = start;

	int quote = 0;
	bool bol = true;
	const char dblk[] = "data_";
	const std::string::size_type dblk_length = sizeof(dblk) - 1;
	std::string::size_type si = 0; // index in dblk of the next character to match
	std::string datablock;

	for (auto ch = sb.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
	{
		switch (state)
		{
			case start:
				switch (ch)
				{
					case '#': state = comment; break;
					case 'd':
					case 'D':
						state = data;
						si = 1;
						break;
					case '\'':
					case '"':
						state = string;
						quote = ch;
						break;
					case ';':
						if (bol)
							state = qstring;
						break;
				}
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				// a quote only ends the string when followed by white space
				if (std::isspace(ch))
					state = start;
				else
					state = string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				if (si == dblk_length)
				{
					// Prefix matched completely; dblk is never indexed beyond
					// its terminator, whatever character follows.
					if (is_non_blank(ch))
					{
						datablock = {static_cast<char>(ch)};
						state = data_name;
					}
					else
						state = start;
				}
				else if (dblk[si++] != ch)
					state = start;
				break;

			case data_name:
				if (is_non_blank(ch))
					datablock.insert(datablock.end(), char(ch));
				else if (isspace(ch))
				{
					if (not datablock.empty())
						index[datablock] = m_source.tellg();

					state = start;
				}
				else
					state = start;
				break;
		}

		bol = (ch == '\n');
	}

	return index;
}
|
||||
|
||||
bool sac_parser::parse_single_datablock(const std::string &datablock, const datablock_index &index)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
auto i = index.find(datablock);
|
||||
if (i != index.end())
|
||||
{
|
||||
m_source.seekg(i->second);
|
||||
|
||||
produce_datablock(datablock);
|
||||
m_lookahead = get_next_token();
|
||||
parse_datablock();
|
||||
|
||||
result = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sac_parser::parse_file()
|
||||
{
|
||||
while (m_lookahead != CIFToken::Eof)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::GLOBAL:
|
||||
parse_global();
|
||||
break;
|
||||
|
||||
case CIFToken::DATA:
|
||||
produce_datablock(m_token_value);
|
||||
|
||||
match(CIFToken::DATA);
|
||||
parse_datablock();
|
||||
break;
|
||||
|
||||
default:
|
||||
error("This file does not seem to be an mmCIF file");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_global()
|
||||
{
|
||||
match(CIFToken::GLOBAL);
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
match(CIFToken::Tag);
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_datablock()
|
||||
{
|
||||
std::string cat;
|
||||
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::LOOP:
|
||||
{
|
||||
cat.clear(); // should start a new category
|
||||
|
||||
match(CIFToken::LOOP);
|
||||
|
||||
std::vector<std::string> tags;
|
||||
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
std::string catName, itemName;
|
||||
std::tie(catName, itemName) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat.empty())
|
||||
{
|
||||
produce_category(catName);
|
||||
cat = catName;
|
||||
}
|
||||
else if (not iequals(cat, catName))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
tags.push_back(itemName);
|
||||
|
||||
match(CIFToken::Tag);
|
||||
}
|
||||
|
||||
while (m_lookahead == CIFToken::Value)
|
||||
{
|
||||
produce_row();
|
||||
|
||||
for (auto tag : tags)
|
||||
{
|
||||
produce_item(cat, tag, m_token_value);
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
cat.clear();
|
||||
break;
|
||||
}
|
||||
|
||||
case CIFToken::Tag:
|
||||
{
|
||||
std::string catName, itemName;
|
||||
std::tie(catName, itemName) = split_tag_name(m_token_value);
|
||||
|
||||
if (not iequals(cat, catName))
|
||||
{
|
||||
produce_category(catName);
|
||||
cat = catName;
|
||||
produce_row();
|
||||
}
|
||||
|
||||
match(CIFToken::Tag);
|
||||
|
||||
produce_item(cat, itemName, m_token_value);
|
||||
|
||||
match(CIFToken::Value);
|
||||
break;
|
||||
}
|
||||
|
||||
case CIFToken::SAVE:
|
||||
parse_save_frame();
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_save_frame()
|
||||
{
|
||||
error("A regular CIF file should not contain a save frame");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void parser::produce_datablock(const std::string &name)
|
||||
{
|
||||
const auto &[iter, ignore] = m_file.emplace(name);
|
||||
m_datablock = &(*iter);
|
||||
}
|
||||
|
||||
void parser::produce_category(const std::string &name)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing category " << name << std::endl;
|
||||
|
||||
const auto &[cat, ignore] = m_datablock->emplace(name);
|
||||
m_category = &*cat;
|
||||
}
|
||||
|
||||
void parser::produce_row()
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing row for category " << m_category->name() << std::endl;
|
||||
|
||||
m_category->emplace({});
|
||||
m_row = m_category->back();
|
||||
// m_row.lineNr(m_line_nr);
|
||||
}
|
||||
|
||||
void parser::produce_item(const std::string &category, const std::string &item, const std::string &value)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
|
||||
|
||||
if (not iequals(category, m_category->name()))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
m_row[item] = m_token_value;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
3726
src/pdb/cif2pdb.cpp
Normal file
3726
src/pdb/cif2pdb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1500
src/pdb/pdb2cif_remark_3.cpp
Normal file
1500
src/pdb/pdb2cif_remark_3.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2016
src/pdb/tls.cpp
Normal file
2016
src/pdb/tls.cpp
Normal file
File diff suppressed because it is too large
Load Diff
544
src/point.cpp
Normal file
544
src/point.cpp
Normal file
@@ -0,0 +1,544 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <random>
|
||||
|
||||
#include <cif++/point.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// We're using expression templates here
|
||||
|
||||
// Base class for all matrix expressions. M is the class deriving from
// this one, all calls are forwarded to that class using a static_cast.
template <typename M>
class MatrixExpression
{
  public:
	uint32_t dim_m() const { return derived().dim_m(); }
	uint32_t dim_n() const { return derived().dim_n(); }

	// writable access to element i,j, M should offer a non-const operator()
	double &operator()(uint32_t i, uint32_t j)
	{
		return derived().operator()(i, j);
	}

	// read access to element i,j
	double operator()(uint32_t i, uint32_t j) const
	{
		return derived().operator()(i, j);
	}

  private:
	M &derived() { return static_cast<M &>(*this); }
	const M &derived() const { return static_cast<const M &>(*this); }
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
|
||||
// element m i,j is mapped to [i * n + j] and thus storage is row major
|
||||
|
||||
// A dense m x n matrix of doubles. Element i,j is stored at
// [i * n + j] in a single vector.
class Matrix : public MatrixExpression<Matrix>
{
  public:
	// Construct by evaluating all elements of matrix expression m
	template <typename M2>
	Matrix(const MatrixExpression<M2> &m)
		: m_m(m.dim_m())
		, m_n(m.dim_n())
		, m_data(m_m * m_n)
	{
		auto out = m_data.begin();

		// visiting row by row is the order of the elements in m_data
		for (uint32_t i = 0; i < m_m; ++i)
		{
			for (uint32_t j = 0; j < m_n; ++j)
				*out++ = m(i, j);
		}
	}

	// Construct an m x n matrix with all elements set to v
	Matrix(size_t m, size_t n, double v = 0)
		: m_m(m)
		, m_n(n)
		, m_data(m_m * m_n, v)
	{
	}

	Matrix() = default;
	Matrix(Matrix &&m) = default;
	Matrix(const Matrix &m) = default;
	Matrix &operator=(Matrix &&m) = default;
	Matrix &operator=(const Matrix &m) = default;

	uint32_t dim_m() const { return m_m; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		return m_data[offset(i, j)];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		return m_data[offset(i, j)];
	}

  private:
	// index in m_data of element i,j, the indices are checked with asserts
	uint32_t offset(uint32_t i, uint32_t j) const
	{
		assert(i < m_m);
		assert(j < m_n);
		return i * m_n + j;
	}

	uint32_t m_m = 0, m_n = 0;
	std::vector<double> m_data;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Symmetric n x n matrix of doubles. Only one triangle is stored: element
// (i, j) with i <= j lives at m_data[j * (j + 1) / 2 + i], and (j, i)
// refers to the very same slot.
class SymmetricMatrix : public MatrixExpression<SymmetricMatrix>
{
  public:
	// Create an n x n symmetric matrix with every element set to v.
	SymmetricMatrix(uint32_t n, double v = 0)
		: m_n(n)
		, m_data((static_cast<size_t>(n) * (static_cast<size_t>(n) + 1)) / 2, v)
	{
	}

	SymmetricMatrix() = default;
	SymmetricMatrix(SymmetricMatrix &&m) = default;
	SymmetricMatrix(const SymmetricMatrix &m) = default;
	SymmetricMatrix &operator=(SymmetricMatrix &&m) = default;
	SymmetricMatrix &operator=(const SymmetricMatrix &m) = default;

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		return m_data[index(i, j)];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		return m_data[index(i, j)];
	}

  private:
	// Map (i, j) onto the packed triangle, the smaller index selects the
	// position inside the run that belongs to the larger one.
	size_t index(uint32_t i, uint32_t j) const
	{
		if (i > j)
			std::swap(i, j);
		assert(j < m_n);
		return (static_cast<size_t>(j) * (static_cast<size_t>(j) + 1)) / 2 + i;
	}

	// zero initialised so that a default constructed matrix reports 0 x 0
	uint32_t m_n = 0;
	std::vector<double> m_data;
};
|
||||
|
||||
// Storage free n x n identity: ones on the diagonal, zeros everywhere else.
class IdentityMatrix : public MatrixExpression<IdentityMatrix>
{
  public:
	IdentityMatrix(uint32_t n)
		: m_n(n)
	{
	}

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	// Value is derived from the indices only, nothing is stored.
	double operator()(uint32_t i, uint32_t j) const
	{
		if (i == j)
			return 1;
		return 0;
	}

  private:
	uint32_t m_n;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix functions, implemented as expression templates
|
||||
|
||||
template <typename M1, typename M2>
|
||||
class MatrixSubtraction : public MatrixExpression<MatrixSubtraction<M1, M2>>
|
||||
{
|
||||
public:
|
||||
MatrixSubtraction(const M1 &m1, const M2 &m2)
|
||||
: m_m1(m1)
|
||||
, m_m2(m2)
|
||||
{
|
||||
assert(m_m1.dim_m() == m_m2.dim_m());
|
||||
assert(m_m1.dim_n() == m_m2.dim_n());
|
||||
}
|
||||
|
||||
uint32_t dim_m() const { return m_m1.dim_m(); }
|
||||
uint32_t dim_n() const { return m_m1.dim_n(); }
|
||||
|
||||
double operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return m_m1(i, j) - m_m2(i, j);
|
||||
}
|
||||
|
||||
private:
|
||||
const M1 &m_m1;
|
||||
const M2 &m_m2;
|
||||
};
|
||||
|
||||
template <typename M1, typename M2>
|
||||
MatrixSubtraction<M1, M2> operator-(const MatrixExpression<M1> &m1, const MatrixExpression<M2> &m2)
|
||||
{
|
||||
return MatrixSubtraction(*static_cast<const M1 *>(&m1), *static_cast<const M2 *>(&m2));
|
||||
}
|
||||
|
||||
template <typename M>
|
||||
class MatrixMultiplication : public MatrixExpression<MatrixMultiplication<M>>
|
||||
{
|
||||
public:
|
||||
MatrixMultiplication(const M &m, double v)
|
||||
: m_m(m)
|
||||
, m_v(v)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t dim_m() const { return m_m.dim_m(); }
|
||||
uint32_t dim_n() const { return m_m.dim_n(); }
|
||||
|
||||
double operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return m_m(i, j) * m_v;
|
||||
}
|
||||
|
||||
private:
|
||||
const M &m_m;
|
||||
double m_v;
|
||||
};
|
||||
|
||||
template <typename M>
|
||||
MatrixMultiplication<M> operator*(const MatrixExpression<M> &m, double v)
|
||||
{
|
||||
return MatrixMultiplication(*static_cast<const M *>(&m), v);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <class M1>
|
||||
Matrix Cofactors(const M1 &m)
|
||||
{
|
||||
Matrix cf(m.dim_m(), m.dim_m());
|
||||
|
||||
const size_t ixs[4][3] = {
|
||||
{ 1, 2, 3 },
|
||||
{ 0, 2, 3 },
|
||||
{ 0, 1, 3 },
|
||||
{ 0, 1, 2 }
|
||||
};
|
||||
|
||||
for (size_t x = 0; x < 4; ++x)
|
||||
{
|
||||
const size_t *ix = ixs[x];
|
||||
|
||||
for (size_t y = 0; y < 4; ++y)
|
||||
{
|
||||
const size_t *iy = ixs[y];
|
||||
|
||||
cf(x, y) =
|
||||
m(ix[0], iy[0]) * m(ix[1], iy[1]) * m(ix[2], iy[2]) +
|
||||
m(ix[0], iy[1]) * m(ix[1], iy[2]) * m(ix[2], iy[0]) +
|
||||
m(ix[0], iy[2]) * m(ix[1], iy[0]) * m(ix[2], iy[1]) -
|
||||
m(ix[0], iy[2]) * m(ix[1], iy[1]) * m(ix[2], iy[0]) -
|
||||
m(ix[0], iy[1]) * m(ix[1], iy[0]) * m(ix[2], iy[2]) -
|
||||
m(ix[0], iy[0]) * m(ix[1], iy[2]) * m(ix[2], iy[1]);
|
||||
|
||||
if ((x + y) % 2 == 1)
|
||||
cf(x, y) *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
return cf;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template<typename T>
|
||||
quaternion_type<T> normalize(quaternion_type<T> q)
|
||||
{
|
||||
std::valarray<double> t(4);
|
||||
|
||||
t[0] = q.get_a();
|
||||
t[1] = q.get_b();
|
||||
t[2] = q.get_c();
|
||||
t[3] = q.get_d();
|
||||
|
||||
t *= t;
|
||||
|
||||
double length = std::sqrt(t.sum());
|
||||
|
||||
if (length > 0.001)
|
||||
q /= static_cast<quaternion::value_type>(length);
|
||||
else
|
||||
q = quaternion(1, 0, 0, 0);
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Construct the unit quaternion for a rotation of angle degrees around axis.
// The axis does not have to be normalised.
quaternion construct_from_angle_axis(float angle, point axis)
{
	const auto half = (angle * kPI / 180) / 2;

	// Take the sine directly; deriving it as sqrt(1 - cos^2) always yields
	// a non-negative value and thereby loses the direction of negative angles.
	const auto c = std::cos(half);
	const auto s = std::sin(half);

	axis.normalize();

	return normalize(quaternion{
		static_cast<float>(c),
		static_cast<float>(s * axis.m_x),
		static_cast<float>(s * axis.m_y),
		static_cast<float>(s * axis.m_z) });
}
|
||||
|
||||
// Decompose q into a rotation angle in degrees and a rotation axis.
std::tuple<double, point> quaternion_to_angle_axis(quaternion q)
{
	// acos needs a scalar part that does not exceed one
	if (q.get_a() > 1)
		q = normalize(q);

	const auto w = q.get_a();

	// angle, converted from radians to degrees
	double degrees = 2 * std::acos(w);
	degrees = degrees * 180 / kPI;

	// axis; for a (nearly) zero rotation the vector part is used unscaled
	float scale = std::sqrt(1 - w * w);
	if (scale < 0.001)
		scale = 1;

	point direction(q.get_b() / scale, q.get_c() / scale, q.get_d() / scale);

	return std::make_tuple(degrees, direction);
}
|
||||
|
||||
// Translate Points in place so that their mean ends up at the origin and
// return the mean that was subtracted.
point center_points(std::vector<point> &Points)
{
	point mean;

	for (const point &pt : Points)
	{
		mean.m_x += pt.m_x;
		mean.m_y += pt.m_y;
		mean.m_z += pt.m_z;
	}

	const auto count = Points.size();
	mean.m_x /= count;
	mean.m_y /= count;
	mean.m_z /= count;

	for (std::size_t i = 0; i < count; ++i)
	{
		point &pt = Points[i];
		pt.m_x -= mean.m_x;
		pt.m_y -= mean.m_y;
		pt.m_z -= mean.m_z;
	}

	return mean;
}
|
||||
|
||||
// Return the mean of pts, the input is left untouched.
point centroid(const std::vector<point> &pts)
{
	point sum;

	for (std::size_t i = 0; i < pts.size(); ++i)
		sum += pts[i];

	const float count = static_cast<float>(pts.size());
	sum /= count;

	return sum;
}
|
||||
|
||||
double RMSd(const std::vector<point> &a, const std::vector<point> &b)
|
||||
{
|
||||
double sum = 0;
|
||||
for (uint32_t i = 0; i < a.size(); ++i)
|
||||
{
|
||||
std::valarray<double> d(3);
|
||||
|
||||
d[0] = b[i].m_x - a[i].m_x;
|
||||
d[1] = b[i].m_y - a[i].m_y;
|
||||
d[2] = b[i].m_z - a[i].m_z;
|
||||
|
||||
d *= d;
|
||||
|
||||
sum += d.sum();
|
||||
}
|
||||
|
||||
return std::sqrt(sum / a.size());
|
||||
}
|
||||
|
||||
// The next function returns the largest solution for a quartic equation
|
||||
// based on Ferrari's algorithm.
|
||||
// A depressed quartic is of the form:
|
||||
//
|
||||
// x^4 + ax^2 + bx + c = 0
|
||||
//
|
||||
// (since I'm too lazy to find out a better way, I've implemented the
|
||||
// routine using complex values to avoid nan's as a result of taking
|
||||
// sqrt of a negative number)
|
||||
// Return the largest real part among the candidate solutions of the
// depressed quartic x^4 + a x^2 + b x + c = 0, following Ferrari's algorithm.
// All intermediates are complex so that roots of negative numbers do not
// turn into nan.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	using cplx = std::complex<double>;

	const cplx P = -(a * a) / 12 - c;
	const cplx Q = -(a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	const cplx R = -Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	const cplx U = std::pow(R, 1 / 3.0);

	// a vanishing U cannot be divided by, use the alternative expression
	cplx y = -5.0 * a / 6.0 + U;
	if (U == 0.0)
		y = y - std::pow(Q, 1.0 / 3.0);
	else
		y = y - P / (3.0 * U);

	const cplx W = std::sqrt(a + 2.0 * y);

	// The candidates are (±W + sqrt(-(3a + 2y ± 2b / W))) / 2
	const cplx rootPlus = std::sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W));
	const cplx rootMinus = std::sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W));

	std::valarray<double> candidates(4);

	candidates[0] = ((W + rootPlus) / 2.0).real();
	candidates[1] = ((W + rootMinus) / 2.0).real();
	candidates[2] = ((-W + rootPlus) / 2.0).real();
	candidates[3] = ((-W + rootMinus) / 2.0).real();

	return candidates.max();
}
|
||||
|
||||
// Compute a unit quaternion from two equally long lists of points, pa and pb,
// that are paired by index. The steps below are: build the 3x3 matrix M of
// summed coordinate products, derive the symmetric 4x4 matrix N from it, find
// the largest eigenvalue of N by solving its characteristic quartic and take
// a row of the cofactor matrix of (N - λI) as the quaternion.
// NOTE(review): no centering is done here, presumably the caller is expected
// to have moved both sets to the origin first (e.g. with center_points) — confirm.
// NOTE(review): pb is indexed with the indices of pa, so pb must not be
// shorter than pa.
quaternion align_points(const std::vector<point> &pa, const std::vector<point> &pb)
{
	// First calculate M, a 3x3 Matrix containing the sums of products of the coordinates of A and B
	Matrix M(3, 3, 0);

	for (uint32_t i = 0; i < pa.size(); ++i)
	{
		const point &a = pa[i];
		const point &b = pb[i];

		M(0, 0) += a.m_x * b.m_x;
		M(0, 1) += a.m_x * b.m_y;
		M(0, 2) += a.m_x * b.m_z;
		M(1, 0) += a.m_y * b.m_x;
		M(1, 1) += a.m_y * b.m_y;
		M(1, 2) += a.m_y * b.m_z;
		M(2, 0) += a.m_z * b.m_x;
		M(2, 1) += a.m_z * b.m_y;
		M(2, 2) += a.m_z * b.m_z;
	}

	// Now calculate N, a symmetric 4x4 Matrix
	// (only the upper triangle is assigned, SymmetricMatrix mirrors it)
	SymmetricMatrix N(4);

	N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
	N(0, 1) = M(1, 2) - M(2, 1);
	N(0, 2) = M(2, 0) - M(0, 2);
	N(0, 3) = M(0, 1) - M(1, 0);

	N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
	N(1, 2) = M(0, 1) + M(1, 0);
	N(1, 3) = M(0, 2) + M(2, 0);

	N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
	N(2, 3) = M(1, 2) + M(2, 1);

	N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);

	// det(N - λI) = 0
	// find the largest λ (λm)
	//
	// Aλ4 + Bλ3 + Cλ2 + Dλ + E = 0
	// A = 1
	// B = 0
	// and so this is a so-called depressed quartic
	// solve it using Ferrari's algorithm

	// C: minus twice the sum of the squares of all elements of M
	double C = -2 * (M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
						M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
						M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));

	// D: eight times the difference of two sums of triple products of elements of M
	double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
					   M(1, 1) * M(2, 0) * M(0, 2) +
					   M(2, 2) * M(0, 1) * M(1, 0)) -
	           8 * (M(0, 0) * M(1, 1) * M(2, 2) +
					   M(1, 2) * M(2, 0) * M(0, 1) +
					   M(2, 1) * M(1, 0) * M(0, 2));

	// E is the determinant of N:
	double E =
		(N(0, 0) * N(1, 1) - N(0, 1) * N(0, 1)) * (N(2, 2) * N(3, 3) - N(2, 3) * N(2, 3)) +
		(N(0, 1) * N(0, 2) - N(0, 0) * N(2, 1)) * (N(2, 1) * N(3, 3) - N(2, 3) * N(1, 3)) +
		(N(0, 0) * N(1, 3) - N(0, 1) * N(0, 3)) * (N(2, 1) * N(2, 3) - N(2, 2) * N(1, 3)) +
		(N(0, 1) * N(2, 1) - N(1, 1) * N(0, 2)) * (N(0, 2) * N(3, 3) - N(2, 3) * N(0, 3)) +
		(N(1, 1) * N(0, 3) - N(0, 1) * N(1, 3)) * (N(0, 2) * N(2, 3) - N(2, 2) * N(0, 3)) +
		(N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3)) * (N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3));

	// solve quartic
	double lambda = LargestDepressedQuarticSolution(C, D, E);

	// calculate t = (N - λI)
	// (the expression nodes refer to temporaries, they are evaluated into t
	// before the end of this statement)
	Matrix t = N - IdentityMatrix(4) * lambda;

	// calculate a Matrix of cofactors for t
	Matrix cf = Cofactors(t);

	// pick the row whose first element has the largest magnitude
	int maxR = 0;
	for (int r = 1; r < 4; ++r)
	{
		if (std::abs(cf(r, 0)) > std::abs(cf(maxR, 0)))
			maxR = r;
	}

	// that row, scaled to unit length, is the result
	quaternion q(
		static_cast<float>(cf(maxR, 0)),
		static_cast<float>(cf(maxR, 1)),
		static_cast<float>(cf(maxR, 2)),
		static_cast<float>(cf(maxR, 3)));
	q = normalize(q);

	return q;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return p displaced in a random direction. The size of the displacement is
// drawn from a normal distribution with mean 0 and standard deviation offset.
point nudge(point p, float offset)
{
	// one generator, seeded once, shared by all calls
	static std::random_device seedSource;
	static std::mt19937_64 engine(seedSource());

	std::uniform_real_distribution<float> angleDist(0, 2 * kPI);
	std::normal_distribution<> lengthDist(0, offset);

	// three random angles make up a random rotation
	const float theta = angleDist(engine);
	const float phi1 = angleDist(engine) - kPI;
	const float phi2 = angleDist(engine) - kPI;

	quaternion rotation = spherical(1.0f, theta, phi1, phi2);

	// rotate a unit vector and scale it to the random length
	point delta{ 0, 0, 1 };
	delta.rotate(rotation);
	delta *= lengthDist(engine);

	return p + delta;
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,5 +0,0 @@
|
||||
const char kRevision[] = R"(
|
||||
lib@PROJECT_NAME@-version: @PROJECT_VERSION@
|
||||
@BUILD_VERSION_STRING@
|
||||
Date: @BUILD_DATE_TIME@
|
||||
)";
|
||||
100
src/row.cpp
Normal file
100
src/row.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
void row_handle::assign(size_t column, std::string_view value, bool updateLinked, bool validate)
|
||||
{
|
||||
assert(m_category);
|
||||
m_category->update_value(m_row, column, value, updateLinked, validate);
|
||||
}
|
||||
|
||||
uint16_t row_handle::get_column_ix(std::string_view name) const
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->get_column_ix(name);
|
||||
}
|
||||
|
||||
std::string_view row_handle::get_column_name(uint16_t ix) const
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->get_column_name(ix);
|
||||
}
|
||||
|
||||
uint16_t row_handle::add_column(std::string_view name)
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->add_column(name);
|
||||
}
|
||||
|
||||
void row_handle::swap(size_t column, row_handle &b)
|
||||
{
|
||||
m_category->swap_item(column, *this, b);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Collect a (column name, text) entry for every item in the row behind rh
// that tests true; items that test false are left out.
row_initializer::row_initializer(row_handle rh)
{
	assert(rh.m_category);
	assert(rh.m_row);

	row *data = rh.get_row();
	auto &cat = *rh.m_category;

	for (size_t ix = 0; ix < data->size(); ++ix)
	{
		auto &cell = (*data)[ix];
		if (cell)
			emplace_back(cat.get_column_name(ix), cell.text());
	}
}
|
||||
|
||||
void row_initializer::set_value(std::string_view name, std::string_view value)
|
||||
{
|
||||
for (auto &i : *this)
|
||||
{
|
||||
if (i.name() == name)
|
||||
{
|
||||
i.value(value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
emplace_back(name, value);
|
||||
}
|
||||
|
||||
void row_initializer::set_value_if_empty(std::string_view name, std::string_view value)
|
||||
{
|
||||
if (find_if(begin(), end(), [name](auto &i) { return i.name() == name; }) == end())
|
||||
emplace_back(name, value);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -27,12 +27,11 @@
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include "cif++/Symmetry.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
#include "SymOpTable_data.hpp"
|
||||
#include "./symop_table_data.hpp"
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -42,7 +41,7 @@ namespace mmcif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup)
|
||||
int get_space_group_number(std::string spacegroup)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
@@ -90,4 +89,66 @@ int GetSpacegroupNumber(std::string spacegroup)
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int get_space_group_number(std::string spacegroup, space_group_name type)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
else if (spacegroup.empty())
|
||||
throw std::runtime_error("No spacegroup, cannot continue");
|
||||
|
||||
int result = 0;
|
||||
|
||||
if (type == space_group_name::full)
|
||||
{
|
||||
const size_t N = kNrOfSpaceGroups;
|
||||
int32_t L = 0, R = static_cast<int32_t>(N - 1);
|
||||
while (L <= R)
|
||||
{
|
||||
int32_t i = (L + R) / 2;
|
||||
|
||||
int d = spacegroup.compare(kSpaceGroups[i].name);
|
||||
|
||||
if (d > 0)
|
||||
L = i + 1;
|
||||
else if (d < 0)
|
||||
R = i - 1;
|
||||
else
|
||||
{
|
||||
result = kSpaceGroups[i].nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == space_group_name::xHM)
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.xHM == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.Hall == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// not found, see if we can find a match based on xHM name
|
||||
if (result == 0)
|
||||
throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -2,12 +2,12 @@
|
||||
// and $CLIBD/syminfo.lib using symop-map-generator,
|
||||
// part of the PDB-REDO suite of programs.
|
||||
|
||||
#include "cif++/Symmetry.hpp"
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
const Spacegroup kSpaceGroups[] =
|
||||
const space_group kSpaceGroups[] =
|
||||
{
|
||||
{ "" , "P 2 1 1" , " P 2y (y,z,x)" , 10005 },
|
||||
{ "" , "P 21 1 1" , " P 2yb (y,z,x)" , 10008 },
|
||||
@@ -632,9 +632,9 @@ const Spacegroup kSpaceGroups[] =
|
||||
|
||||
};
|
||||
|
||||
const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(Spacegroup);
|
||||
const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(space_group);
|
||||
|
||||
const SymopDataBlock kSymopNrTable[] = {
|
||||
const symop_datablock kSymopNrTable[] = {
|
||||
|
||||
// P 1
|
||||
{ 1, 1, { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, } },
|
||||
@@ -8654,7 +8654,7 @@ const SymopDataBlock kSymopNrTable[] = {
|
||||
{ 10528, 192, { 0, 0, 1, 0,-1, 0,-1, 0, 0, 1, 2, 3, 4, 3, 4, } },
|
||||
};
|
||||
|
||||
const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(SymopDataBlock);
|
||||
const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(symop_datablock);
|
||||
|
||||
} // namespace mmcif
|
||||
|
||||
505
src/text.cpp
Normal file
505
src/text.cpp
Normal file
@@ -0,0 +1,505 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
const uint8_t kCharToLowerMap[256] =
|
||||
{
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
|
||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
|
||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
|
||||
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
|
||||
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
|
||||
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
|
||||
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
|
||||
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
|
||||
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
|
||||
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
|
||||
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool iequals(std::string_view a, std::string_view b)
|
||||
{
|
||||
bool result = a.length() == b.length();
|
||||
for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
|
||||
result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];
|
||||
// result = tolower(*ai) == tolower(*bi);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Case insensitive equality of two zero terminated strings.
bool iequals(const char *a, const char *b)
{
	while (*a and *b)
	{
		if (tolower(*a) != tolower(*b))
			return false;

		++a;
		++b;
	}

	// equal only when both strings ended at the same position
	return *a == *b;
}
|
||||
|
||||
// Case insensitive three way comparison. The result is the difference of the
// first pair of characters that differ after folding to lower case; when one
// string is a prefix of the other the longer one is greater (1 / -1), and 0
// means equal.
int icompare(std::string_view a, std::string_view b)
{
	const std::size_t common = a.size() < b.size() ? a.size() : b.size();

	for (std::size_t i = 0; i < common; ++i)
	{
		const int d = tolower(a[i]) - tolower(b[i]);
		if (d != 0)
			return d;
	}

	if (a.size() > common)
		return 1;

	if (b.size() > common)
		return -1;

	return 0;
}
|
||||
|
||||
// Case insensitive three way comparison of two zero terminated strings. The
// result is the difference of the first pair of characters that differ after
// folding to lower case; when one string is a prefix of the other the longer
// one is greater (1 / -1), and 0 means equal.
int icompare(const char *a, const char *b)
{
	while (*a != 0 and *b != 0)
	{
		const int d = tolower(*a) - tolower(*b);
		if (d != 0)
			return d;

		++a;
		++b;
	}

	if (*a != 0)
		return 1;

	if (*b != 0)
		return -1;

	return 0;
}
|
||||
|
||||
// Fold s to lower case, in place.
void to_lower(std::string &s)
{
	std::transform(s.begin(), s.end(), s.begin(),
		[](char c) -> char
		{ return static_cast<char>(tolower(c)); });
}
|
||||
|
||||
// Return a copy of s folded to lower case.
std::string to_lower_copy(std::string_view s)
{
	std::string lowered(s);

	std::transform(lowered.begin(), lowered.end(), lowered.begin(),
		[](char c) -> char
		{ return static_cast<char>(tolower(c)); });

	return lowered;
}
|
||||
|
||||
// Fold s to upper case, in place.
void to_upper(std::string &s)
{
	// The character is passed on as unsigned char: handing a negative char
	// value to the C library toupper is undefined behaviour.
	for (auto &c : s)
		c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
}
|
||||
|
||||
// Replace every occurrence of what in s by with, in place. Text that was
// inserted is not searched again. An empty what leaves s unchanged.
void replace_all(std::string &s, std::string_view what, std::string_view with)
{
	// An empty pattern matches at every position and would never terminate
	if (what.empty())
		return;

	// continue searching right after the text that was just inserted
	for (auto p = s.find(what); p != std::string::npos; p = s.find(what, p + with.length()))
		s.replace(p, what.length(), with);
}
|
||||
|
||||
bool icontains(std::string_view s, std::string_view q)
|
||||
{
|
||||
return contains(to_lower_copy(s), to_lower_copy(q));
|
||||
}
|
||||
|
||||
// Remove trailing white space (as classified by std::isspace) from s, in place.
void trim_right(std::string &s)
{
	// std::isspace must be given a value in the range of unsigned char,
	// passing a negative char is undefined behaviour
	auto last = std::find_if(s.rbegin(), s.rend(),
		[](char c)
		{ return not std::isspace(static_cast<unsigned char>(c)); });

	// base() refers to the position right after the last non-space character
	s.erase(last.base(), s.end());
}
|
||||
|
||||
// Return a copy of s without trailing white space (as classified by std::isspace).
std::string trim_right_copy(std::string_view s)
{
	// std::isspace must be given a value in the range of unsigned char,
	// passing a negative char is undefined behaviour
	auto last = std::find_if(s.rbegin(), s.rend(),
		[](char c)
		{ return not std::isspace(static_cast<unsigned char>(c)); });

	// base() refers to the position right after the last non-space character
	return { s.begin(), last.base() };
}
|
||||
|
||||
// Return a copy of s without leading white space (as classified by std::isspace).
std::string trim_left_copy(std::string_view s)
{
	// std::isspace must be given a value in the range of unsigned char,
	// passing a negative char is undefined behaviour
	auto first = std::find_if(s.begin(), s.end(),
		[](char c)
		{ return not std::isspace(static_cast<unsigned char>(c)); });

	return { first, s.end() };
}
|
||||
|
||||
// Remove leading white space (as classified by std::isspace) from s, in place.
void trim_left(std::string &s)
{
	// std::isspace must be given a value in the range of unsigned char,
	// passing a negative char is undefined behaviour
	auto first = std::find_if(s.begin(), s.end(),
		[](char c)
		{ return not std::isspace(static_cast<unsigned char>(c)); });

	s.erase(s.begin(), first);
}
|
||||
|
||||
void trim(std::string &s)
|
||||
{
|
||||
trim_right(s);
|
||||
trim_left(s);
|
||||
}
|
||||
|
||||
std::string trim_copy(std::string_view s)
|
||||
{
|
||||
return trim_left_copy(trim_right_copy(s));
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Split a tag of the form "_first.second" into its two parts: the text
// between the leading underscore and the first dot, and the text after that
// dot. Throws std::runtime_error when the tag is empty, does not start with
// an underscore or does not contain a dot.
std::tuple<std::string, std::string> split_tag_name(std::string_view tag)
{
	if (tag.empty())
		throw std::runtime_error("empty tag");

	if (tag.front() != '_')
		throw std::runtime_error("tag does not start with underscore");

	const auto dot = tag.find('.');
	if (dot == std::string_view::npos)
		throw std::runtime_error("tag does not contain dot");

	std::string first(tag.substr(1, dot - 1));
	std::string second(tag.substr(dot + 1));

	return std::tuple<std::string, std::string>(std::move(first), std::move(second));
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return an identifier made of capital letters for a non-negative number:
// 0 .. 25 map to "A" .. "Z", 26 maps to "AA", 701 to "ZZ", 702 to "AAA" and
// so on, every number having its own identifier. Numbers of 26 * 26 * 26 and
// up are written as 'L' followed by the decimal number.
std::string cif_id_for_number(int number)
{
	assert(number >= 0);

	if (number >= 26 * 26 * 26)
		return 'L' + std::to_string(number);

	std::string result;

	// Letters are produced last one first. There is no zero digit in this
	// scheme ("A" is followed by "AA" after "Z"), hence the extra - 1.
	do
	{
		result += static_cast<char>('A' + number % 26);
		number = number / 26 - 1;
	} while (number >= 0);

	std::reverse(result.begin(), result.end());

	assert(not result.empty());
	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Simplified line breaking code taken from a decent text editor.
|
||||
// In this case, simplified means it only supports ASCII.
|
||||
|
||||
enum LineBreakClass
|
||||
{
|
||||
kLBC_OpenPunctuation,
|
||||
kLBC_ClosePunctuation,
|
||||
kLBC_CloseParenthesis,
|
||||
kLBC_Quotation,
|
||||
kLBC_NonBreaking,
|
||||
kLBC_Nonstarter,
|
||||
kLBC_Exlamation,
|
||||
kLBC_SymbolAllowingBreakAfter,
|
||||
kLBC_InfixNumericSeparator,
|
||||
kLBC_PrefixNumeric,
|
||||
kLBC_PostfixNumeric,
|
||||
kLBC_Numeric,
|
||||
kLBC_Alphabetic,
|
||||
kLBC_Ideographic,
|
||||
kLBC_Inseperable,
|
||||
kLBC_Hyphen,
|
||||
kLBC_BreakAfter,
|
||||
kLBC_BreakBefor,
|
||||
kLBC_BreakOpportunityBeforeAndAfter,
|
||||
kLBC_ZeroWidthSpace,
|
||||
kLBC_CombiningMark,
|
||||
kLBC_WordJoiner,
|
||||
kLBC_HangulLVSyllable,
|
||||
kLBC_HangulLVTSyllable,
|
||||
kLBC_HangulLJamo,
|
||||
kLBC_HangulVJamo,
|
||||
kLBC_HangulTJamo,
|
||||
|
||||
kLBC_MandatoryBreak,
|
||||
kLBC_CarriageReturn,
|
||||
kLBC_LineFeed,
|
||||
kLBC_NextLine,
|
||||
kLBC_Surrogate,
|
||||
kLBC_Space,
|
||||
kLBC_ContigentBreakOpportunity,
|
||||
kLBC_Ambiguous,
|
||||
kLBC_ComplexContext,
|
||||
kLBC_Unknown
|
||||
};
|
||||
|
||||
/// Line break class for each of the 128 ASCII characters, indexed by
/// character code. Each row below covers eight consecutive characters.
const LineBreakClass kASCII_LBTable[128] = {
    // 0x00 - 0x07
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,

    // 0x08 - 0x0F: BS TAB LF VT FF CR SO SI
    kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak,
    kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,

    // 0x10 - 0x17
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,

    // 0x18 - 0x1F
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
    kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,

    // 0x20 - 0x27: space ! " # $ % & '
    kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic,
    kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,

    // 0x28 - 0x2B: ( ) * +
    kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric,

    // 0x2C: comma treated differently here, it is not a numeric separator in PDB
    kLBC_SymbolAllowingBreakAfter /* kLBC_InfixNumericSeparator */,

    // 0x2D - 0x2F: - . /
    kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,

    // 0x30 - 0x37: 0 .. 7
    kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
    kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,

    // 0x38 - 0x3F: 8 9 : ; < = > ?
    kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,

    // 0x40 - 0x47: @ A .. G
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x48 - 0x4F: H .. O
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x50 - 0x57: P .. W
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x58 - 0x5F: X Y Z [ \ ] ^ _
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation,
    kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x60 - 0x67: ` a .. g
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x68 - 0x6F: h .. o
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x70 - 0x77: p .. w
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,

    // 0x78 - 0x7F: x y z { | } ~ DEL
    kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation,
    kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark};
|
||||
|
||||
std::string::const_iterator nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
|
||||
{
|
||||
if (text == end)
|
||||
return text;
|
||||
|
||||
enum breakAction
|
||||
{
|
||||
DBK = 0, // direct break (blank in table)
|
||||
IBK, // indirect break (% in table)
|
||||
PBK, // prohibited break (^ in table)
|
||||
CIB, // combining indirect break
|
||||
CPB // combining prohibited break
|
||||
};
|
||||
|
||||
const breakAction brkTable[27][27] = {
|
||||
// OP CL CP QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT
|
||||
/* OP */ {PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK},
|
||||
/* CL */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* CP */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* QU */ {PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* GL */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* NS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* EX */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* SY */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* IS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* PR */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* PO */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* NU */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* AL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* ID */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* IN */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* HY */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* BA */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* BB */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* B2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* ZW */ {DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* CM */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* WJ */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* H2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
|
||||
/* H3 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
|
||||
/* JL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK},
|
||||
/* JV */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
|
||||
/* JT */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
|
||||
};
|
||||
|
||||
uint8_t ch = static_cast<uint8_t>(*text);
|
||||
|
||||
LineBreakClass cls;
|
||||
|
||||
if (ch == '\n')
|
||||
cls = kLBC_MandatoryBreak;
|
||||
else if (ch < 128)
|
||||
{
|
||||
cls = kASCII_LBTable[ch];
|
||||
if (cls > kLBC_MandatoryBreak and cls != kLBC_Space) // duh...
|
||||
cls = kLBC_Alphabetic;
|
||||
}
|
||||
else
|
||||
cls = kLBC_Unknown;
|
||||
|
||||
if (cls == kLBC_Space)
|
||||
cls = kLBC_WordJoiner;
|
||||
|
||||
LineBreakClass ncls = cls;
|
||||
|
||||
while (++text != end and cls != kLBC_MandatoryBreak)
|
||||
{
|
||||
ch = *text;
|
||||
|
||||
LineBreakClass lcls = ncls;
|
||||
|
||||
if (ch == '\n')
|
||||
{
|
||||
++text;
|
||||
break;
|
||||
}
|
||||
|
||||
ncls = kASCII_LBTable[ch];
|
||||
|
||||
if (ncls == kLBC_Space)
|
||||
continue;
|
||||
|
||||
breakAction brk = brkTable[cls][ncls];
|
||||
|
||||
if (brk == DBK or (brk == IBK and lcls == kLBC_Space))
|
||||
break;
|
||||
|
||||
cls = ncls;
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
std::vector<std::string> wrapLine(const std::string &text, size_t width)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::vector<size_t> offsets = {0};
|
||||
|
||||
auto b = text.begin();
|
||||
while (b != text.end())
|
||||
{
|
||||
auto e = nextLineBreak(b, text.end());
|
||||
|
||||
offsets.push_back(e - text.begin());
|
||||
|
||||
b = e;
|
||||
}
|
||||
|
||||
size_t count = offsets.size() - 1;
|
||||
|
||||
std::vector<size_t> minima(count + 1, 1000000);
|
||||
minima[0] = 0;
|
||||
std::vector<size_t> breaks(count + 1, 0);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
size_t j = i + 1;
|
||||
while (j <= count)
|
||||
{
|
||||
size_t w = offsets[j] - offsets[i];
|
||||
|
||||
if (w > width)
|
||||
break;
|
||||
|
||||
while (w > 0 and isspace(text[offsets[i] + w - 1]))
|
||||
--w;
|
||||
|
||||
size_t cost = minima[i];
|
||||
if (j < count) // last line may be shorter
|
||||
cost += (width - w) * (width - w);
|
||||
|
||||
if (cost < minima[j])
|
||||
{
|
||||
minima[j] = cost;
|
||||
breaks[j] = i;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
||||
size_t j = count;
|
||||
while (j > 0)
|
||||
{
|
||||
size_t i = breaks[j];
|
||||
result.push_back(text.substr(offsets[i], offsets[j] - offsets[i]));
|
||||
j = i;
|
||||
}
|
||||
|
||||
reverse(result.begin(), result.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> word_wrap(const std::string &text, size_t width)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
for (auto p : cif::split<std::string>(text, "\n"))
|
||||
{
|
||||
if (p.empty())
|
||||
{
|
||||
result.push_back("");
|
||||
continue;
|
||||
}
|
||||
|
||||
auto lines = wrapLine(p, width);
|
||||
result.insert(result.end(), lines.begin(), lines.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
944
src/utilities.cpp
Normal file
944
src/utilities.cpp
Normal file
@@ -0,0 +1,944 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
#if not defined(_MSC_VER)
|
||||
#include <sys/ioctl.h>
|
||||
#include <termios.h>
|
||||
#endif
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include "revision.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
int VERBOSE = 0;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string get_version_nr()
|
||||
{
|
||||
std::ostringstream s;
|
||||
write_version_string(s, false);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
#ifdef _MSC_VER
|
||||
}
|
||||
#include <Windows.h>
|
||||
#include <libloaderapi.h>
|
||||
#include <wincon.h>
|
||||
|
||||
#include <codecvt>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
uint32_t get_terminal_width()
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
|
||||
return csbi.srWindow.Right - csbi.srWindow.Left + 1;
|
||||
}
|
||||
|
||||
std::string GetExecutablePath()
|
||||
{
|
||||
WCHAR buffer[4096];
|
||||
|
||||
DWORD n = ::GetModuleFileNameW(nullptr, buffer, sizeof(buffer) / sizeof(WCHAR));
|
||||
if (n == 0)
|
||||
throw std::runtime_error("could not get exe path");
|
||||
|
||||
std::wstring ws(buffer);
|
||||
|
||||
// convert from utf16 to utf8
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
|
||||
std::string u8str = conv1.to_bytes(ws);
|
||||
|
||||
return u8str;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
/// \brief Return the width in columns of the terminal attached to standard
/// output, or 80 when standard output is not a terminal or its size cannot
/// be determined.
uint32_t get_terminal_width()
{
    uint32_t result = 80;

    if (isatty(STDOUT_FILENO))
    {
        struct winsize w;

        // Ask for the size of the same descriptor that was tested above and
        // only use the answer when the request succeeded.
        if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0 and w.ws_col > 0)
            result = w.ws_col;
    }

    return result;
}
|
||||
|
||||
/// \brief Return the path of the running executable, which is the target
/// of the symbolic link /proc/self/exe.
///
/// \throws std::runtime_error when the link cannot be read
std::string get_executable_path()
{
    char path[PATH_MAX];

    // readlink does not add a terminating null character, the length of the
    // result is its return value
    const ssize_t n = readlink("/proc/self/exe", path, sizeof(path));
    if (n == -1)
        throw std::runtime_error(std::string("could not get exe path ") + strerror(errno));

    return std::string(path, static_cast<std::size_t>(n));
}
|
||||
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief State of a progress bar together with the thread that draws it.
///
/// The constructor starts a thread running Run(). Stop() asks that thread
/// to finish and waits for it, Stop() has to be called before the object is
/// destroyed.
struct ProgressImpl
{
    /// \param inMax     The value of mConsumed at which the work is done
    /// \param inAction  Description of the work, also used as initial message
    ProgressImpl(int64_t inMax, const std::string &inAction)
        : mMax(inMax)
        , mConsumed(0)
        , mAction(inAction)
        , mMessage(inAction)
        , mThread([this]
              { Run(); })
    {
    }

    void Run();

    /// Tell the drawing thread to stop and wait until it has done so.
    /// Calling this more than once is harmless.
    void Stop()
    {
        mStop = true;
        if (mThread.joinable())
            mThread.join();
    }

    void PrintProgress();
    void PrintDone();

    int64_t mMax;
    std::atomic<int64_t> mConsumed;
    int64_t mLastConsumed = 0;
    int mSpinnerIndex = 0;
    std::string mAction, mMessage;
    std::mutex mMutex; // held by Run() while drawing and by whoever changes mMessage
    std::chrono::time_point<std::chrono::system_clock>
        mStart = std::chrono::system_clock::now();

    // Written by Stop() and read by Run() on another thread, hence atomic
    std::atomic<bool> mStop{false};

    // Members are initialised in order of declaration. The thread starts
    // running as soon as it is constructed and uses the members above, so
    // it must remain the last member.
    std::thread mThread;
};
|
||||
|
||||
void ProgressImpl::Run()
|
||||
{
|
||||
bool printedAny = false;
|
||||
|
||||
try
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
|
||||
std::unique_lock lock(mMutex);
|
||||
|
||||
if (mStop or mConsumed == mMax)
|
||||
break;
|
||||
|
||||
auto elapsed = std::chrono::system_clock::now() - mStart;
|
||||
|
||||
if (elapsed < std::chrono::seconds(5))
|
||||
continue;
|
||||
|
||||
PrintProgress();
|
||||
printedAny = true;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
if (printedAny)
|
||||
PrintDone();
|
||||
}
|
||||
|
||||
void ProgressImpl::PrintProgress()
|
||||
{
|
||||
// const char* kBlocks[] = {
|
||||
// " ", // 0
|
||||
// u8"\u258F", // 1
|
||||
// u8"\u258E", // 2
|
||||
// u8"\u258D", // 3
|
||||
// u8"\u258C", // 4
|
||||
// u8"\u258B", // 5
|
||||
// u8"\u258A", // 6
|
||||
// u8"\u2589", // 7
|
||||
// u8"\u2588", // 8
|
||||
// };
|
||||
|
||||
const char *kBlocks[] = {
|
||||
" ", // 0
|
||||
" ", // 1
|
||||
" ", // 2
|
||||
"-", // 3
|
||||
"-", // 4
|
||||
"-", // 5
|
||||
"=", // 6
|
||||
"=", // 7
|
||||
"=", // 8
|
||||
};
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
std::string msg;
|
||||
msg.reserve(width + 1);
|
||||
if (mMessage.length() <= 20)
|
||||
{
|
||||
msg = mMessage;
|
||||
if (msg.length() < 20)
|
||||
msg.append(20 - msg.length(), ' ');
|
||||
}
|
||||
else
|
||||
msg = mMessage.substr(0, 17) + "...";
|
||||
|
||||
msg += " |";
|
||||
|
||||
int64_t consumed = mConsumed;
|
||||
float progress = static_cast<float>(consumed) / mMax;
|
||||
int pi = static_cast<int>(std::ceil(progress * 33 * 8));
|
||||
// int tw = width - 28;
|
||||
// int twd = static_cast<int>(tw * progress + 0.5f);
|
||||
// msg.append(twd, '=');
|
||||
// msg.append(tw - twd, ' ');
|
||||
|
||||
for (int i = 0; i < 33; ++i)
|
||||
{
|
||||
if (pi <= 0)
|
||||
msg += kBlocks[0];
|
||||
else if (pi >= 8)
|
||||
msg += kBlocks[8];
|
||||
else
|
||||
msg += kBlocks[pi];
|
||||
pi -= 8;
|
||||
}
|
||||
|
||||
msg.append("| ");
|
||||
|
||||
const char kSpinner[] = {' ', '.', 'o', 'O', '0', 'O', 'o', '.'};
|
||||
const size_t kSpinnerCount = sizeof(kSpinner);
|
||||
|
||||
if (mLastConsumed < consumed)
|
||||
{
|
||||
mLastConsumed = consumed;
|
||||
mSpinnerIndex = (mSpinnerIndex + 1) % kSpinnerCount;
|
||||
}
|
||||
|
||||
const char spinner[2] = {kSpinner[mSpinnerIndex], 0};
|
||||
msg.append(spinner);
|
||||
|
||||
// int perc = static_cast<int>(100 * progress);
|
||||
// if (perc < 100)
|
||||
// msg += ' ';
|
||||
// if (perc < 10)
|
||||
// msg += ' ';
|
||||
// msg += to_string(perc);
|
||||
// msg += '%';
|
||||
|
||||
std::cout << '\r' << msg;
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
/// \brief Write a duration in seconds in a human readable form, e.g.
/// "1d 2h 3m 4.5s". Days, hours and minutes are left out when zero, the
/// seconds are always written, with one decimal.
///
/// A negative (or NaN) duration is written as "0.0s". The stream is left in
/// fixed notation with a precision of 1.
std::ostream &operator<<(std::ostream &os, const std::chrono::duration<double> &t)
{
    double total = t.count();
    if (not(total > 0))
        total = 0;

    uint64_t s = static_cast<uint64_t>(std::trunc(total));

    // The fractional part has to be taken before s is reduced below
    const double fraction = total - static_cast<double>(s);

    if (s >= 24 * 60 * 60)
    {
        auto days = s / (24 * 60 * 60);
        os << days << "d ";
        s %= 24 * 60 * 60;
    }

    if (s >= 60 * 60)
    {
        auto hours = s / (60 * 60);
        os << hours << "h ";
        s %= 60 * 60;
    }

    if (s >= 60)
    {
        auto minutes = s / 60;
        os << minutes << "m ";
        s %= 60;
    }

    const double ss = static_cast<double>(s) + fraction;

    os << std::fixed << std::setprecision(1) << ss << 's';

    return os;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void ProgressImpl::PrintDone()
|
||||
{
|
||||
std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - mStart;
|
||||
|
||||
std::ostringstream msgstr;
|
||||
msgstr << mAction << " done in " << elapsed << " seconds";
|
||||
auto msg = msgstr.str();
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
if (msg.length() < width)
|
||||
msg += std::string(width - msg.length(), ' ');
|
||||
|
||||
std::cout << '\r' << msg << std::endl;
|
||||
}
|
||||
|
||||
/// \brief Create a progress bar for an action that is finished after
/// \a inMax units of work.
///
/// The bar is only created when standard output is a terminal and VERBOSE
/// is not negative, otherwise this object does nothing.
Progress::Progress(int64_t inMax, const std::string &inAction)
    : m_impl(nullptr)
{
    const bool showBar = isatty(STDOUT_FILENO) and VERBOSE >= 0;

    if (showBar)
        m_impl = new ProgressImpl(inMax, inAction);
}
|
||||
|
||||
/// \brief Stop the drawing thread, if any, and release the implementation.
Progress::~Progress()
{
    if (m_impl == nullptr)
        return;

    // the thread has to be finished before the object it uses is destroyed
    m_impl->Stop();
    delete m_impl;
}
|
||||
|
||||
void Progress::consumed(int64_t inConsumed)
|
||||
{
|
||||
if (m_impl != nullptr and
|
||||
(m_impl->mConsumed += inConsumed) >= m_impl->mMax)
|
||||
{
|
||||
m_impl->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
void Progress::progress(int64_t inProgress)
|
||||
{
|
||||
if (m_impl != nullptr and
|
||||
(m_impl->mConsumed = inProgress) >= m_impl->mMax)
|
||||
{
|
||||
m_impl->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
void Progress::message(const std::string &inMessage)
|
||||
{
|
||||
if (m_impl != nullptr)
|
||||
{
|
||||
std::unique_lock lock(m_impl->mMutex);
|
||||
m_impl->mMessage = inMessage;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
//
|
||||
// Try to find a named resource. Can be either a local file with this name,
|
||||
// a file located in our cache directory or a resource linked in with mrc.
|
||||
//
|
||||
// We have a special, private version of mrsrc here. To be able to create
|
||||
// shared libraries and still be able to link when there's no mrc used.
|
||||
|
||||
namespace mrsrc
|
||||
{
|
||||
/// \brief Internal data structure as generated by mrc, one entry of the
/// resource index.
///
/// The layout has to match the data that is linked in, so members must not
/// be added, removed or reordered.
struct rsrc_imp
{
    unsigned int m_next;  ///< index of the next sibling in the resource index, 0 if there is none
    unsigned int m_child; ///< index of the first child in the resource index, 0 if there is none
    unsigned int m_name;  ///< offset of the name of this entry in the resource names
    unsigned int m_size;  ///< size of the data of this entry
    unsigned int m_data;  ///< offset of the data of this entry in the resource data
};
|
||||
} // namespace mrsrc
|
||||
|
||||
#if _MSC_VER
|
||||
|
||||
extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
|
||||
extern "C" const char *gResourceDataDefault[1] = {};
|
||||
extern "C" const char *gResourceNameDefault[1] = {};
|
||||
|
||||
extern "C" const mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern "C" const char gResourceData[];
|
||||
extern "C" const char gResourceName[];
|
||||
|
||||
#pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
|
||||
|
||||
#else
|
||||
extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern const __attribute__((weak)) char gResourceData[];
|
||||
extern const __attribute__((weak)) char gResourceName[];
|
||||
|
||||
const mrsrc::rsrc_imp gResourceIndex[1] = {};
|
||||
const char gResourceData[1] = {};
|
||||
const char gResourceName[1] = {};
|
||||
|
||||
#endif
|
||||
|
||||
namespace mrsrc
|
||||
{
|
||||
class rsrc_data
|
||||
{
|
||||
public:
|
||||
static rsrc_data &instance()
|
||||
{
|
||||
static rsrc_data s_instance;
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
const rsrc_imp *index() const { return m_index; }
|
||||
|
||||
const char *data(unsigned int offset) const
|
||||
{
|
||||
return m_data + offset;
|
||||
}
|
||||
|
||||
const char *name(unsigned int offset) const
|
||||
{
|
||||
return m_name + offset;
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc_data()
|
||||
{
|
||||
// if (gResourceIndex and (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0) and gResourceIndex and gResourceName)
|
||||
if (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0)
|
||||
{
|
||||
m_index = gResourceIndex;
|
||||
m_data = gResourceData;
|
||||
m_name = gResourceName;
|
||||
}
|
||||
}
|
||||
|
||||
rsrc_imp m_dummy = {};
|
||||
const rsrc_imp *m_index = &m_dummy;
|
||||
const char *m_data = "";
|
||||
const char *m_name = "";
|
||||
};
|
||||
|
||||
/// \brief Class mrsrc::rsrc contains a pointer to the data in the
|
||||
/// resource, as well as offering an iterator interface to its
|
||||
/// children.
|
||||
|
||||
class rsrc
|
||||
{
|
||||
public:
|
||||
rsrc()
|
||||
: m_impl(rsrc_data::instance().index())
|
||||
{
|
||||
}
|
||||
|
||||
rsrc(const rsrc &other)
|
||||
: m_impl(other.m_impl)
|
||||
{
|
||||
}
|
||||
|
||||
rsrc &operator=(const rsrc &other)
|
||||
{
|
||||
m_impl = other.m_impl;
|
||||
return *this;
|
||||
}
|
||||
|
||||
rsrc(std::filesystem::path path);
|
||||
|
||||
std::string name() const { return rsrc_data::instance().name(m_impl->m_name); }
|
||||
|
||||
const char *data() const { return rsrc_data::instance().data(m_impl->m_data); }
|
||||
|
||||
unsigned long size() const { return m_impl->m_size; }
|
||||
|
||||
explicit operator bool() const { return m_impl != NULL and m_impl->m_size > 0; }
|
||||
|
||||
template <typename RSRC>
|
||||
class iterator_t
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = RSRC;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_t(const rsrc_imp *cur)
|
||||
: m_cur(cur)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_t(const iterator_t &i)
|
||||
: m_cur(i.m_cur)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_t &operator=(const iterator_t &i)
|
||||
{
|
||||
m_cur = i.m_cur;
|
||||
return *this;
|
||||
}
|
||||
|
||||
reference operator*() { return m_cur; }
|
||||
pointer operator->() { return &m_cur; }
|
||||
|
||||
iterator_t &operator++()
|
||||
{
|
||||
if (m_cur.m_impl->m_next)
|
||||
m_cur.m_impl = rsrc_data::instance().index() + m_cur.m_impl->m_next;
|
||||
else
|
||||
m_cur.m_impl = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_t operator++(int)
|
||||
{
|
||||
auto tmp(*this);
|
||||
this->operator++();
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_t &rhs) const { return m_cur.m_impl == rhs.m_cur.m_impl; }
|
||||
bool operator!=(const iterator_t &rhs) const { return m_cur.m_impl != rhs.m_cur.m_impl; }
|
||||
|
||||
private:
|
||||
value_type m_cur;
|
||||
};
|
||||
|
||||
using iterator = iterator_t<rsrc>;
|
||||
|
||||
iterator begin() const
|
||||
{
|
||||
const rsrc_imp *impl = nullptr;
|
||||
if (m_impl and m_impl->m_child)
|
||||
impl = rsrc_data::instance().index() + m_impl->m_child;
|
||||
return iterator(impl);
|
||||
}
|
||||
|
||||
iterator end() const
|
||||
{
|
||||
return iterator(nullptr);
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc(const rsrc_imp *imp)
|
||||
: m_impl(imp)
|
||||
{
|
||||
}
|
||||
|
||||
const rsrc_imp *m_impl;
|
||||
};
|
||||
|
||||
/// \brief Locate the resource at path \a p.
///
/// Starting at the root of the resource index, each element of the path is
/// looked up by name among the children of the entry found so far. The
/// result is an invalid rsrc when one of the elements cannot be found.
inline rsrc::rsrc(std::filesystem::path p)
    : m_impl(rsrc_data::instance().index())
{
    // using std::filesystem::path would have been natural here of course...

    auto element = p.begin();
    const auto last = p.end();

    while (m_impl != nullptr and element != last)
    {
        const auto name = *element;
        ++element;

        // search the children of the current entry for this path element
        const rsrc_imp *found = nullptr;

        for (const rsrc &child : *this)
        {
            if (child.name() == name)
            {
                found = child.m_impl;
                break;
            }
        }

        m_impl = found;
    }

    if (element != last) // not found
        m_impl = nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename CharT, typename Traits>
|
||||
class basic_streambuf : public std::basic_streambuf<CharT, Traits>
|
||||
{
|
||||
public:
|
||||
typedef CharT char_type;
|
||||
typedef Traits traits_type;
|
||||
typedef typename traits_type::int_type int_type;
|
||||
typedef typename traits_type::pos_type pos_type;
|
||||
typedef typename traits_type::off_type off_type;
|
||||
|
||||
/// \brief constructor taking a \a path to the resource in memory
|
||||
basic_streambuf(const std::string &path)
|
||||
: m_rsrc(path)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
/// \brief constructor taking a \a rsrc
|
||||
basic_streambuf(const rsrc &rsrc)
|
||||
: m_rsrc(rsrc)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
basic_streambuf(const basic_streambuf &) = delete;
|
||||
|
||||
basic_streambuf(basic_streambuf &&rhs)
|
||||
: basic_streambuf(rhs.m_rsrc)
|
||||
{
|
||||
}
|
||||
|
||||
basic_streambuf &operator=(const basic_streambuf &) = delete;
|
||||
|
||||
basic_streambuf &operator=(basic_streambuf &&rhs)
|
||||
{
|
||||
swap(rhs);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void swap(basic_streambuf &rhs)
|
||||
{
|
||||
std::swap(m_begin, rhs.m_begin);
|
||||
std::swap(m_end, rhs.m_end);
|
||||
std::swap(m_current, rhs.m_current);
|
||||
}
|
||||
|
||||
private:
|
||||
void init()
|
||||
{
|
||||
m_begin = reinterpret_cast<const char_type *>(m_rsrc.data());
|
||||
m_end = reinterpret_cast<const char_type *>(m_rsrc.data() + m_rsrc.size());
|
||||
m_current = m_begin;
|
||||
}
|
||||
|
||||
int_type underflow()
|
||||
{
|
||||
if (m_current == m_end)
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*m_current);
|
||||
}
|
||||
|
||||
int_type uflow()
|
||||
{
|
||||
if (m_current == m_end)
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*m_current++);
|
||||
}
|
||||
|
||||
int_type pbackfail(int_type ch)
|
||||
{
|
||||
if (m_current == m_begin or (ch != traits_type::eof() and ch != m_current[-1]))
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*--m_current);
|
||||
}
|
||||
|
||||
std::streamsize showmanyc()
|
||||
{
|
||||
assert(std::less_equal<const char *>()(m_current, m_end));
|
||||
return m_end - m_current;
|
||||
}
|
||||
|
||||
pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which)
|
||||
{
|
||||
switch (dir)
|
||||
{
|
||||
case std::ios_base::beg:
|
||||
m_current = m_begin + off;
|
||||
break;
|
||||
|
||||
case std::ios_base::end:
|
||||
m_current = m_end + off;
|
||||
break;
|
||||
|
||||
case std::ios_base::cur:
|
||||
m_current += off;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (m_current < m_begin)
|
||||
m_current = m_begin;
|
||||
|
||||
if (m_current > m_end)
|
||||
m_current = m_end;
|
||||
|
||||
return m_current - m_begin;
|
||||
}
|
||||
|
||||
pos_type seekpos(pos_type pos, std::ios_base::openmode which)
|
||||
{
|
||||
m_current = m_begin + pos;
|
||||
|
||||
if (m_current < m_begin)
|
||||
m_current = m_begin;
|
||||
|
||||
if (m_current > m_end)
|
||||
m_current = m_end;
|
||||
|
||||
return m_current - m_begin;
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc m_rsrc;
|
||||
const char_type *m_begin;
|
||||
const char_type *m_end;
|
||||
const char_type *m_current;
|
||||
};
|
||||
|
||||
using streambuf = basic_streambuf<char, std::char_traits<char>>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// class mrsrc::istream
|
||||
|
||||
template <typename CharT, typename Traits>
|
||||
class basic_istream : public std::basic_istream<CharT, Traits>
|
||||
{
|
||||
public:
|
||||
typedef CharT char_type;
|
||||
typedef Traits traits_type;
|
||||
typedef typename traits_type::int_type int_type;
|
||||
typedef typename traits_type::pos_type pos_type;
|
||||
typedef typename traits_type::off_type off_type;
|
||||
|
||||
private:
|
||||
using __streambuf_type = basic_streambuf<CharT, Traits>;
|
||||
using __istream_type = std::basic_istream<CharT, Traits>;
|
||||
|
||||
__streambuf_type m_buffer;
|
||||
|
||||
public:
|
||||
basic_istream(const std::string &path)
|
||||
: __istream_type(&m_buffer)
|
||||
, m_buffer(path)
|
||||
{
|
||||
this->init(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream(rsrc &resource)
|
||||
: __istream_type(&m_buffer)
|
||||
, m_buffer(resource)
|
||||
{
|
||||
this->init(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream(const basic_istream &) = delete;
|
||||
|
||||
basic_istream(basic_istream &&rhs)
|
||||
: __istream_type(std::move(rhs))
|
||||
, m_buffer(std::move(rhs.m_buffer))
|
||||
{
|
||||
__istream_type::set_rdbuf(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream &operator=(const basic_istream &) = delete;
|
||||
|
||||
basic_istream &operator=(basic_istream &&rhs)
|
||||
{
|
||||
__istream_type::operator=(std::move(rhs));
|
||||
m_buffer = std::move(rhs.m_buffer);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void swap(basic_istream &rhs)
|
||||
{
|
||||
__istream_type::swap(rhs);
|
||||
m_buffer.swap(rhs.m_buffer);
|
||||
}
|
||||
|
||||
__streambuf_type *rdbuf() const
|
||||
{
|
||||
return const_cast<__streambuf_type *>(&m_buffer);
|
||||
}
|
||||
};
|
||||
|
||||
using istream = basic_istream<char, std::char_traits<char>>;
|
||||
} // namespace mrsrc
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class ResourcePool
|
||||
{
|
||||
public:
|
||||
static ResourcePool &instance()
|
||||
{
|
||||
static std::unique_ptr<ResourcePool> s_instance(new ResourcePool);
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
void pushDir(fs::path dir)
|
||||
{
|
||||
std::error_code ec;
|
||||
|
||||
if (fs::exists(dir, ec) and not ec)
|
||||
mDirs.push_front(dir);
|
||||
}
|
||||
|
||||
void pushDir(const char *path)
|
||||
{
|
||||
if (path != nullptr)
|
||||
pushDir(fs::path(path));
|
||||
}
|
||||
|
||||
void pushAlias(const std::string &name, std::filesystem::path dataFile)
|
||||
{
|
||||
std::error_code ec;
|
||||
if (not fs::exists(dataFile, ec) or ec)
|
||||
throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());
|
||||
|
||||
mLocalResources[name] = dataFile;
|
||||
}
|
||||
|
||||
std::unique_ptr<std::istream> load(fs::path name);
|
||||
|
||||
private:
|
||||
ResourcePool();
|
||||
|
||||
std::unique_ptr<std::ifstream> open(fs::path &p)
|
||||
{
|
||||
std::unique_ptr<std::ifstream> result;
|
||||
|
||||
try
|
||||
{
|
||||
if (fs::exists(p))
|
||||
{
|
||||
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
|
||||
if (file->is_open())
|
||||
result.reset(file.release());
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::map<std::string, std::filesystem::path> mLocalResources;
|
||||
std::deque<fs::path> mDirs;
|
||||
};
|
||||
|
||||
/// Fill the directory list from, in this order: the DATA_DIR macro, the
/// LIBCIFPP_DATA_DIR environment variable, $CCP4/share/libcifpp and the
/// CACHE_DIR macro. Each entry goes through pushDir.
ResourcePool::ResourcePool()
{
#if defined(DATA_DIR)
	pushDir(DATA_DIR);
#endif

	// a missing variable yields a null pointer which pushDir ignores
	pushDir(getenv("LIBCIFPP_DATA_DIR"));

	if (auto ccp4 = getenv("CCP4"); ccp4 != nullptr)
		pushDir(fs::path(ccp4) / "share" / "libcifpp");

#if defined(CACHE_DIR)
	pushDir(CACHE_DIR);
#endif
}
|
||||
|
||||
std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
|
||||
{
|
||||
std::unique_ptr<std::istream> result;
|
||||
std::error_code ec;
|
||||
|
||||
fs::path p = name;
|
||||
|
||||
if (mLocalResources.count(name.string()))
|
||||
result = open(mLocalResources[name.string()]);
|
||||
|
||||
for (auto di = mDirs.begin(); not result and di != mDirs.end(); ++di)
|
||||
{
|
||||
auto p2 = *di / p;
|
||||
if (fs::exists(p2, ec) and not ec)
|
||||
result = open(p2);
|
||||
}
|
||||
|
||||
// if (not result and gResourceData)
|
||||
if (not result and (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0))
|
||||
{
|
||||
mrsrc::rsrc rsrc(name);
|
||||
if (rsrc)
|
||||
result.reset(new mrsrc::istream(rsrc));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void add_data_directory(std::filesystem::path dataDir)
|
||||
{
|
||||
ResourcePool::instance().pushDir(dataDir);
|
||||
}
|
||||
|
||||
void add_file_resource(const std::string &name, std::filesystem::path dataFile)
|
||||
{
|
||||
ResourcePool::instance().pushAlias(name, dataFile);
|
||||
}
|
||||
|
||||
std::unique_ptr<std::istream> load_resource(std::filesystem::path name)
|
||||
{
|
||||
return ResourcePool::instance().load(name);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
480
src/validate.cpp
Normal file
480
src/validate.cpp
Normal file
@@ -0,0 +1,480 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
// The validator depends on regular expressions. Unfortunately,
|
||||
// the implementation of std::regex in g++ is buggy and crashes
|
||||
// on reading the pdbx dictionary. Therefore, in case g++ is used
|
||||
// the code will use boost::regex instead.
|
||||
|
||||
#if USE_BOOST_REGEX
|
||||
#include <boost/regex.hpp>
|
||||
using boost::regex;
|
||||
#else
|
||||
#include <regex>
|
||||
using std::regex;
|
||||
#endif
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct regex_impl : public regex
|
||||
{
|
||||
regex_impl(std::string_view rx)
|
||||
: regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/// Construct an error carrying @a msg unchanged.
validation_error::validation_error(const std::string &msg)
	: m_msg(msg)
{
}
|
||||
|
||||
/// Construct an error whose message is prefixed with the full item name,
/// written as _<cat>.<item>.
validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
	: m_msg("When validating _" + cat + '.' + item + ": " + msg)
{
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Translate the names "char", "uchar" and "numb" (compared with iequals)
/// into the corresponding DDL_PrimitiveType.
/// Throws validation_error for any other text.
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
{
	if (iequals(s, "char"))
		return DDL_PrimitiveType::Char;

	if (iequals(s, "uchar"))
		return DDL_PrimitiveType::UChar;

	if (iequals(s, "numb"))
		return DDL_PrimitiveType::Numb;

	throw validation_error("Not a known primitive type");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Create a validator called @a name for primitive type @a type that
/// accepts values matching @a rx. An empty @a rx is replaced with ".+"
/// since, according to the original note, empty regular expressions are
/// not allowed in libc++'s std::regex (POSIX?).
type_validator::type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx)
	: m_name(name)
	, m_primitive_type(type)
	, m_rx(new regex_impl(rx.empty() ? ".+" : rx))
{
}
|
||||
|
||||
/// Release the regular expression allocated by the constructor.
type_validator::~type_validator()
{
	delete m_rx;
}
|
||||
|
||||
template <typename T>
|
||||
struct my_from_chars
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return cif::from_chars(a, b, d);
|
||||
}
|
||||
};
|
||||
|
||||
/// Adapter exposing std::from_chars through a static member function.
template <typename T>
struct std_from_chars
{
	/// Parse the characters in [a, b) into @a d using std::from_chars.
	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
	{
		const std::from_chars_result parsed = std::from_chars(a, b, d);
		return parsed;
	}
};
|
||||
|
||||
int type_validator::compare(std::string_view a, std::string_view b) const
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
if (a.empty())
|
||||
result = b.empty() ? 0 : -1;
|
||||
else if (b.empty())
|
||||
result = a.empty() ? 0 : +1;
|
||||
else
|
||||
{
|
||||
switch (m_primitive_type)
|
||||
{
|
||||
case DDL_PrimitiveType::Numb:
|
||||
{
|
||||
double da, db;
|
||||
|
||||
using namespace cif;
|
||||
using namespace std;
|
||||
|
||||
std::from_chars_result ra, rb;
|
||||
|
||||
ra = selected_charconv<double>::from_chars(a.begin(), a.end(), da);
|
||||
rb = selected_charconv<double>::from_chars(b.begin(), b.end(), db);
|
||||
|
||||
if (ra.ec == std::errc() and rb.ec == std::errc())
|
||||
{
|
||||
auto d = da - db;
|
||||
if (std::abs(d) > std::numeric_limits<double>::epsilon())
|
||||
{
|
||||
if (d > 0)
|
||||
result = 1;
|
||||
else if (d < 0)
|
||||
result = -1;
|
||||
}
|
||||
}
|
||||
else if (ra.ec == std::errc())
|
||||
result = 1;
|
||||
else
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
case DDL_PrimitiveType::UChar:
|
||||
case DDL_PrimitiveType::Char:
|
||||
{
|
||||
// CIF is guaranteed to have ascii only, therefore this primitive code will do
|
||||
// also, we're collapsing spaces
|
||||
|
||||
auto ai = a.begin(), bi = b.begin();
|
||||
for (;;)
|
||||
{
|
||||
if (ai == a.end())
|
||||
{
|
||||
if (bi != b.end())
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
else if (bi == b.end())
|
||||
{
|
||||
result = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
char ca = *ai;
|
||||
char cb = *bi;
|
||||
|
||||
if (m_primitive_type == DDL_PrimitiveType::UChar)
|
||||
{
|
||||
ca = tolower(ca);
|
||||
cb = tolower(cb);
|
||||
}
|
||||
|
||||
result = ca - cb;
|
||||
|
||||
if (result != 0)
|
||||
break;
|
||||
|
||||
if (ca == ' ')
|
||||
{
|
||||
while (ai[1] == ' ')
|
||||
++ai;
|
||||
while (bi[1] == ' ')
|
||||
++bi;
|
||||
}
|
||||
|
||||
++ai;
|
||||
++bi;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
|
||||
//{
|
||||
//// if (mParent != nullptr and VERBOSE)
|
||||
//// cerr << "replacing parent in " << mCategory->m_name << " from " << mParent->mCategory->m_name << " to " << parent->mCategory->m_name << endl;
|
||||
//// mParent = parent;
|
||||
//
|
||||
// if (m_type == nullptr and parent != nullptr)
|
||||
// m_type = parent->m_type;
|
||||
//
|
||||
// if (parent != nullptr)
|
||||
// {
|
||||
// mLinked.push_back({parent, parentItem, childItem});
|
||||
//
|
||||
// parent->mChildren.insert(this);
|
||||
////
|
||||
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
|
||||
//// parent->mForeignKeys.insert(this);
|
||||
// }
|
||||
//}
|
||||
|
||||
void item_validator::operator()(std::string_view value) const
|
||||
{
|
||||
if (not value.empty() and value != "?" and value != ".")
|
||||
{
|
||||
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
|
||||
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name);
|
||||
|
||||
if (not m_enums.empty())
|
||||
{
|
||||
if (m_enums.count(std::string{ value }) == 0)
|
||||
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Add the item validator @a v to this category. The tag of a mandatory
/// item is recorded in m_mandatory_fields and the validator's category
/// pointer is set to this object before it is stored.
/// When an equivalent validator is already present nothing is replaced;
/// this is reported on std::cout when VERBOSE is 4 or higher.
void category_validator::addItemValidator(item_validator &&v)
{
	if (v.m_mandatory)
		m_mandatory_fields.insert(v.m_tag);

	v.m_category = this;

	auto r = m_item_validators.insert(std::move(v));

	// v may have been moved from at this point, so take the tag from the
	// element that prevented the insertion instead of from v
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << r.first->m_tag << " to category " << m_name << std::endl;
}
|
||||
|
||||
/// Look up the validator for item @a tag in this category.
/// Returns nullptr when there is none, which is reported on std::cout when
/// VERBOSE is larger than 4.
const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
{
	auto found = m_item_validators.find(item_validator{ std::string(tag) });
	if (found != m_item_validators.end())
		return &*found;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Add the type validator @a v. When an equivalent validator is already
/// present nothing is replaced; this is reported on std::cout when VERBOSE
/// is larger than 4.
void validator::add_type_validator(type_validator &&v)
{
	auto r = m_type_validators.insert(std::move(v));

	// v may have been moved from at this point, so take the name from the
	// element that prevented the insertion instead of from v
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for type " << r.first->m_name << std::endl;
}
|
||||
|
||||
/// Look up the validator for the type called @a typeCode.
/// Returns nullptr when there is none, which is reported on std::cout when
/// VERBOSE is larger than 4.
const type_validator *validator::get_validator_for_type(std::string_view typeCode) const
{
	// the lookup key is a temporary validator, only its name is supplied
	auto found = m_type_validators.find(type_validator{ std::string(typeCode), DDL_PrimitiveType::Char, {} });
	if (found != m_type_validators.end())
		return &*found;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
|
||||
|
||||
/// Add the category validator @a v. When an equivalent validator is
/// already present nothing is replaced; this is reported on std::cout when
/// VERBOSE is larger than 4.
void validator::add_category_validator(category_validator &&v)
{
	auto r = m_category_validators.insert(std::move(v));

	// v may have been moved from at this point, so take the name from the
	// element that prevented the insertion instead of from v
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for category " << r.first->m_name << std::endl;
}
|
||||
|
||||
/// Look up the validator for the category called @a category.
/// Returns nullptr when there is none, which is reported on std::cout when
/// VERBOSE is larger than 4.
const category_validator *validator::get_validator_for_category(std::string_view category) const
{
	auto found = m_category_validators.find(category_validator{ std::string(category) });
	if (found != m_category_validators.end())
		return &*found;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
|
||||
|
||||
/// Look up the validator for the full item name @a tag. The tag is split
/// with split_tag_name into a category and an item part, the category is
/// looked up first and then the item inside it.
/// Returns nullptr when either lookup fails, which is reported on
/// std::cout when VERBOSE is larger than 4.
item_validator *validator::get_validator_for_item(std::string_view tag) const
{
	std::string cat, item;
	std::tie(cat, item) = split_tag_name(tag);

	item_validator *result = nullptr;

	if (auto *cv = get_validator_for_category(cat); cv != nullptr)
		result = const_cast<item_validator *>(cv->get_validator_for_item(item));

	if (result == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << tag << std::endl;

	return result;
}
|
||||
|
||||
/// Add the link validator @a v after checking it against the validators
/// that are already known.
///
/// Throws std::runtime_error when the number of parent and child keys
/// differ, when the parent or child category is unknown or when one of
/// the keys is not a known item of its category.
/// A child item without a type takes over the type of its parent item.
void validator::add_link_validator(link_validator &&v)
{
	const auto key_count = v.m_parent_keys.size();

	assert(key_count == v.m_child_keys.size());
	if (key_count != v.m_child_keys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	auto parent_cv = get_validator_for_category(v.m_parent_category);
	auto child_cv = get_validator_for_category(v.m_child_category);

	if (parent_cv == nullptr)
		throw std::runtime_error("unknown parent category " + v.m_parent_category);

	if (child_cv == nullptr)
		throw std::runtime_error("unknown child category " + v.m_child_category);

	for (size_t i = 0; i < key_count; ++i)
	{
		const auto &parent_key = v.m_parent_keys[i];
		const auto &child_key = v.m_child_keys[i];

		auto parent_iv = parent_cv->get_validator_for_item(parent_key);
		if (parent_iv == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + parent_key);

		auto child_iv = child_cv->get_validator_for_item(child_key);
		if (child_iv == nullptr)
			throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + child_key);

		// the lookup returns a pointer to const, the cast makes the type assignable
		if (child_iv->m_type == nullptr and parent_iv->m_type != nullptr)
			const_cast<item_validator *>(child_iv)->m_type = parent_iv->m_type;
	}

	m_link_validators.emplace_back(std::move(v));
}
|
||||
|
||||
std::vector<const link_validator *> validator::get_links_for_parent(std::string_view category) const
|
||||
{
|
||||
std::vector<const link_validator *> result;
|
||||
|
||||
for (auto &l : m_link_validators)
|
||||
{
|
||||
if (l.m_parent_category == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<const link_validator *> validator::get_links_for_child(std::string_view category) const
|
||||
{
|
||||
std::vector<const link_validator *> result;
|
||||
|
||||
for (auto &l : m_link_validators)
|
||||
{
|
||||
if (l.m_child_category == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void validator::report_error(const std::string &msg, bool fatal) const
|
||||
{
|
||||
if (m_strict or fatal)
|
||||
throw validation_error(msg);
|
||||
else if (VERBOSE > 0)
|
||||
std::cerr << msg << std::endl;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Return the validator for the dictionary called @a dictionary_name,
/// constructing it first when it is not yet in the list of validators.
/// The whole call runs with m_mutex locked.
///
/// A new dictionary is searched for in this order:
///  1. load_resource with the name as given,
///  2. load_resource with ".dic" appended when the name has another extension,
///  3. a file with ".gz" (or ".dic.gz") appended, relative to the current
///     directory and then, when defined, in CACHE_DIR and DATA_DIR.
///
/// Throws std::runtime_error when the dictionary cannot be found or opened.
const validator &validator_factory::operator[](std::string_view dictionary_name)
{
	std::lock_guard lock(m_mutex);

	for (auto &validator : m_validators)
	{
		if (iequals(validator.name(), dictionary_name))
			return validator;
	}

	// not found, add it

	// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
	std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());

	// use the path built above, passing the string_view would again need
	// the conversion that is being worked around
	auto data = load_resource(dictionary);

	if (not data and dictionary.extension().string() != ".dic")
		data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));

	if (data)
		construct_validator(dictionary_name, *data);
	else
	{
		std::error_code ec;

		// might be a compressed dictionary on disk
		std::filesystem::path p = dictionary;
		if (p.extension() == ".dic")
			p = p.parent_path() / (p.filename().string() + ".gz");
		else
			p = p.parent_path() / (p.filename().string() + ".dic.gz");

#if defined(CACHE_DIR) or defined(DATA_DIR)
		if (not std::filesystem::exists(p, ec) or ec)
		{
			for (const char *dir : {
#if defined(CACHE_DIR)
					 CACHE_DIR,
#endif
#if defined(DATA_DIR)
					 DATA_DIR
#endif
				 })
			{
				auto p2 = std::filesystem::path(dir) / p;
				if (std::filesystem::exists(p2, ec) and not ec)
				{
					swap(p, p2);
					break;
				}
			}
		}
#endif

		if (std::filesystem::exists(p, ec) and not ec)
		{
			gxrio::ifstream in(p);

			if (not in.is_open())
				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

			construct_validator(dictionary_name, in);
		}
		else
			throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
	}

	// construct_validator appended the new validator at the end
	return m_validators.back();
}
|
||||
|
||||
void validator_factory::construct_validator(std::string_view name, std::istream &is)
|
||||
{
|
||||
m_validators.emplace_back(parse_dictionary(name, is));
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
BIN
test/1juh.cif.gz
Normal file
BIN
test/1juh.cif.gz
Normal file
Binary file not shown.
85
test/format-test.cpp
Normal file
85
test/format-test.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace tt = boost::test_tools;
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// User defined literal that turns the text of a string literal into a
/// cif::file by reading it through an in-memory stream buffer.
cif::file operator""_cf(const char *text, size_t length)
{
	// stream buffer with a get area that covers the literal's characters
	struct membuf : public std::streambuf
	{
		membuf(char *first, size_t size)
		{
			this->setg(first, first, first + size);
		}
	};

	// setg takes non-const pointers, hence the cast
	membuf buffer(const_cast<char *>(text), length);
	std::istream is(&buffer);

	return cif::file(is);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// // not a test, just initialize test dir
|
||||
// if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
// gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// // do this now, avoids the need for installing
|
||||
// cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// // initialize CCD location
|
||||
// cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Check cif::format both when streamed and through its str() member.
BOOST_AUTO_TEST_CASE(fmt_1)
{
	// Assuming cif::format follows the printf conventions, "%-10.10s"
	// left-justifies "world" in a field of exactly ten characters: five
	// padding spaces follow the word before the comma.
	const std::string expected = "Hello, world     , the magic number is 42 and pi is 3.14159";

	std::ostringstream os;

	std::string world("world");
	os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI);
	BOOST_CHECK_EQUAL(os.str(), expected);

	BOOST_CHECK_EQUAL(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI).str(),
		expected);
}
|
||||
341
test/model-test.cpp
Normal file
341
test/model-test.cpp
Normal file
@@ -0,0 +1,341 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// User defined literal that turns the text of a string literal into a
/// cif::file by reading it through an in-memory stream buffer.
cif::file operator""_cf(const char* text, size_t length)
{
	// stream buffer with a get area that covers the literal's characters
	struct membuf : public std::streambuf
	{
		membuf(char* first, size_t size)
		{
			this->setg(first, first, first + size);
		}
	};

	// setg takes non-const pointers, hence the cast
	membuf buffer(const_cast<char*>(text), length);
	std::istream is(&buffer);

	return cif::file(is);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path();
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// not a test, just initialize test dir
|
||||
if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// do this now, avoids the need for installing
|
||||
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// initialize CCD location
|
||||
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Create a non-polymer entity for HEM in an empty datablock, add four
/// atoms to it and compare the resulting datablock with the expected one.
BOOST_AUTO_TEST_CASE(create_nonpoly_1)
{
	cif::VERBOSE = 1;

	// the structure under test lives in a fresh datablock called TEST
	cif::file file;
	file.load_dictionary("mmcif_pdbx.dic");
	file.emplace("TEST"); // create a datablock

	cif::mm::structure structure(file);

	std::string entity_id = structure.create_non_poly_entity("HEM");

	// the atoms that will be added to the new entity
	auto atoms = R"(
data_HEM
loop_
_atom_site.id
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
1 HETATM C CHA . ? -5.248 39.769 -0.250 1.00 7.67 ?
2 HETATM C CHB . ? -3.774 36.790 3.280 1.00 7.05 ?
3 HETATM C CHC . ? -2.879 33.328 0.013 1.00 7.69 ?
4 HETATM C CHD . ? -4.342 36.262 -3.536 1.00 8.00 ?
# that's enough to test with
)"_cf;

	atoms.load_dictionary("mmcif_pdbx");

	auto &hem_data = atoms["HEM"];
	auto &atom_site = hem_data["atom_site"];

	auto hem_atoms = atom_site.rows();
	std::vector<cif::mm::atom> atom_data;
	for (auto hem_atom : hem_atoms)
		atom_data.emplace_back(hem_data, hem_atom);

	structure.create_non_poly(entity_id, atom_data);

	// what the TEST datablock should contain afterwards
	auto expected = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
_atom_type.symbol C
)"_cf;

	expected.load_dictionary("mmcif_pdbx.dic");

	// on a mismatch fail the test and print both datablocks for inspection
	const bool same = expected.front() == structure.get_datablock();
	if (not same)
	{
		BOOST_TEST(false);
		std::cout << expected.front() << std::endl
				  << std::endl
				  << structure.get_datablock() << std::endl;
	}
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Atoms must be found by their id, also when the ids in the atom_site
/// loop are not in ascending order (rows two and three carry ids 3 and 2).
BOOST_AUTO_TEST_CASE(test_atom_id)
{
	auto data = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
3 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
2 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;

	data.load_dictionary("mmcif_pdbx.dic");

	cif::mm::structure structure(data);

	// each id must map to the atom name found on the row with that id
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("1").get_label_atom_id(), "CHA");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("2").get_label_atom_id(), "CHC");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("3").get_label_atom_id(), "CHB");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("4").get_label_atom_id(), "CHD");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Walk the atom_site category of 1cbs and, for every row, check that the atom
// returned by structure.get_atom_by_id() reports the same label/auth fields.
// In lockstep, check that iterating structure.atoms() visits the same ids in
// the same order and that both sequences end together.
BOOST_AUTO_TEST_CASE(atom_numbers_1)
{
	const std::filesystem::path test1(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(test1.string());
	cif::mm::structure structure(file);

	auto &db = file.front();

	auto &atoms = structure.atoms();
	auto ai = atoms.begin();

	for (const auto &[id, label_asym_id, label_seq_id, label_atom_id, auth_seq_id, label_comp_id] :
		db["atom_site"].rows<std::string,std::string,int,std::string,std::string,std::string>("id", "label_asym_id", "label_seq_id", "label_atom_id", "auth_seq_id", "label_comp_id"))
	{
		auto atom = structure.get_atom_by_id(id);

		BOOST_CHECK_EQUAL(atom.get_label_asym_id(), label_asym_id);
		BOOST_CHECK_EQUAL(atom.get_label_seq_id(), label_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_atom_id(), label_atom_id);
		BOOST_CHECK_EQUAL(atom.get_auth_seq_id(), auth_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_comp_id(), label_comp_id);

		// BOOST_REQUIRE (not BOOST_ASSERT): the guard must stay active in
		// NDEBUG builds and must stop this test case before `ai` is
		// dereferenced past the end.
		BOOST_REQUIRE(ai != atoms.end());

		BOOST_CHECK_EQUAL(ai->id(), id);
		++ai;
	}

	// Reported through the test framework so a surplus of atoms is a test
	// failure rather than an assert() that disappears in release builds.
	BOOST_CHECK(ai == atoms.end());
}
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Load 1cbs and compare the structure's polymers with the
// pdbx_poly_seq_scheme category: exactly one polymer is expected, and each
// polymer's size() must equal the number of scheme rows with its asym_id.
BOOST_AUTO_TEST_CASE(test_load_2)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(example.string());

	auto &db = file.front();

	cif::mm::structure s(file);

	BOOST_CHECK(s.polymers().size() == 1);

	auto &seq_scheme = db["pdbx_poly_seq_scheme"];

	for (auto &polymer : s.polymers())
	{
		// rows of the scheme that belong to this polymer's asym
		auto scheme_rows = seq_scheme.find("asym_id"_key == polymer.get_asym_id());
		BOOST_CHECK_EQUAL(polymer.size(), scheme_rows.size());
	}
}
|
||||
|
||||
// Remove the residue that get_residue("B") returns for 1cbs and check that
// validate_atoms() on the modified structure does not throw.
BOOST_AUTO_TEST_CASE(remove_residue_1)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(example.string());

	cif::mm::structure s(file);

	auto &residue = s.get_residue("B");
	s.remove_residue(residue);

	BOOST_CHECK_NO_THROW(s.validate_atoms());
}
|
||||
@@ -1,57 +0,0 @@
|
||||
#include "../include/cif++/Cif++.hpp"
|
||||
#include "../include/cif++/PDB2Cif.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
// #include "pdb2cif.h"
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
po::options_description desc("pdb2cif-test options");
|
||||
desc.add_options()
|
||||
("input,i", po::value<std::string>(), "Input file")
|
||||
("help,h", "Display help message")
|
||||
("version", "Print version")
|
||||
("verbose,v", "Verbose output")
|
||||
("debug,d", po::value<int>(), "Debug level (for even more verbose output)");
|
||||
|
||||
po::positional_options_description p;
|
||||
p.add("input", 1);
|
||||
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
|
||||
po::notify(vm);
|
||||
|
||||
if (vm.count("version"))
|
||||
{
|
||||
std::cout << argv[0] << " version " PACKAGE_VERSION << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (vm.count("help") or vm.count("input") == 0)
|
||||
{
|
||||
std::cerr << desc << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cif::VERBOSE = vm.count("verbose") != 0;
|
||||
if (vm.count("debug"))
|
||||
cif::VERBOSE = vm["debug"].as<int>();
|
||||
|
||||
std::ifstream is(vm["input"].as<std::string>());
|
||||
if (not is.is_open())
|
||||
throw std::runtime_error("Could not open file " + vm["input"].as<std::string>());
|
||||
|
||||
cif::File f;
|
||||
ReadPDBFile(is, f);
|
||||
f.save(std::cout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,14 +1,34 @@
|
||||
#include "../include/cif++/Cif++.hpp"
|
||||
#include "../include/cif++/PDB2Cif.hpp"
|
||||
#include "../include/cif++/Structure.hpp"
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
cif::VERBOSE = 3;
|
||||
@@ -20,21 +40,29 @@ int main(int argc, char* argv[])
|
||||
if (argc == 3)
|
||||
testdir = argv[2];
|
||||
|
||||
if (std::filesystem::exists(testdir / ".."/"data"/"ccd-subset.cif"))
|
||||
cif::addFileResource("components.cif", testdir / ".."/"data"/"ccd-subset.cif");
|
||||
if (std::filesystem::exists(testdir / ".." / "data" / "ccd-subset.cif"))
|
||||
cif::add_file_resource("components.cif", testdir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
mmcif::CompoundFactory::instance().pushDictionary(testdir / "REA.cif");
|
||||
mmcif::CompoundFactory::instance().pushDictionary(testdir / "RXA.cif");
|
||||
if (std::filesystem::exists(testdir / ".." / "rsrc" / "mmcif_pdbx.dic"))
|
||||
cif::add_file_resource("mmcif_pdbx.dic", testdir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
mmcif::File f(testdir / ".."/"examples"/"1cbs.cif.gz");
|
||||
mmcif::Structure structure(f);
|
||||
cif::compound_factory::instance().push_dictionary(testdir / "REA.cif");
|
||||
cif::compound_factory::instance().push_dictionary(testdir / "RXA.cif");
|
||||
|
||||
auto &res = structure.getResidue("B", "REA");
|
||||
structure.changeResidue(res, "RXA", {});
|
||||
cif::file f(testdir / ".."/"examples"/"1cbs.cif.gz");
|
||||
cif::mm::structure structure(f);
|
||||
|
||||
structure.cleanupEmptyCategories();
|
||||
auto &res = structure.get_residue("B");
|
||||
structure.change_residue(res, "RXA", {});
|
||||
|
||||
f.file().save(std::cout);
|
||||
structure.cleanup_empty_categories();
|
||||
|
||||
f.save(std::cout);
|
||||
|
||||
if (not f.is_valid())
|
||||
throw std::runtime_error("Invalid");
|
||||
|
||||
f.save(std::cout);
|
||||
}
|
||||
catch (const std::exception& e)
|
||||
{
|
||||
|
||||
@@ -1,184 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/Structure.hpp"
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// User-defined literal: build a cif::File from the characters of a string
// literal by exposing them through an in-memory std::streambuf.
cif::File operator""_cf(const char* text, size_t length)
{
	// Stream buffer whose get area is exactly [first, first + count).
	struct literal_buf : public std::streambuf
	{
		literal_buf(char* first, size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg() takes non-const pointers, hence the const_cast; only a get area
	// is installed on the buffer.
	literal_buf source(const_cast<char*>(text), length);
	std::istream in(&source);

	return cif::File(in);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path();
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// not a test, just initialize test dir
|
||||
if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// do this now, avoids the need for installing
|
||||
cif::addFileResource("mmcif_pdbx_v50.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx_v50.dic");
|
||||
|
||||
// initialize CCD location
|
||||
cif::addFileResource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
mmcif::CompoundFactory::instance().pushDictionary(gTestDir / "HEM.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Create a HEM non-polymer entity in an empty TEST datablock, add four atoms
// parsed from an inline CIF snippet through createNonpoly(), and compare the
// resulting datablock with the expected CIF text. On mismatch the test fails
// and both datablocks are written to std::cout for inspection.
BOOST_AUTO_TEST_CASE(create_nonpoly_1)
{
	cif::VERBOSE = 1;

	// do this now, avoids the need for installing; resolved against gTestDir
	// (as in init_unit_test) so the test does not depend on the current
	// working directory
	cif::addFileResource("mmcif_pdbx_v50.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx_v50.dic");

	mmcif::File file;
	file.file().loadDictionary("mmcif_pdbx_v50.dic");
	file.createDatablock("TEST"); // create a datablock

	mmcif::Structure structure(file);

	std::string entity_id = structure.createNonPolyEntity("HEM");

	auto atoms = R"(
data_HEM
loop_
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
HETATM C CHA . ? -5.248 39.769 -0.250 1.00 7.67 ?
HETATM C CHB . ? -3.774 36.790 3.280 1.00 7.05 ?
HETATM C CHC . ? -2.879 33.328 0.013 1.00 7.69 ?
HETATM C CHD . ? -4.342 36.262 -3.536 1.00 8.00 ?
# that's enough to test with
)"_cf;

	auto &hem_data = atoms["HEM"];
	auto &atom_site = hem_data["atom_site"];

	// wrap every parsed atom_site row in an mmcif::Atom
	auto hem_atoms = atom_site.rows();
	std::vector<mmcif::Atom> atom_data;
	for (auto &hem_atom: hem_atoms)
		atom_data.emplace_back(hem_data, hem_atom);

	structure.createNonpoly(entity_id, atom_data);

	auto expected = R"(
data_TEST
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? ? HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? ? HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? ? HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? ? HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;

	expected.loadDictionary("mmcif_pdbx_v50.dic");

	if (not (expected.firstDatablock() == structure.getFile().data()))
	{
		BOOST_TEST(false);

		// dump expected and actual content to help diagnose the difference
		std::cout << expected.firstDatablock() << std::endl
				  << std::endl
				  << structure.getFile().data() << std::endl;
	}
}
|
||||
210
test/sugar-test.cpp
Normal file
210
test/sugar-test.cpp
Normal file
@@ -0,0 +1,210 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// User-defined literal: parse the characters of a string literal as CIF and
// return the resulting cif::file.
cif::file operator""_cf(const char* text, size_t length)
{
	// Stream buffer whose get area is exactly [first, first + count).
	struct literal_buf : public std::streambuf
	{
		literal_buf(char* first, size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg() takes non-const pointers, hence the const_cast; only a get area
	// is installed on the buffer.
	literal_buf source(const_cast<char*>(text), length);
	std::istream in(&source);

	return cif::file(in);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path();
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// not a test, just initialize test dir
|
||||
if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// do this now, avoids the need for installing
|
||||
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// initialize CCD location
|
||||
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Load 1juh, expect four branches, and check that each branch's name() equals
// the pdbx_description of the entity its first element belongs to.
BOOST_AUTO_TEST_CASE(sugar_name_1)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / "1juh.cif.gz");
	cif::file file(example.string());
	cif::mm::structure s(file);

	auto &entity = s.get_datablock()["entity"];
	auto &branches = s.branches();

	BOOST_CHECK_EQUAL(branches.size(), 4);

	for (auto &branch : branches)
	{
		const auto entity_id = branch.front().get_entity_id();

		// description stored in the entity category for this entity id
		const auto description = entity.find1<std::string>("id"_key == entity_id, "pdbx_description");
		BOOST_CHECK_EQUAL(branch.name(), description);
	}
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Take the residue in asym L of 1juh, remove it, and recreate it as a new
// branch from its saved atom_site rows. The new branch must have the expected
// name, a single element with the same number of atoms, and the file must
// still validate. The result is saved as test-create_sugar_1.cif.
BOOST_AUTO_TEST_CASE(create_sugar_1)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / "1juh.cif.gz");
	cif::file file(example.string());
	cif::mm::structure s(file);

	// collect atoms from asym L first; the copy is used for the atom count
	// comparison once the residue has been removed
	auto &NAG = s.get_residue("L");
	auto nagAtoms = NAG.atoms();

	auto &atom_site = s.get_datablock()["atom_site"];

	// NOTE (from the original author): row_initializer does not actually hold
	// the data, so the rows are copied before remove_residue destroys them
	std::vector<cif::row_initializer> ai;
	for (auto r : atom_site.find("label_asym_id"_key == "L"))
		ai.emplace_back(r);

	s.remove_residue(NAG);

	auto &branch = s.create_branch(ai);

	BOOST_CHECK_EQUAL(branch.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(branch.size(), 1);

	BOOST_CHECK_EQUAL(branch[0].atoms().size(), nagAtoms.size());

	BOOST_CHECK(file.is_valid());

	file.save(gTestDir / "test-create_sugar_1.cif");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Remove the two-element branch in asym H of 1juh and rebuild it: create a
// branch from the rows with auth_seq_id 1, then extend it with the rows with
// auth_seq_id 2 linked at O4. Check name, size and file validity, save the
// result as test-create_sugar_2.cif and make sure a structure can still be
// constructed from the file.
BOOST_AUTO_TEST_CASE(create_sugar_2)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / "1juh.cif.gz");
	cif::file file(example.string());
	cif::mm::structure s(file);

	// Get branch for H
	auto &bH = s.get_branch_by_asym_id("H");

	BOOST_CHECK_EQUAL(bH.size(), 2);

	auto &atom_site = s.get_datablock()["atom_site"];

	// ai[k] collects the atom_site rows of asym H with auth_seq_id k + 1
	std::vector<cif::row_initializer> ai[2];

	for (size_t i = 0; i < 2; ++i)
	{
		auto &sugar_rows = ai[i];
		for (auto r : atom_site.find("label_asym_id"_key == "H" and "auth_seq_id"_key == i + 1))
			sugar_rows.emplace_back(r);
	}

	s.remove_branch(bH);

	BOOST_CHECK(file.is_valid());

	// first sugar starts the new branch, second one is attached to it
	auto &bN = s.create_branch(ai[0]);
	s.extend_branch(bN.get_asym_id(), ai[1], 1, "O4");

	BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose-(1-4)-2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(bN.size(), 2);

	BOOST_CHECK(file.is_valid());

	file.save(gTestDir / "test-create_sugar_2.cif");

	BOOST_CHECK_NO_THROW(cif::mm::structure s2(file));
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Remove the second element of the four-element branch in asym G of 1juh and
// check that a single-element branch with the expected name remains, that the
// file still validates and that a structure can be constructed from it.
BOOST_AUTO_TEST_CASE(delete_sugar_1)
{
	using namespace cif::literals;

	const std::filesystem::path example(gTestDir / "1juh.cif.gz");
	cif::file file(example.string());
	cif::mm::structure s(file);

	// Get branch for G
	auto &bG = s.get_branch_by_asym_id("G");

	BOOST_CHECK_EQUAL(bG.size(), 4);

	s.remove_residue(bG[1]);

	BOOST_CHECK_EQUAL(bG.size(), 1);

	// look the branch up again and repeat the checks on the fresh reference
	auto &bN = s.get_branch_by_asym_id("G");

	BOOST_CHECK_EQUAL(bN.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(bN.size(), 1);

	BOOST_CHECK(file.is_valid());

	// file.save(gTestDir / "test-create_sugar_3.cif");

	BOOST_CHECK_NO_THROW(cif::mm::structure s2(file));
}
|
||||
168
test/unit-3d-test.cpp
Normal file
168
test/unit-3d-test.cpp
Normal file
@@ -0,0 +1,168 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace tt = boost::test_tools;
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// User-defined literal: parse the characters of a string literal as CIF and
// return the resulting cif::file.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer whose get area is exactly [first, first + count).
	struct literal_buf : public std::streambuf
	{
		literal_buf(char *first, size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg() takes non-const pointers, hence the const_cast; only a get area
	// is installed on the buffer.
	literal_buf source(const_cast<char *>(text), length);
	std::istream in(&source);

	return cif::file(in);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// not a test, just initialize test dir
|
||||
if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// do this now, avoids the need for installing
|
||||
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// initialize CCD location
|
||||
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// 3d tests
|
||||
|
||||
// Rotate a copy of a small point set by a fixed quaternion, centre both sets,
// and check that align_points() finds a rotation that (a) has the expected
// angle relative to the applied one and (b) maps the original points onto the
// rotated ones with an RMSd below 1e-5.
BOOST_AUTO_TEST_CASE(t1)
{
	// A fixed quaternion is used instead of a random one, so the test is
	// reproducible.
	cif::quaternion q{0.5, 0.5, 0.5, 0.5};
	q = normalize(q);

	const auto [angle0, axis0] = cif::quaternion_to_angle_axis(q);

	std::vector<cif::point> p1{
		{16.979, 13.301, 44.555},
		{18.150, 13.525, 43.680},
		{18.656, 14.966, 43.784},
		{17.890, 15.889, 44.078},
		{17.678, 13.270, 42.255},
		{16.248, 13.734, 42.347},
		{15.762, 13.216, 43.724}};

	// p2 starts as an uncentred copy of p1, is rotated by q and then centred
	auto p2 = p1;

	cif::center_points(p1);

	for (auto &pt : p2)
		pt.rotate(q);

	cif::center_points(p2);

	auto q2 = cif::align_points(p1, p2);

	const auto [angle, axis] = cif::quaternion_to_angle_axis(q2);

	BOOST_TEST(std::fmod(360 + angle, 360) == std::fmod(360 - angle0, 360), tt::tolerance(0.01));

	// applying the recovered rotation to p1 should reproduce p2
	for (auto &pt : p1)
		pt.rotate(q2);

	float rmsd = cif::RMSd(p1, p2);

	BOOST_TEST(rmsd < 1e-5);
}
|
||||
|
||||
// Build a quaternion from an angle of 45 and the cross product of two edge
// vectors of a triangle, convert it back and check that the angle is
// recovered to within the given tolerance.
BOOST_AUTO_TEST_CASE(t2)
{
	cif::point p[] = {
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 1, 2, 0 }
	};

	// rotation axis: cross product of the edges p0->p1 and p0->p2
	const cif::point edge1 = p[1] - p[0];
	const cif::point edge2 = p[2] - p[0];
	cif::point xp = cif::cross_product(edge1, edge2);

	auto q = cif::construct_from_angle_axis(45, xp);

	auto &&[angle, axis] = cif::quaternion_to_angle_axis(q);

	BOOST_TEST(angle == 45, tt::tolerance(0.01));
}
|
||||
|
||||
// Rotate point p1 about p0 with a quaternion built from an angle of 45 and
// the cross product of the triangle's edges, print the rotated point, and
// check that the angle (rotated, p0, p1) is 45 within the given tolerance.
BOOST_AUTO_TEST_CASE(t3)
{
	cif::point p[] = {
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 1, 2, 0 }
	};

	// rotation axis: cross product of the edges p0->p1 and p0->p2
	const cif::point edge1 = p[1] - p[0];
	const cif::point edge2 = p[2] - p[0];
	cif::point xp = cif::cross_product(edge1, edge2);

	auto q = cif::construct_from_angle_axis(45, xp);

	// translate so p0 is the origin, rotate, translate back
	auto v = p[1];
	v -= p[0];
	v.rotate(q);
	v += p[0];

	std::cout << v << std::endl;

	double a = cif::angle(v, p[0], p[1]);

	BOOST_TEST(a == 45, tt::tolerance(0.01));
}
|
||||
1690
test/unit-test.cpp
1690
test/unit-test.cpp
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user