mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 22:14:24 +08:00
Compare commits
250 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
560f6debc6 | ||
|
|
ea1ac33de8 | ||
|
|
7ea30237ae | ||
|
|
bc668487e2 | ||
|
|
1769f9864b | ||
|
|
75ffd97802 | ||
|
|
cfd5b7da0f | ||
|
|
26b7d1df26 | ||
|
|
0747929cd6 | ||
|
|
5bcfb102f4 | ||
|
|
908fb1ccea | ||
|
|
af8389baa4 | ||
|
|
24ca1017cd | ||
|
|
85c21aeb01 | ||
|
|
2f249048d9 | ||
|
|
974cb40ab3 | ||
|
|
c01c16ea60 | ||
|
|
bd157c249c | ||
|
|
b0ac33c1b1 | ||
|
|
82e73a9525 | ||
|
|
adc316d671 | ||
|
|
6a0b6b99ac | ||
|
|
08dd9dd5b4 | ||
|
|
557a1c2d00 | ||
|
|
f77bbfedda | ||
|
|
3aa3fe19e2 | ||
|
|
35fcc0493e | ||
|
|
9485bec2fa | ||
|
|
4b759e731c | ||
|
|
7dd6a8a1aa | ||
|
|
96725ae8b9 | ||
|
|
b3a0ded9a8 | ||
|
|
184c491803 | ||
|
|
f944b3ce00 | ||
|
|
2557f41863 | ||
|
|
2b92cee3f7 | ||
|
|
8071768579 | ||
|
|
71c8541b68 | ||
|
|
3d66c77188 | ||
|
|
8701512961 | ||
|
|
b317c780ba | ||
|
|
681aa3bf8b | ||
|
|
a68e053471 | ||
|
|
25a90e3b32 | ||
|
|
2f62759dfe | ||
|
|
cf9ec46ab8 | ||
|
|
ecbef51b10 | ||
|
|
dfff8c9587 | ||
|
|
cc5d52bbf9 | ||
|
|
a9e9f86c93 | ||
|
|
a2c52713b2 | ||
|
|
545aca88d8 | ||
|
|
ac27248784 | ||
|
|
5758bfbaea | ||
|
|
8d3a079774 | ||
|
|
718c138510 | ||
|
|
29aac70e67 | ||
|
|
700575adfe | ||
|
|
9fe6e5df85 | ||
|
|
ce7434a463 | ||
|
|
ad7d876d07 | ||
|
|
0dc19e86fa | ||
|
|
a12acaa5c7 | ||
|
|
ff62efe720 | ||
|
|
2407877184 | ||
|
|
5fde050738 | ||
|
|
a855f88073 | ||
|
|
cfa2acd61d | ||
|
|
d9db2fe2e7 | ||
|
|
15d62cd3b6 | ||
|
|
19a89aeb7e | ||
|
|
677c61c32f | ||
|
|
4dd4f66397 | ||
|
|
04b7828abc | ||
|
|
9c621ecab8 | ||
|
|
ab9c4d9416 | ||
|
|
e5eb62255a | ||
|
|
98ff79432b | ||
|
|
24fa80ba2a | ||
|
|
3999d792ef | ||
|
|
4db3732749 | ||
|
|
07131e8b40 | ||
|
|
39b91e74c9 | ||
|
|
d4bb7ec3bc | ||
|
|
6175b7e359 | ||
|
|
10442d506a | ||
|
|
573a695c3d | ||
|
|
a76bef0d01 | ||
|
|
e20111b566 | ||
|
|
4a1d9c8f75 | ||
|
|
26c86282e3 | ||
|
|
0eaeb1650d | ||
|
|
f4a6533f6b | ||
|
|
df1b6a13e1 | ||
|
|
e8f24f617c | ||
|
|
9454fdc217 | ||
|
|
22543d8fe5 | ||
|
|
60d1dc82e6 | ||
|
|
87486f87ef | ||
|
|
80e7da0f13 | ||
|
|
3745beae66 | ||
|
|
3965840bfa | ||
|
|
a88c6f3d32 | ||
|
|
ed6c6f0026 | ||
|
|
bdda9d72b5 | ||
|
|
fd080e778e | ||
|
|
9f72df2ecd | ||
|
|
617db012f0 | ||
|
|
9d15541237 | ||
|
|
35c99564c6 | ||
|
|
1d8fe334d6 | ||
|
|
d86bb314ac | ||
|
|
0ef8eb59f8 | ||
|
|
b5fe4a9a87 | ||
|
|
11fea31b98 | ||
|
|
f629275ed5 | ||
|
|
a5f6166469 | ||
|
|
501050e591 | ||
|
|
e1b240b2b2 | ||
|
|
3d79278ed7 | ||
|
|
5e0b197a43 | ||
|
|
9c4170d9e2 | ||
|
|
af721eb196 | ||
|
|
788e315f5e | ||
|
|
4a82a8d5a8 | ||
|
|
11019a26f8 | ||
|
|
6f8909dce9 | ||
|
|
5525103aaf | ||
|
|
291ef737b1 | ||
|
|
af125bdd57 | ||
|
|
79089bbb8c | ||
|
|
1f08498d00 | ||
|
|
49ba714a03 | ||
|
|
85fd9296b2 | ||
|
|
1cda14867f | ||
|
|
2d2b26f7dc | ||
|
|
93b33af44a | ||
|
|
eb80490bcd | ||
|
|
ba2b06f5af | ||
|
|
fecc762db1 | ||
|
|
1e406253ab | ||
|
|
6e3b85f43d | ||
|
|
58f1b626e2 | ||
|
|
c104a08e16 | ||
|
|
dd0f6ca1e6 | ||
|
|
f02ea91b51 | ||
|
|
6768a501a3 | ||
|
|
879e15c759 | ||
|
|
89285b4abc | ||
|
|
c584714f91 | ||
|
|
f5016403b7 | ||
|
|
c8f66ae6bb | ||
|
|
858c967e71 | ||
|
|
f9ca5de5bf | ||
|
|
252c3476a1 | ||
|
|
19210df6db | ||
|
|
15c5730749 | ||
|
|
3764adb7ef | ||
|
|
9160adb1cf | ||
|
|
3ebf4338ab | ||
|
|
2eb4b7b39b | ||
|
|
c241e49b48 | ||
|
|
238c881132 | ||
|
|
49dc733536 | ||
|
|
755bd78f60 | ||
|
|
77f80cd51f | ||
|
|
3df6000635 | ||
|
|
5efee2b40d | ||
|
|
f3c2e59184 | ||
|
|
24ab660e6e | ||
|
|
6c0a418068 | ||
|
|
07a180991e | ||
|
|
4732004b67 | ||
|
|
faa9cd0431 | ||
|
|
e0c3c2394d | ||
|
|
2dec584f54 | ||
|
|
5ab2ccae40 | ||
|
|
1017d08626 | ||
|
|
32b1bbd943 | ||
|
|
1abf31ffa5 | ||
|
|
aec60829d2 | ||
|
|
888c3c38c2 | ||
|
|
e2c4648037 | ||
|
|
f7b98c0530 | ||
|
|
d4bd3faa16 | ||
|
|
c4f3b1cd7b | ||
|
|
74add69a83 | ||
|
|
a490b19d24 | ||
|
|
44cfa2c1a2 | ||
|
|
6dd9522b3f | ||
|
|
5e352cb8e4 | ||
|
|
2fad7315b8 | ||
|
|
520759dfe8 | ||
|
|
577b44ae11 | ||
|
|
66f742d6c0 | ||
|
|
7ba9f688c7 | ||
|
|
883f0307a2 | ||
|
|
c9719f873f | ||
|
|
123d25f853 | ||
|
|
56da42db84 | ||
|
|
7f820449ca | ||
|
|
ecb2cf5f11 | ||
|
|
7f27da9b3b | ||
|
|
01eb499c69 | ||
|
|
1ff6f70682 | ||
|
|
ddde996e10 | ||
|
|
1c9212c7e0 | ||
|
|
a568143991 | ||
|
|
2b6f1bd9ee | ||
|
|
2527aa5ea6 | ||
|
|
4c28091ecd | ||
|
|
d49725423e | ||
|
|
fcb4dc61b5 | ||
|
|
b7330c074f | ||
|
|
e8f4123030 | ||
|
|
975057c4c4 | ||
|
|
a0e01668d1 | ||
|
|
2c77491416 | ||
|
|
be19e4a9cb | ||
|
|
61ce91a9d7 | ||
|
|
18f1d07e85 | ||
|
|
b596976194 | ||
|
|
1f6b86d516 | ||
|
|
31499b977d | ||
|
|
f83850e380 | ||
|
|
1a4ccd86fe | ||
|
|
5c3c6fec09 | ||
|
|
f97e742daa | ||
|
|
7f39d401e2 | ||
|
|
af412c284d | ||
|
|
874cd3bae5 | ||
|
|
ea28ebdd13 | ||
|
|
3ba468933f | ||
|
|
45f33e4bea | ||
|
|
021487ed16 | ||
|
|
cb3443ffb1 | ||
|
|
6b2c9dc3e3 | ||
|
|
7513cc1947 | ||
|
|
c98b8ae5c9 | ||
|
|
ab2dd4b75f | ||
|
|
be77316545 | ||
|
|
cdfb0d9497 | ||
|
|
71f7e7c741 | ||
|
|
cff099596e | ||
|
|
e182604455 | ||
|
|
45a7defb7e | ||
|
|
906f6ac1ea | ||
|
|
8d96e513bd | ||
|
|
cdefd063e2 | ||
|
|
8bbcba76cf |
9
.gitignore
vendored
9
.gitignore
vendored
@@ -6,9 +6,12 @@ tools/symop-map-generator
|
||||
test/unit-test
|
||||
test/pdb2cif-test
|
||||
test/rename-compound-test
|
||||
tools/update-dictionary-script
|
||||
data/
|
||||
tools/update-libcifpp-data
|
||||
data/components.cif*
|
||||
CMakeSettings.json
|
||||
msvc/
|
||||
Testing/
|
||||
|
||||
rsrc/feature-request.txt
|
||||
test/test-create_sugar_?.cif
|
||||
test/oprofile_data/
|
||||
test/perf.data*
|
||||
|
||||
6
.gitmodules
vendored
Normal file
6
.gitmodules
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
[submodule "regex"]
|
||||
path = regex
|
||||
url = https://github.com/boostorg/regex
|
||||
[submodule "gxrio"]
|
||||
path = gxrio
|
||||
url = https://github.com/mhekkel/gxrio.git
|
||||
33
.travis.yml
33
.travis.yml
@@ -1,33 +0,0 @@
|
||||
language: cpp
|
||||
|
||||
os:
|
||||
- linux
|
||||
- osx
|
||||
|
||||
dist: focal
|
||||
|
||||
osx_image:
|
||||
- xcode12
|
||||
|
||||
compiler:
|
||||
- gcc
|
||||
- clang
|
||||
|
||||
addons:
|
||||
apt:
|
||||
packages:
|
||||
- libboost-all-dev
|
||||
|
||||
before_install:
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install make; fi
|
||||
|
||||
script:
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then ./configure --disable-shared --disable-revision --disable-download-ccd ; else ./configure --disable-revision --disable-download-ccd ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake ; else make ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake test ; else make test ; fi
|
||||
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then sudo gmake install ; else sudo make install; fi
|
||||
|
||||
# jobs:
|
||||
# allow_failures:
|
||||
# - os: osx
|
||||
|
||||
468
CMakeLists.txt
468
CMakeLists.txt
@@ -6,10 +6,10 @@
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer
|
||||
# list of conditions and the following disclaimer
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
@@ -25,32 +25,28 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# set the project name
|
||||
project(cifpp VERSION 2.0.1 LANGUAGES CXX)
|
||||
project(cifpp VERSION 5.0.0 LANGUAGES CXX)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
enable_testing()
|
||||
|
||||
include(GNUInstallDirs)
|
||||
include(CheckFunctionExists)
|
||||
include(CheckIncludeFiles)
|
||||
include(CheckLibraryExists)
|
||||
include(CMakePackageConfigHelpers)
|
||||
include(Dart)
|
||||
include(FindFilesystem)
|
||||
include(GenerateExportHeader)
|
||||
include(CheckCXXSourceCompiles)
|
||||
|
||||
# include(Dart)
|
||||
include(AddGitSubmodule)
|
||||
|
||||
set(CXX_EXTENSIONS OFF)
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
set(CMAKE_CXX_STANDARD 20)
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
||||
|
||||
find_package(Filesystem REQUIRED)
|
||||
|
||||
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
|
||||
# https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
|
||||
elseif(MSVC)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
|
||||
endif()
|
||||
|
||||
# Building shared libraries?
|
||||
@@ -61,6 +57,31 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
|
||||
|
||||
# Optionally build a version to be installed inside CCP4
|
||||
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)
|
||||
|
||||
# Lots of code depend on the availability of the components.cif file
|
||||
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)
|
||||
|
||||
# An optional cron script can be installed to keep the data files up-to-date
|
||||
if(UNIX)
|
||||
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
|
||||
endif()
|
||||
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
|
||||
if(EXISTS "$ENV{CCP4}")
|
||||
if(EXISTS "$ENV{CLIBD}/syminfo.lib")
|
||||
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
|
||||
else()
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CLIBD}")
|
||||
endif()
|
||||
else()
|
||||
set(CIFPP_RECREATE_SYMOP_DATA OFF)
|
||||
message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
|
||||
endif()
|
||||
|
||||
# Unit tests
|
||||
option(ENABLE_TESTING "Build test exectuables" OFF)
|
||||
|
||||
if(BUILD_FOR_CCP4)
|
||||
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
|
||||
message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
|
||||
@@ -69,43 +90,18 @@ if(BUILD_FOR_CCP4)
|
||||
list(APPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
|
||||
|
||||
# This is the only option:
|
||||
if(WIN32)
|
||||
set(BUILD_SHARED_LIBS ON)
|
||||
endif()
|
||||
endif("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
|
||||
endif()
|
||||
|
||||
# Check if CCP4 is available
|
||||
if(EXISTS "$ENV{CCP4}")
|
||||
set(CCP4 $ENV{CCP4})
|
||||
set(CLIBD ${CCP4}/lib/data)
|
||||
endif()
|
||||
if(CCP4 AND NOT CLIBD)
|
||||
set(CLIBD ${CCP4}/lib/data)
|
||||
endif()
|
||||
|
||||
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
|
||||
if(EXISTS "${CCP4}")
|
||||
if(RECREATE_SYMOP_DATA AND NOT EXISTS "${CLIBD}/syminfo.lib")
|
||||
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in ${CLIBD}")
|
||||
set(RECREATE_SYMOP_DATA OFF)
|
||||
else()
|
||||
option(RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
|
||||
endif()
|
||||
else()
|
||||
set(RECREATE_SYMOP_DATA OFF)
|
||||
message("Not trying to recreate SymOpTable_data.hpp since CCP4 is not defined")
|
||||
endif()
|
||||
|
||||
# set(CMAKE_DEBUG_POSTFIX d)
|
||||
|
||||
if(MSVC)
|
||||
# make msvc standards compliant...
|
||||
add_compile_options(/permissive-)
|
||||
# make msvc standards compliant...
|
||||
add_compile_options(/permissive-)
|
||||
|
||||
macro(get_WIN32_WINNT version)
|
||||
if (WIN32 AND CMAKE_SYSTEM_VERSION)
|
||||
if(WIN32 AND CMAKE_SYSTEM_VERSION)
|
||||
set(ver ${CMAKE_SYSTEM_VERSION})
|
||||
string(REPLACE "." "" ver ${ver})
|
||||
string(REGEX REPLACE "([0-9])" "0\\1" ver ${ver})
|
||||
@@ -116,187 +112,166 @@ if(MSVC)
|
||||
|
||||
get_WIN32_WINNT(ver)
|
||||
add_definitions(-D_WIN32_WINNT=${ver})
|
||||
|
||||
# On Windows, do not install in the system location
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND NOT BUILD_FOR_CCP4)
|
||||
message(STATUS "The library and auxiliary files will be installed in $ENV{LOCALAPPDATA}/${PROJECT_NAME}")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{LOCALAPPDATA}/${PROJECT_NAME}" CACHE PATH "..." FORCE)
|
||||
endif()
|
||||
|
||||
# Find out the processor type for the target
|
||||
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
|
||||
set(COFF_TYPE "x64")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
|
||||
set(COFF_TYPE "x86")
|
||||
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ARM64")
|
||||
set(COFF_TYPE "arm64")
|
||||
else()
|
||||
message(FATAL_ERROR "Unsupported or unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
|
||||
endif()
|
||||
|
||||
set(COFF_SPEC "--coff=${COFF_TYPE}")
|
||||
|
||||
# for mrc, just in case
|
||||
list(APPEND CMAKE_PREFIX_PATH "$ENV{LOCALAPPDATA}/mrc")
|
||||
endif()
|
||||
|
||||
if(UNIX AND NOT APPLE AND NOT BUILD_FOR_CCP4 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
# On Linux, install in the $HOME/.local folder by default
|
||||
message(STATUS "The library and auxiliary files will be installed in $ENV{HOME}/.local")
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "..." FORCE)
|
||||
endif()
|
||||
|
||||
# Optionally use mrc to create resources
|
||||
|
||||
if(WIN32 AND BUILD_SHARED_LIBS)
|
||||
message("Not using resources when building shared libraries for Windows")
|
||||
else()
|
||||
find_program(MRC mrc)
|
||||
|
||||
if(MRC)
|
||||
option(USE_RSRC "Use mrc to create resources" ON)
|
||||
else()
|
||||
message("Using resources not possible since mrc was not found")
|
||||
endif()
|
||||
|
||||
if(USE_RSRC STREQUAL "ON")
|
||||
set(USE_RSRC 1)
|
||||
|
||||
message("Using resources compiled with ${MRC}")
|
||||
add_compile_definitions(USE_RSRC)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Libraries
|
||||
|
||||
# Start by finding out if std:regex is usable. Note that the current
|
||||
# implementation in GCC is not acceptable, it crashes on long lines.
|
||||
# The implementation in libc++ (clang) and MSVC seem to be OK.
|
||||
check_cxx_source_compiles("
|
||||
#include <iostream>
|
||||
#ifndef __GLIBCXX__
|
||||
#error
|
||||
#endif
|
||||
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)
|
||||
|
||||
if(GXX_LIBSTDCPP)
|
||||
message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")
|
||||
|
||||
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
|
||||
${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
|
||||
|
||||
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
|
||||
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will try to use boost::regex instead")
|
||||
|
||||
set(BOOST_REGEX_STANDALONE ON)
|
||||
add_git_submodule(regex EXCLUDE_FROM_ALL)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(CMAKE_THREAD_PREFER_PTHREAD)
|
||||
set(THREADS_PREFER_PTHREAD_FLAG)
|
||||
find_package(Threads)
|
||||
|
||||
set(Boost_DETAILED_FAILURE_MSG ON)
|
||||
if(NOT BUILD_SHARED_LIBS)
|
||||
set(Boost_USE_STATIC_LIBS ON)
|
||||
endif()
|
||||
find_package(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
|
||||
add_git_submodule(gxrio EXCLUDE_FROM_ALL)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(LibLZMA REQUIRED)
|
||||
|
||||
if(NOT MSVC AND Boost_USE_STATIC_LIBS)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(BZip2 REQUIRED)
|
||||
endif()
|
||||
include(FindFilesystem)
|
||||
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})
|
||||
|
||||
include(FindAtomic)
|
||||
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})
|
||||
|
||||
# Create a revision file, containing the current git version info
|
||||
|
||||
find_package(Git)
|
||||
if(GIT_FOUND AND EXISTS "${CMAKE_SOURCE_DIR}/.git")
|
||||
include(GetGitRevisionDescription)
|
||||
get_git_head_revision(REFSPEC COMMITHASH)
|
||||
|
||||
# Generate our own version string
|
||||
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
|
||||
else()
|
||||
message(WARNING "Git not found, cannot set version info")
|
||||
|
||||
SET(BUILD_VERSION_STRING ${PROJECT_VERSION})
|
||||
endif()
|
||||
|
||||
# generate version.h
|
||||
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
|
||||
configure_file("${CMAKE_SOURCE_DIR}/src/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
|
||||
include(VersionString)
|
||||
write_version_header("LibCIFPP")
|
||||
|
||||
# SymOp data table
|
||||
if(RECREATE_SYMOP_DATA)
|
||||
if(CIFPP_RECREATE_SYMOP_DATA)
|
||||
# The tool to create the table
|
||||
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/tools/symop-map-generator.cpp")
|
||||
|
||||
add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
|
||||
|
||||
target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
if(Boost_INCLUDE_DIR)
|
||||
target_include_directories(symop-map-generator PUBLIC ${Boost_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
set($ENV{CLIBD} ${CLIBD})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> ${CLIBD}/syminfo.lib ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
)
|
||||
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
|
||||
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
|
||||
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
|
||||
)
|
||||
endif()
|
||||
|
||||
# Sources
|
||||
set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/category.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/condition.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/datablock.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/dictionary_parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/file.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/item.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/parser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/row.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/validate.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/text.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/utilities.cpp
|
||||
|
||||
set(project_sources
|
||||
${PROJECT_SOURCE_DIR}/src/AtomType.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/BondMap.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Cif++.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Cif2PDB.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifParser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifUtils.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/CifValidator.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Compound.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/PDB2Cif.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/PDB2CifRemark3.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Secondary.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Structure.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/Symmetry.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/TlsParser.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/atom_type.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/compound.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/point.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/symmetry.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/model.cpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
|
||||
${PROJECT_SOURCE_DIR}/src/pdb/tls.cpp
|
||||
)
|
||||
|
||||
set(project_headers
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/AtomType.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/BondMap.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Cif2PDB.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifParser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifUtils.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/CifValidator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Matrix.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/PDB2Cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/PDB2CifRemark3.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Secondary.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Structure.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/Symmetry.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/TlsParser.hpp
|
||||
)
|
||||
set(project_headers
|
||||
${PROJECT_SOURCE_DIR}/include/cif++.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/item.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/datablock.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/file.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/validate.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/list.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/iterator.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/forward_decl.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/dictionary_parser.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
|
||||
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp
|
||||
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
|
||||
)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
|
||||
add_library(cifpp::cifpp ALIAS cifpp)
|
||||
|
||||
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp)
|
||||
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
if(BOOST_REGEX_STANDALONE)
|
||||
target_compile_definitions(cifpp PUBLIC USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
|
||||
endif()
|
||||
|
||||
target_include_directories(cifpp
|
||||
PUBLIC
|
||||
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
|
||||
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
|
||||
${Boost_INCLUDE_DIR}
|
||||
)
|
||||
|
||||
target_include_directories(cifpp
|
||||
PRIVATE
|
||||
${CMAKE_BINARY_DIR}
|
||||
)
|
||||
target_link_libraries(cifpp PUBLIC
|
||||
Threads::Threads
|
||||
ZLIB::ZLIB
|
||||
LibLZMA::LibLZMA
|
||||
${CIFPP_REQUIRED_LIBRARIES})
|
||||
|
||||
target_link_libraries(cifpp Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
get_target_property(GXRIO_INCLUDE_DIR gxrio::gxrio INTERFACE_INCLUDE_DIRECTORIES)
|
||||
target_include_directories(cifpp PRIVATE ${GXRIO_INCLUDE_DIR})
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
endif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
if(BOOST_REGEX_STANDALONE)
|
||||
target_include_directories(cifpp PRIVATE regex/include)
|
||||
endif()
|
||||
|
||||
# download the components.cif file from CCD
|
||||
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
|
||||
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
|
||||
|
||||
if (NOT EXISTS ${COMPONENTS_CIF})
|
||||
if(CIFPP_DOWNLOAD_CCD)
|
||||
# download the components.cif file from CCD
|
||||
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
|
||||
|
||||
if (NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
|
||||
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
|
||||
endif()
|
||||
if(NOT EXISTS ${COMPONENTS_CIF})
|
||||
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
|
||||
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
|
||||
endif()
|
||||
|
||||
# if(${CMAKE_VERSION} VERSION_LESS "3.19.0")
|
||||
find_program(GUNZIP gunzip)
|
||||
|
||||
if(GUNZIP)
|
||||
@@ -304,43 +279,30 @@ if (NOT EXISTS ${COMPONENTS_CIF})
|
||||
SHOW_PROGRESS)
|
||||
add_custom_command(OUTPUT ${COMPONENTS_CIF}
|
||||
COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/data/)
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
|
||||
else()
|
||||
file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
|
||||
SHOW_PROGRESS)
|
||||
endif()
|
||||
# else()
|
||||
# file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
|
||||
# SHOW_PROGRESS)
|
||||
# file(ARCHIVE_EXTRACT INPUT ${COMPONENTS_CIF}.gz
|
||||
# DESTINATION ${CMAKE_SOURCE_DIR}/data/
|
||||
# VERBOSE)
|
||||
# endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
|
||||
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
|
||||
endif()
|
||||
|
||||
if(UNIX)
|
||||
option(INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
|
||||
if(INSTALL_UPDATE_SCRIPT)
|
||||
set(CIFPP_CACHE_DIR "/var/cache/libcifpp")
|
||||
if(NOT "${CIFPP_CACHE_DIR}" STREQUAL "OFF")
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
endif()
|
||||
endif()
|
||||
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE STRING "The cache directory to use")
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
endif()
|
||||
|
||||
generate_export_header(cifpp
|
||||
EXPORT_FILE_NAME cif++/Cif++Export.hpp)
|
||||
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} )
|
||||
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} )
|
||||
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
|
||||
set(SHARE_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/libcifpp)
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}")
|
||||
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "The directory containing the provided data files")
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
|
||||
|
||||
# Install rules
|
||||
|
||||
install(TARGETS cifpp
|
||||
EXPORT cifppTargets
|
||||
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
@@ -348,6 +310,13 @@ install(TARGETS cifpp
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
|
||||
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
if(MSVC AND BUILD_SHARED_LIBS)
|
||||
install(
|
||||
FILES $<TARGET_PDB_FILE:${PROJECT_NAME}>
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
OPTIONAL)
|
||||
endif()
|
||||
|
||||
install(EXPORT cifppTargets
|
||||
FILE "cifppTargets.cmake"
|
||||
NAMESPACE cifpp::
|
||||
@@ -361,27 +330,29 @@ install(
|
||||
)
|
||||
|
||||
install(
|
||||
FILES "${CMAKE_CURRENT_BINARY_DIR}/cif++/Cif++Export.hpp"
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
|
||||
FILES include/cif++.hpp
|
||||
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
|
||||
COMPONENT Devel
|
||||
)
|
||||
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic
|
||||
${PROJECT_SOURCE_DIR}/data/components.cif
|
||||
DESTINATION ${SHARE_INSTALL_DIR}
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_DATA_DIR}
|
||||
)
|
||||
|
||||
configure_package_config_file(Config.cmake.in
|
||||
configure_package_config_file(
|
||||
${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
|
||||
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
|
||||
)
|
||||
|
||||
install(FILES
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
|
||||
COMPONENT Devel
|
||||
)
|
||||
@@ -393,21 +364,20 @@ set_target_properties(cifpp PROPERTIES
|
||||
INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})
|
||||
|
||||
set_property(TARGET cifpp APPEND PROPERTY
|
||||
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
|
||||
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
|
||||
)
|
||||
|
||||
write_basic_package_version_file(
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
|
||||
VERSION ${PROJECT_VERSION}
|
||||
COMPATIBILITY AnyNewerVersion
|
||||
)
|
||||
|
||||
# pkgconfig support
|
||||
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(exec_prefix ${CMAKE_INSTALL_PREFIX})
|
||||
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
|
||||
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
|
||||
|
||||
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
|
||||
${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in @ONLY)
|
||||
@@ -415,38 +385,26 @@ file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
|
||||
INPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in)
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
|
||||
|
||||
# Unit tests
|
||||
if(ENABLE_TESTING)
|
||||
enable_testing()
|
||||
|
||||
option(CIFPP_BUILD_TESTS "Build test exectuables" OFF)
|
||||
find_package(Boost REQUIRED headers)
|
||||
|
||||
if(CIFPP_BUILD_TESTS)
|
||||
|
||||
if(USE_RSRC)
|
||||
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj
|
||||
COMMAND ${MRC} -o ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj ${CMAKE_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic ${COFF_SPEC}
|
||||
)
|
||||
set(CIFPP_TEST_RESOURCE ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj)
|
||||
endif()
|
||||
|
||||
list(APPEND CIFPP_tests
|
||||
# pdb2cif
|
||||
rename-compound
|
||||
structure
|
||||
unit)
|
||||
list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar
|
||||
)
|
||||
|
||||
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
set(CIFPP_TEST "${CIFPP_TEST}-test")
|
||||
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
|
||||
|
||||
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} ${CIFPP_TEST_RESOURCE})
|
||||
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE})
|
||||
|
||||
target_include_directories(${CIFPP_TEST} PRIVATE
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/include
|
||||
${CMAKE_CURRENT_BINARY_DIR} # for config.h
|
||||
)
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Boost::headers)
|
||||
|
||||
if(CIFPP_USE_RSRC)
|
||||
mrc_target_resources(${CIFPP_TEST} ${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
|
||||
|
||||
if(MSVC)
|
||||
# Specify unwind semantics so that MSVC knowns how to handle exceptions
|
||||
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
|
||||
@@ -460,20 +418,18 @@ if(CIFPP_BUILD_TESTS)
|
||||
|
||||
add_test(NAME ${CIFPP_TEST}
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
|
||||
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
message("Will install in ${CMAKE_INSTALL_PREFIX}")
|
||||
|
||||
# Optionally install the update scripts for CCD and dictionary files
|
||||
|
||||
if(INSTALL_UPDATE_SCRIPT)
|
||||
if(CIFPP_INSTALL_UPDATE_SCRIPT)
|
||||
set(CIFPP_CRON_DIR "$ENV{DESTDIR}/etc/cron.weekly")
|
||||
|
||||
configure_file(${CMAKE_SOURCE_DIR}/tools/update-dictionary-script.in update-dictionary-script @ONLY)
|
||||
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
|
||||
install(
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-dictionary-script
|
||||
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
|
||||
DESTINATION ${CIFPP_CRON_DIR}
|
||||
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
|
||||
)
|
||||
@@ -482,13 +438,13 @@ if(INSTALL_UPDATE_SCRIPT)
|
||||
install(DIRECTORY DESTINATION "$ENV{DESTDIR}/etc/libcifpp/cache-update.d")
|
||||
|
||||
# a config to, to make it complete
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
|
||||
if(NOT EXISTS "$ENV{DESTDIR}/etc/libcifpp.conf")
|
||||
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
|
||||
# update=true
|
||||
]])
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "$ENV{DESTDIR}/etc")
|
||||
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "$ENV{DESTDIR}/etc")
|
||||
install(CODE "message(\"A configuration file has been written to $ENV{DESTDIR}/etc/libcifpp.conf, please edit this file to enable automatic updates\")")
|
||||
endif()
|
||||
|
||||
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
|
||||
|
||||
install(CODE "message(\"A configuration file has been written to $ENV{DESTDIR}/etc/libcifpp.conf, please edit this file to enable automatic updates\")")
|
||||
endif()
|
||||
|
||||
|
||||
5
LICENSE
5
LICENSE
@@ -1,6 +1,7 @@
|
||||
SPDX-License-Identifier: BSD-2-Clause
|
||||
BSD-2-Clause License
|
||||
|
||||
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
@@ -20,4 +21,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
@@ -12,7 +12,6 @@ have been used as well as MSVC version 2019.
|
||||
|
||||
Other requirements are:
|
||||
|
||||
- Boost libraries, at least version 1.70
|
||||
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
|
||||
allows including data files into the executable making them easier to
|
||||
install. Strictly this is optional, but at the expense of functionality.
|
||||
|
||||
70
changelog
70
changelog
@@ -1,3 +1,73 @@
|
||||
Version 5.0.0
|
||||
- Total rewrite of cif part
|
||||
- Removed DSSP code, moved into dssp project itself
|
||||
|
||||
Version 4.2.1
|
||||
- Improved REMARK 3 parser (for TLS in large molecules)
|
||||
|
||||
Version 4.2.0
|
||||
- Yet another rewrite of resource loading
|
||||
|
||||
Version 4.1.1
|
||||
- Fall back to zero charge for scattering factors if the atom
|
||||
was not found in the table.
|
||||
- Improve code to locate resources, failing less.
|
||||
|
||||
Version 4.1.0
|
||||
- Some interface changes for mmcif::Atom
|
||||
|
||||
Version 4.0.1
|
||||
- Added a bunch of const methods to Datablock and Category.
|
||||
- Changed PDB writing interface to accept Datablock instead of File.
|
||||
|
||||
Version 4.0.0
|
||||
- getResidue in mmcif::Structure now requires both a
|
||||
sequence ID and an auth sequence ID. As a result the code was cleaned
|
||||
up considerably.
|
||||
|
||||
Version 3.0.5
|
||||
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
|
||||
|
||||
Version 3.0.4
|
||||
- Fix in mmCIF parser, now correctly handles the unquoted
|
||||
string ??
|
||||
|
||||
Version 3.0.3
|
||||
- Better configuration checks, for atomic e.g.
|
||||
- Fixed a problem introduced in refactoring mmcif::Atom
|
||||
- Version string creation
|
||||
|
||||
Version 3.0.2
|
||||
- refactored mmcif::Atom for performance reasons
|
||||
|
||||
Version 3.0.1
|
||||
- Fixed processing of proline restraints file from CCP4, proline
|
||||
is a peptide, really.
|
||||
- Added code to facilitate DSSP
|
||||
|
||||
Version 3.0.0
|
||||
- Replaced many strings in the API with string_view for
|
||||
performance reasons.
|
||||
- Upgraded mmcif::Structure
|
||||
- various other small fixes
|
||||
|
||||
Version 2.0.5
|
||||
- Backporting updated CMakeLists.txt file
|
||||
|
||||
Version 2.0.4
|
||||
- Reverted a too strict test when reading cif files.
|
||||
|
||||
Version 2.0.3
|
||||
- Fixed reading mmCIF files where model numbers are used and
|
||||
model number 1 is missing.
|
||||
|
||||
Version 2.0.2
|
||||
- Added configuration flag to disable downloading CCD data during build
|
||||
Note that there are now two flags for CCD data:
|
||||
DOWNLOAD_CCD to enable downloading during build
|
||||
INSTALL_UPDATE_SCRIPT to install an update mechanism for this file
|
||||
- Updated unit tests to work even if no CCD data is available
|
||||
|
||||
Version 2.0.1
|
||||
- Fixed the generator for the symmetry operator table
|
||||
|
||||
|
||||
27
cmake/AddGitSubmodule.cmake
Normal file
27
cmake/AddGitSubmodule.cmake
Normal file
@@ -0,0 +1,27 @@
|
||||
cmake_minimum_required(VERSION 3.16..3.19)
|
||||
|
||||
function(add_git_submodule dir)
|
||||
# add a Git submodule directory to CMake, assuming the
|
||||
# Git submodule directory is a CMake project.
|
||||
#
|
||||
# Usage: in CMakeLists.txt
|
||||
#
|
||||
# include(AddGitSubmodule.cmake)
|
||||
# add_git_submodule(mysubmod_dir)
|
||||
find_package(Git REQUIRED)
|
||||
|
||||
if(NOT EXISTS ${dir}/CMakeLists.txt)
|
||||
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.19)
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
COMMAND_ERROR_IS_FATAL ANY)
|
||||
else()
|
||||
execute_process(COMMAND ${GIT_EXECUTABLE} submodule update --init --recursive -- ${dir}
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
set(ENABLE_TESTING OFF)
|
||||
|
||||
add_subdirectory(${dir} ${ARGV})
|
||||
endfunction(add_git_submodule)
|
||||
62
cmake/FindAtomic.cmake
Normal file
62
cmake/FindAtomic.cmake
Normal file
@@ -0,0 +1,62 @@
|
||||
# Simple check to see if we need a library for std::atomic
|
||||
|
||||
if(TARGET std::atomic)
|
||||
return()
|
||||
endif()
|
||||
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
include(CMakePushCheckState)
|
||||
include(CheckIncludeFileCXX)
|
||||
include(CheckCXXSourceRuns)
|
||||
|
||||
cmake_push_check_state()
|
||||
|
||||
check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
|
||||
mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)
|
||||
|
||||
set(code [[
|
||||
#include <atomic>
|
||||
int main(int argc, char** argv) {
|
||||
std::atomic<long long> s;
|
||||
++s;
|
||||
return 0;
|
||||
}
|
||||
]])
|
||||
|
||||
check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)
|
||||
|
||||
if(_CXX_ATOMIC_BUILTIN)
|
||||
set(_found 1)
|
||||
else()
|
||||
list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)
|
||||
list(APPEND FOLLY_LINK_LIBRARIES atomic)
|
||||
|
||||
check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)
|
||||
if (NOT _CXX_ATOMIC_LIB_NEEDED)
|
||||
message(FATAL_ERROR "unable to link C++ std::atomic code: you may need \
|
||||
to install GNU libatomic")
|
||||
else()
|
||||
set(_found 1)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(_found)
|
||||
add_library(std::atomic INTERFACE IMPORTED)
|
||||
set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)
|
||||
|
||||
if(_CXX_ATOMIC_BUILTIN)
|
||||
# Nothing to add...
|
||||
elseif(_CXX_ATOMIC_LIB_NEEDED)
|
||||
set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
|
||||
set(STDCPPATOMIC_LIBRARY atomic)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
cmake_pop_check_state()
|
||||
|
||||
set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)
|
||||
|
||||
if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
|
||||
message(FATAL_ERROR "Cannot run simple program using std::atomic")
|
||||
endif()
|
||||
@@ -12,8 +12,6 @@ include(CheckCXXSourceCompiles)
|
||||
|
||||
cmake_push_check_state()
|
||||
|
||||
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
|
||||
mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)
|
||||
|
||||
@@ -59,8 +57,10 @@ if(_found)
|
||||
# Nothing to add...
|
||||
elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
|
||||
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
|
||||
set(STDCPPFS_LIBRARY stdc++fs)
|
||||
elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
|
||||
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
|
||||
set(STDCPPFS_LIBRARY c++fs)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
77
cmake/VersionString.cmake
Normal file
77
cmake/VersionString.cmake
Normal file
@@ -0,0 +1,77 @@
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
|
||||
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
cmake_minimum_required(VERSION 3.15)
|
||||
|
||||
# Create a revision file, containing the current git version info, if any
|
||||
function(write_version_header)
|
||||
include(GetGitRevisionDescription)
|
||||
if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND))
|
||||
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
|
||||
|
||||
if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
|
||||
set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
|
||||
if(CMAKE_MATCH_3)
|
||||
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
|
||||
else()
|
||||
set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
|
||||
endif()
|
||||
endif()
|
||||
else()
|
||||
set(BUILD_VERSION_STRING "no git info available")
|
||||
endif()
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR} PRIVATE)
|
||||
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
|
||||
|
||||
if(ARGC GREATER 0)
|
||||
set(VAR_PREFIX "${ARGV0}")
|
||||
endif()
|
||||
|
||||
file(WRITE "${CMAKE_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <ostream>
|
||||
|
||||
const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
|
||||
const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
|
||||
const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
|
||||
const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
|
||||
const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";
|
||||
|
||||
inline void write_version_string(std::ostream &os, bool verbose)
|
||||
{
|
||||
os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
|
||||
if (verbose)
|
||||
{
|
||||
os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
|
||||
if (k@VAR_PREFIX@VersionGitTag[0] != 0)
|
||||
os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
|
||||
}
|
||||
}
|
||||
]])
|
||||
configure_file("${CMAKE_BINARY_DIR}/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
|
||||
endfunction()
|
||||
|
||||
@@ -1,11 +1,10 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
include(CMakeFindDependencyMacro)
|
||||
find_dependency(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
|
||||
if(NOT WIN32)
|
||||
find_dependency(ZLIB)
|
||||
find_dependency(BZip2)
|
||||
endif()
|
||||
find_dependency(Threads)
|
||||
|
||||
find_dependency(ZLIB REQUIRED)
|
||||
find_dependency(LibLZMA REQUIRED)
|
||||
|
||||
INCLUDE("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")
|
||||
|
||||
18
cmake/test-rx.cpp
Normal file
18
cmake/test-rx.cpp
Normal file
@@ -0,0 +1,18 @@
|
||||
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
|
||||
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
|
||||
int main()
|
||||
{
|
||||
std::string s(100'000, '*');
|
||||
std::smatch m;
|
||||
std::regex r("^(.*?)$");
|
||||
|
||||
std::regex_search(s, m, r);
|
||||
|
||||
std::cout << s.substr(0, 10) << std::endl;
|
||||
std::cout << m.str(1).substr(0, 10) << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
2582
data/ccd-subset.cif
Normal file
2582
data/ccd-subset.cif
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,30 +1,24 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
|
||||
#include <cif++/Cif++.hpp>
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
int main()
|
||||
{
|
||||
fs::path in("1cbs.cif.gz");
|
||||
|
||||
cif::File file;
|
||||
|
||||
file.loadDictionary("mmcif_pdbx_v50");
|
||||
|
||||
cif::file file;
|
||||
file.load("1cbs.cif.gz");
|
||||
|
||||
auto& db = file.firstDatablock()["atom_site"];
|
||||
auto n = db.find(cif::Key("label_atom_id") == "OXT").size();
|
||||
auto& db = file.front();
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
|
||||
|
||||
std::cout << "File contains " << db.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
<< "residues with an OXT are:" << std::endl;
|
||||
|
||||
for (const auto& [asym, comp, seqnr]: db.find<std::string,std::string,int>(
|
||||
cif::Key("label_atom_id") == "OXT",
|
||||
{ "label_asym_id", "label_comp_id", "label_seq_id" }
|
||||
))
|
||||
for (const auto& [asym, comp, seqnr]: atom_site.find<std::string,std::string,int>(
|
||||
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
{
|
||||
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
|
||||
}
|
||||
|
||||
1573
examples/pdb1cbs.ent
1573
examples/pdb1cbs.ent
File diff suppressed because it is too large
Load Diff
Binary file not shown.
1
gxrio
Submodule
1
gxrio
Submodule
Submodule gxrio added at a7bb5b5c4b
40
include/cif++.hpp
Normal file
40
include/cif++.hpp
Normal file
@@ -0,0 +1,40 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/format.hpp>
|
||||
|
||||
#include <cif++/compound.hpp>
|
||||
#include <cif++/point.hpp>
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
#include <cif++/model.hpp>
|
||||
|
||||
#include <cif++/pdb/io.hpp>
|
||||
@@ -1,245 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Lib for working with structures as contained in mmCIF and PDB files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
enum AtomType : uint8_t
|
||||
{
|
||||
Nn = 0, // Unknown
|
||||
|
||||
H = 1, // Hydrogen
|
||||
He = 2, // Helium
|
||||
|
||||
Li = 3, // Lithium
|
||||
Be = 4, // Beryllium
|
||||
B = 5, // Boron
|
||||
C = 6, // Carbon
|
||||
N = 7, // Nitrogen
|
||||
O = 8, // Oxygen
|
||||
F = 9, // Fluorine
|
||||
Ne = 10, // Neon
|
||||
|
||||
Na = 11, // Sodium
|
||||
Mg = 12, // Magnesium
|
||||
Al = 13, // Aluminium
|
||||
Si = 14, // Silicon
|
||||
P = 15, // Phosphorus
|
||||
S = 16, // Sulfur
|
||||
Cl = 17, // Chlorine
|
||||
Ar = 18, // Argon
|
||||
|
||||
K = 19, // Potassium
|
||||
Ca = 20, // Calcium
|
||||
Sc = 21, // Scandium
|
||||
Ti = 22, // Titanium
|
||||
V = 23, // Vanadium
|
||||
Cr = 24, // Chromium
|
||||
Mn = 25, // Manganese
|
||||
Fe = 26, // Iron
|
||||
Co = 27, // Cobalt
|
||||
Ni = 28, // Nickel
|
||||
Cu = 29, // Copper
|
||||
Zn = 30, // Zinc
|
||||
Ga = 31, // Gallium
|
||||
Ge = 32, // Germanium
|
||||
As = 33, // Arsenic
|
||||
Se = 34, // Selenium
|
||||
Br = 35, // Bromine
|
||||
Kr = 36, // Krypton
|
||||
|
||||
Rb = 37, // Rubidium
|
||||
Sr = 38, // Strontium
|
||||
Y = 39, // Yttrium
|
||||
Zr = 40, // Zirconium
|
||||
Nb = 41, // Niobium
|
||||
Mo = 42, // Molybdenum
|
||||
Tc = 43, // Technetium
|
||||
Ru = 44, // Ruthenium
|
||||
Rh = 45, // Rhodium
|
||||
Pd = 46, // Palladium
|
||||
Ag = 47, // Silver
|
||||
Cd = 48, // Cadmium
|
||||
In = 49, // Indium
|
||||
Sn = 50, // Tin
|
||||
Sb = 51, // Antimony
|
||||
Te = 52, // Tellurium
|
||||
I = 53, // Iodine
|
||||
Xe = 54, // Xenon
|
||||
Cs = 55, // Caesium
|
||||
Ba = 56, // Barium
|
||||
La = 57, // Lanthanum
|
||||
|
||||
Hf = 72, // Hafnium
|
||||
Ta = 73, // Tantalum
|
||||
W = 74, // Tungsten
|
||||
Re = 75, // Rhenium
|
||||
Os = 76, // Osmium
|
||||
Ir = 77, // Iridium
|
||||
Pt = 78, // Platinum
|
||||
Au = 79, // Gold
|
||||
Hg = 80, // Mercury
|
||||
Tl = 81, // Thallium
|
||||
Pb = 82, // Lead
|
||||
Bi = 83, // Bismuth
|
||||
Po = 84, // Polonium
|
||||
At = 85, // Astatine
|
||||
Rn = 86, // Radon
|
||||
Fr = 87, // Francium
|
||||
Ra = 88, // Radium
|
||||
Ac = 89, // Actinium
|
||||
|
||||
Rf = 104, // Rutherfordium
|
||||
Db = 105, // Dubnium
|
||||
Sg = 106, // Seaborgium
|
||||
Bh = 107, // Bohrium
|
||||
Hs = 108, // Hassium
|
||||
Mt = 109, // Meitnerium
|
||||
Ds = 110, // Darmstadtium
|
||||
Rg = 111, // Roentgenium
|
||||
Cn = 112, // Copernicium
|
||||
Nh = 113, // Nihonium
|
||||
Fl = 114, // Flerovium
|
||||
Mc = 115, // Moscovium
|
||||
Lv = 116, // Livermorium
|
||||
Ts = 117, // Tennessine
|
||||
Og = 118, // Oganesson
|
||||
|
||||
Ce = 58, // Cerium
|
||||
Pr = 59, // Praseodymium
|
||||
Nd = 60, // Neodymium
|
||||
Pm = 61, // Promethium
|
||||
Sm = 62, // Samarium
|
||||
Eu = 63, // Europium
|
||||
Gd = 64, // Gadolinium
|
||||
Tb = 65, // Terbium
|
||||
Dy = 66, // Dysprosium
|
||||
Ho = 67, // Holmium
|
||||
Er = 68, // Erbium
|
||||
Tm = 69, // Thulium
|
||||
Yb = 70, // Ytterbium
|
||||
Lu = 71, // Lutetium
|
||||
|
||||
Th = 90, // Thorium
|
||||
Pa = 91, // Protactinium
|
||||
U = 92, // Uranium
|
||||
Np = 93, // Neptunium
|
||||
Pu = 94, // Plutonium
|
||||
Am = 95, // Americium
|
||||
Cm = 96, // Curium
|
||||
Bk = 97, // Berkelium
|
||||
Cf = 98, // Californium
|
||||
Es = 99, // Einsteinium
|
||||
Fm = 100, // Fermium
|
||||
Md = 101, // Mendelevium
|
||||
No = 102, // Nobelium
|
||||
Lr = 103, // Lawrencium
|
||||
|
||||
D = 129, // Deuterium
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeInfo
|
||||
|
||||
enum RadiusType {
|
||||
eRadiusCalculated,
|
||||
eRadiusEmpirical,
|
||||
eRadiusCovalentEmpirical,
|
||||
|
||||
eRadiusSingleBond,
|
||||
eRadiusDoubleBond,
|
||||
eRadiusTripleBond,
|
||||
|
||||
eRadiusVanderWaals,
|
||||
|
||||
eRadiusTypeCount
|
||||
};
|
||||
|
||||
struct AtomTypeInfo
|
||||
{
|
||||
AtomType type;
|
||||
std::string name;
|
||||
std::string symbol;
|
||||
float weight;
|
||||
bool metal;
|
||||
float radii[eRadiusTypeCount];
|
||||
};
|
||||
|
||||
extern const AtomTypeInfo kKnownAtoms[];
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// AtomTypeTraits
|
||||
|
||||
class AtomTypeTraits
|
||||
{
|
||||
public:
|
||||
AtomTypeTraits(AtomType a);
|
||||
AtomTypeTraits(const std::string& symbol);
|
||||
|
||||
AtomType type() const { return mInfo->type; }
|
||||
std::string name() const { return mInfo->name; }
|
||||
std::string symbol() const { return mInfo->symbol; }
|
||||
float weight() const { return mInfo->weight; }
|
||||
|
||||
bool isMetal() const { return mInfo->metal; }
|
||||
|
||||
static bool isElement(const std::string& symbol);
|
||||
static bool isMetal(const std::string& symbol);
|
||||
|
||||
float radius(RadiusType type = eRadiusSingleBond) const
|
||||
{
|
||||
if (type >= eRadiusTypeCount)
|
||||
throw std::invalid_argument("invalid radius requested");
|
||||
return mInfo->radii[type] / 100.f;
|
||||
}
|
||||
|
||||
// data type encapsulating the Waasmaier & Kirfel scattering factors
|
||||
// in a simplified form (only a and b).
|
||||
// Added the electrion scattering factors as well
|
||||
struct SFData
|
||||
{
|
||||
double a[6], b[6];
|
||||
};
|
||||
|
||||
// to get the Cval and Siva values, use this constant as charge:
|
||||
enum { kWKSFVal = -99 };
|
||||
|
||||
const SFData& wksf(int charge = 0) const;
|
||||
const SFData& elsf() const;
|
||||
|
||||
private:
|
||||
const struct AtomTypeInfo* mInfo;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <unordered_map>
|
||||
#include <filesystem>
|
||||
#include <stdexcept>
|
||||
|
||||
#include "cif++/Structure.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class BondMapException : public std::runtime_error
|
||||
{
|
||||
public:
|
||||
BondMapException(const std::string& msg)
|
||||
: runtime_error(msg) {}
|
||||
};
|
||||
|
||||
class BondMap
|
||||
{
|
||||
public:
|
||||
BondMap(const Structure& p);
|
||||
|
||||
BondMap(const BondMap&) = delete;
|
||||
BondMap& operator=(const BondMap&) = delete;
|
||||
|
||||
bool operator()(const Atom& a, const Atom& b) const
|
||||
{
|
||||
return isBonded(index.at(a.id()), index.at(b.id()));
|
||||
}
|
||||
|
||||
bool is1_4(const Atom& a, const Atom& b) const
|
||||
{
|
||||
uint32_t ixa = index.at(a.id());
|
||||
uint32_t ixb = index.at(b.id());
|
||||
|
||||
return bond_1_4.count(key(ixa, ixb));
|
||||
}
|
||||
|
||||
// links coming from the struct_conn records:
|
||||
std::vector<std::string> linked(const Atom& a) const;
|
||||
|
||||
// This list of atomID's is comming from either CCD or the CCP4 dictionaries loaded
|
||||
static std::vector<std::string> atomIDsForCompound(const std::string& compoundID);
|
||||
|
||||
private:
|
||||
|
||||
bool isBonded(uint32_t ai, uint32_t bi) const
|
||||
{
|
||||
return bond.count(key(ai, bi)) != 0;
|
||||
}
|
||||
|
||||
uint64_t key(uint32_t a, uint32_t b) const
|
||||
{
|
||||
if (a > b)
|
||||
std::swap(a, b);
|
||||
return static_cast<uint64_t>(a) | (static_cast<uint64_t>(b) << 32);
|
||||
}
|
||||
|
||||
std::tuple<uint32_t,uint32_t> dekey(uint64_t k) const
|
||||
{
|
||||
return std::make_tuple(
|
||||
static_cast<uint32_t>(k >> 32),
|
||||
static_cast<uint32_t>(k)
|
||||
);
|
||||
}
|
||||
|
||||
uint32_t dim;
|
||||
std::unordered_map<std::string,uint32_t> index;
|
||||
std::set<uint64_t> bond, bond_1_4;
|
||||
|
||||
std::map<std::string,std::set<std::string>> link;
|
||||
};
|
||||
|
||||
}
|
||||
File diff suppressed because it is too large
Load Diff
@@ -1,248 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
|
||||
#include <stack>
|
||||
#include <map>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CifParserError : public std::runtime_error
|
||||
{
|
||||
public:
|
||||
CifParserError(uint32_t lineNr, const std::string& message);
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
extern const uint32_t kMaxLineLength;
|
||||
|
||||
extern const uint8_t kCharTraitsTable[128];
|
||||
|
||||
enum CharTraitsMask: uint8_t {
|
||||
kOrdinaryMask = 1 << 0,
|
||||
kNonBlankMask = 1 << 1,
|
||||
kTextLeadMask = 1 << 2,
|
||||
kAnyPrintMask = 1 << 3
|
||||
};
|
||||
|
||||
inline bool isWhite(int ch)
|
||||
{
|
||||
return std::isspace(ch) or ch == '#';
|
||||
}
|
||||
|
||||
inline bool isOrdinary(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isNonBlank(int ch)
|
||||
{
|
||||
return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isTextLead(int ch)
|
||||
{
|
||||
return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
|
||||
}
|
||||
|
||||
inline bool isAnyPrint(int ch)
|
||||
{
|
||||
return ch == '\t' or
|
||||
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
|
||||
}
|
||||
|
||||
inline bool isUnquotedString(const char* s)
|
||||
{
|
||||
bool result = isOrdinary(*s++);
|
||||
while (result and *s != 0)
|
||||
{
|
||||
result = isNonBlank(*s);
|
||||
++s;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Splits a tag into two strings, returned as a tuple. Defined elsewhere.
// NOTE(review): presumably (category name, item name) -- confirm against the definition.
std::tuple<std::string,std::string> splitTagName(const std::string& tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Maps a string key to a std::size_t; produced by SacParser::indexDatablocks().
// NOTE(review): presumably datablock name -> stream offset -- confirm.
using DatablockIndex = std::map<std::string,std::size_t>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// SacParser: a "simple API for CIF" parser, analogous to a SAX (Simple API for XML) parser
|
||||
|
||||
class SacParser
|
||||
{
|
||||
public:
|
||||
SacParser(std::istream& is, bool init = true);
|
||||
virtual ~SacParser() {}
|
||||
|
||||
enum CIFToken
|
||||
{
|
||||
eCIFTokenUnknown,
|
||||
|
||||
eCIFTokenEOF,
|
||||
|
||||
eCIFTokenDATA,
|
||||
eCIFTokenLOOP,
|
||||
eCIFTokenGLOBAL,
|
||||
eCIFTokenSAVE,
|
||||
eCIFTokenSTOP,
|
||||
eCIFTokenTag,
|
||||
eCIFTokenValue,
|
||||
};
|
||||
|
||||
static const char* kTokenName[];
|
||||
|
||||
enum CIFValueType
|
||||
{
|
||||
eCIFValueInt,
|
||||
eCIFValueFloat,
|
||||
eCIFValueNumeric,
|
||||
eCIFValueString,
|
||||
eCIFValueTextField,
|
||||
eCIFValueInapplicable,
|
||||
eCIFValueUnknown
|
||||
};
|
||||
|
||||
static const char* kValueName[];
|
||||
|
||||
int getNextChar();
|
||||
|
||||
void retract();
|
||||
void restart();
|
||||
|
||||
CIFToken getNextToken();
|
||||
void match(CIFToken token);
|
||||
|
||||
bool parseSingleDatablock(const std::string& datablock);
|
||||
|
||||
DatablockIndex indexDatablocks();
|
||||
bool parseSingleDatablock(const std::string& datablock, const DatablockIndex &index);
|
||||
|
||||
void parseFile();
|
||||
void parseGlobal();
|
||||
void parseDataBlock();
|
||||
|
||||
virtual void parseSaveFrame();
|
||||
|
||||
void parseDictionary();
|
||||
|
||||
void error(const std::string& msg);
|
||||
|
||||
// production methods, these are pure virtual here
|
||||
|
||||
virtual void produceDatablock(const std::string& name) = 0;
|
||||
virtual void produceCategory(const std::string& name) = 0;
|
||||
virtual void produceRow() = 0;
|
||||
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0;
|
||||
|
||||
protected:
|
||||
|
||||
enum State
|
||||
{
|
||||
eStateStart,
|
||||
eStateWhite,
|
||||
eStateComment,
|
||||
eStateQuestionMark,
|
||||
eStateDot,
|
||||
eStateQuotedString,
|
||||
eStateQuotedStringQuote,
|
||||
eStateUnquotedString,
|
||||
eStateTag,
|
||||
eStateTextField,
|
||||
eStateFloat = 100,
|
||||
eStateInt = 110,
|
||||
// eStateNumericSuffix = 200,
|
||||
eStateValue = 300
|
||||
};
|
||||
|
||||
std::istream& mData;
|
||||
|
||||
// Parser state
|
||||
bool mValidate;
|
||||
uint32_t mLineNr;
|
||||
bool mBol;
|
||||
int mState, mStart;
|
||||
CIFToken mLookahead;
|
||||
std::string mTokenValue;
|
||||
CIFValueType mTokenType;
|
||||
std::stack<int> mBuffer;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Parser : public SacParser
|
||||
{
|
||||
public:
|
||||
Parser(std::istream& is, File& f, bool init = true);
|
||||
|
||||
virtual void produceDatablock(const std::string& name);
|
||||
virtual void produceCategory(const std::string& name);
|
||||
virtual void produceRow();
|
||||
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value);
|
||||
|
||||
protected:
|
||||
File& mFile;
|
||||
Datablock* mDataBlock;
|
||||
Datablock::iterator mCat;
|
||||
Row mRow;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class DictParser : public Parser
|
||||
{
|
||||
public:
|
||||
|
||||
DictParser(Validator& validator, std::istream& is);
|
||||
~DictParser();
|
||||
|
||||
void loadDictionary();
|
||||
|
||||
private:
|
||||
|
||||
virtual void parseSaveFrame();
|
||||
|
||||
bool collectItemTypes();
|
||||
void linkItems();
|
||||
|
||||
Validator& mValidator;
|
||||
File mFile;
|
||||
struct DictParserDataImpl* mImpl;
|
||||
bool mCollectedItemTypes = false;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,198 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
|
||||
// duh.. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
|
||||
// #include <regex>
|
||||
#include <boost/regex.hpp>
|
||||
|
||||
#include <set>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct ValidateCategory;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Exception type used to report validation problems. The message returned by
// what() is stored in the public member mMsg.
class ValidationError : public std::exception
{
public:
	// Both constructors are defined out of line; how cat, item and msg are
	// combined into mMsg is decided there.
	ValidationError(const std::string& msg);
	ValidationError(const std::string& cat, const std::string& item,
		const std::string& msg);

	// Returns the stored message. Marked override so the compiler verifies
	// this really replaces std::exception::what().
	const char* what() const noexcept override { return mMsg.c_str(); }

	std::string mMsg;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// The primitive types a ValidateType can map to; see mapToPrimitiveType().
enum class DDL_PrimitiveType
{
	Char,
	UChar,
	Numb
};
|
||||
|
||||
// Converts the string s into a DDL_PrimitiveType. Defined elsewhere; the
// behaviour for unrecognised strings is not visible here.
DDL_PrimitiveType mapToPrimitiveType(const std::string& s);
|
||||
|
||||
struct ValidateType
|
||||
{
|
||||
std::string mName;
|
||||
DDL_PrimitiveType mPrimitiveType;
|
||||
// std::regex mRx;
|
||||
boost::regex mRx;
|
||||
|
||||
bool operator<(const ValidateType& rhs) const
|
||||
{
|
||||
return icompare(mName, rhs.mName) < 0;
|
||||
}
|
||||
|
||||
// compare values based on type
|
||||
// int compare(const std::string& a, const std::string& b) const
|
||||
// {
|
||||
// return compare(a.c_str(), b.c_str());
|
||||
// }
|
||||
|
||||
int compare(const char* a, const char* b) const;
|
||||
};
|
||||
|
||||
struct ValidateItem
|
||||
{
|
||||
std::string mTag;
|
||||
bool mMandatory;
|
||||
const ValidateType* mType;
|
||||
cif::iset mEnums;
|
||||
std::string mDefault;
|
||||
bool mDefaultIsNull;
|
||||
ValidateCategory* mCategory = nullptr;
|
||||
|
||||
// ItemLinked is used for non-key links
|
||||
struct ItemLinked
|
||||
{
|
||||
ValidateItem* mParent;
|
||||
std::string mParentItem;
|
||||
std::string mChildItem;
|
||||
};
|
||||
|
||||
std::vector<ItemLinked> mLinked;
|
||||
|
||||
bool operator<(const ValidateItem& rhs) const
|
||||
{
|
||||
return icompare(mTag, rhs.mTag) < 0;
|
||||
}
|
||||
|
||||
bool operator==(const ValidateItem& rhs) const
|
||||
{
|
||||
return iequals(mTag, rhs.mTag);
|
||||
}
|
||||
|
||||
void operator()(std::string value) const;
|
||||
};
|
||||
|
||||
struct ValidateCategory
|
||||
{
|
||||
std::string mName;
|
||||
std::vector<std::string> mKeys;
|
||||
cif::iset mGroups;
|
||||
cif::iset mMandatoryFields;
|
||||
std::set<ValidateItem> mItemValidators;
|
||||
|
||||
bool operator<(const ValidateCategory& rhs) const
|
||||
{
|
||||
return icompare(mName, rhs.mName) < 0;
|
||||
}
|
||||
|
||||
void addItemValidator(ValidateItem&& v);
|
||||
|
||||
const ValidateItem* getValidatorForItem(std::string tag) const;
|
||||
|
||||
const std::set<ValidateItem>& itemValidators() const
|
||||
{
|
||||
return mItemValidators;
|
||||
}
|
||||
};
|
||||
|
||||
// Describes a link between a parent and a child category: the key names on
// both sides plus the id and label of the link group.
//
// mLinkGroupID has a default member initializer so that a default constructed
// ValidateLink does not hold an indeterminate id. Member order is unchanged,
// so aggregate initialisation keeps working.
struct ValidateLink
{
	int mLinkGroupID = 0;
	std::string mParentCategory;
	std::vector<std::string> mParentKeys;
	std::string mChildCategory;
	std::vector<std::string> mChildKeys;
	std::string mLinkGroupLabel;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Validator
|
||||
{
|
||||
public:
|
||||
friend class DictParser;
|
||||
|
||||
Validator();
|
||||
~Validator();
|
||||
|
||||
Validator(const Validator& rhs) = delete;
|
||||
Validator& operator=(const Validator& rhs) = delete;
|
||||
|
||||
Validator(Validator&& rhs);
|
||||
Validator& operator=(Validator&& rhs);
|
||||
|
||||
void addTypeValidator(ValidateType&& v);
|
||||
const ValidateType* getValidatorForType(std::string typeCode) const;
|
||||
|
||||
void addCategoryValidator(ValidateCategory&& v);
|
||||
const ValidateCategory* getValidatorForCategory(std::string category) const;
|
||||
|
||||
void addLinkValidator(ValidateLink&& v);
|
||||
std::vector<const ValidateLink*> getLinksForParent(const std::string& category) const;
|
||||
std::vector<const ValidateLink*> getLinksForChild(const std::string& category) const;
|
||||
|
||||
void reportError(const std::string& msg, bool fatal);
|
||||
|
||||
std::string dictName() const { return mName; }
|
||||
void dictName(const std::string& name) { mName = name; }
|
||||
|
||||
std::string dictVersion() const { return mVersion; }
|
||||
void dictVersion(const std::string& version) { mVersion = version; }
|
||||
|
||||
private:
|
||||
|
||||
// name is fully qualified here:
|
||||
ValidateItem* getValidatorForItem(std::string name) const;
|
||||
|
||||
std::string mName;
|
||||
std::string mVersion;
|
||||
bool mStrict = false;
|
||||
// std::set<uint32_t> mSubCategories;
|
||||
std::set<ValidateType> mTypeValidators;
|
||||
std::set<ValidateCategory> mCategoryValidators;
|
||||
std::vector<ValidateLink> mLinkValidators;
|
||||
};
|
||||
|
||||
}
|
||||
@@ -1,391 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// uBlas compatible matrix types
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <iostream>
|
||||
#include <vector>
|
||||
|
||||
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
|
||||
// element m i,j is mapped to [i * n + j] and thus storage is row major
|
||||
|
||||
// Abstract interface shared by all matrix types. A matrix has dim_m() rows
// and dim_n() columns; elements are addressed as (i, j) with 0 <= i < dim_m()
// and 0 <= j < dim_n().
template <typename T>
class MatrixBase
{
public:
	typedef T value_type;

	virtual ~MatrixBase() = default;

	virtual uint32_t dim_m() const = 0;
	virtual uint32_t dim_n() const = 0;

	// Writable element access. Matrix types that cannot be modified simply
	// do not override this, in which case calling it throws.
	virtual value_type &operator()(uint32_t i, uint32_t j)
	{
		throw std::runtime_error("unimplemented method");
	}

	// Read-only element access.
	virtual value_type operator()(uint32_t i, uint32_t j) const = 0;

	// Multiply every element by rhs.
	MatrixBase &operator*=(const value_type &rhs);

	// Subtract rhs from every element.
	MatrixBase &operator-=(const value_type &rhs);
};
|
||||
|
||||
template <typename T>
|
||||
MatrixBase<T> &MatrixBase<T>::operator*=(const T &rhs)
|
||||
{
|
||||
for (uint32_t i = 0; i < dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < dim_n(); ++j)
|
||||
{
|
||||
operator()(i, j) *= rhs;
|
||||
}
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
MatrixBase<T> &MatrixBase<T>::operator-=(const T &rhs)
|
||||
{
|
||||
for (uint32_t i = 0; i < dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < dim_n(); ++j)
|
||||
{
|
||||
operator()(i, j) -= rhs;
|
||||
}
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
std::ostream &operator<<(std::ostream &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
lhs << '[' << rhs.dim_m() << ',' << rhs.dim_n() << ']' << '(';
|
||||
for (uint32_t i = 0; i < rhs.dim_m(); ++i)
|
||||
{
|
||||
lhs << '(';
|
||||
for (uint32_t j = 0; j < rhs.dim_n(); ++j)
|
||||
{
|
||||
if (j > 0)
|
||||
lhs << ',';
|
||||
lhs << rhs(i, j);
|
||||
}
|
||||
lhs << ')';
|
||||
}
|
||||
lhs << ')';
|
||||
|
||||
return lhs;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class Matrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
using value_type = T;
|
||||
|
||||
template <typename T2>
|
||||
Matrix(const MatrixBase<T2> &m)
|
||||
: m_m(m.dim_m())
|
||||
, m_n(m.dim_n())
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
for (uint32_t i = 0; i < m_m; ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < m_n; ++j)
|
||||
operator()(i, j) = m(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
Matrix()
|
||||
: m_data(nullptr)
|
||||
, m_m(0)
|
||||
, m_n(0)
|
||||
{
|
||||
}
|
||||
|
||||
Matrix(const Matrix &m)
|
||||
: m_m(m.m_m)
|
||||
, m_n(m.m_n)
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
std::copy(m.m_data, m.m_data + (m_m * m_n), m_data);
|
||||
}
|
||||
|
||||
Matrix &operator=(const Matrix &m)
|
||||
{
|
||||
value_type *t = new value_type[m.m_m * m.m_n];
|
||||
std::copy(m.m_data, m.m_data + (m.m_m * m.m_n), t);
|
||||
|
||||
delete[] m_data;
|
||||
m_data = t;
|
||||
m_m = m.m_m;
|
||||
m_n = m.m_n;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
Matrix(uint32_t m, uint32_t n, T v = T())
|
||||
: m_m(m)
|
||||
, m_n(n)
|
||||
{
|
||||
m_data = new value_type[m_m * m_n];
|
||||
std::fill(m_data, m_data + (m_m * m_n), v);
|
||||
}
|
||||
|
||||
virtual ~Matrix()
|
||||
{
|
||||
delete[] m_data;
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_m; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
virtual value_type operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
assert(i < m_m);
|
||||
assert(j < m_n);
|
||||
return m_data[i * m_n + j];
|
||||
}
|
||||
|
||||
virtual value_type &operator()(uint32_t i, uint32_t j)
|
||||
{
|
||||
assert(i < m_m);
|
||||
assert(j < m_n);
|
||||
return m_data[i * m_n + j];
|
||||
}
|
||||
|
||||
template <typename Func>
|
||||
void each(Func f)
|
||||
{
|
||||
for (uint32_t i = 0; i < m_m * m_n; ++i)
|
||||
f(m_data[i]);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
Matrix &operator/=(U v)
|
||||
{
|
||||
for (uint32_t i = 0; i < m_m * m_n; ++i)
|
||||
m_data[i] /= v;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
value_type *m_data;
|
||||
uint32_t m_m, m_n;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename T>
|
||||
class SymmetricMatrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
typedef typename MatrixBase<T>::value_type value_type;
|
||||
|
||||
SymmetricMatrix(uint32_t n, T v = T())
|
||||
: m_owner(true)
|
||||
, m_n(n)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
m_data = new value_type[N];
|
||||
std::fill(m_data, m_data + N, v);
|
||||
}
|
||||
|
||||
SymmetricMatrix(const T *data, uint32_t n)
|
||||
: m_owner(false)
|
||||
, m_data(const_cast<T *>(data))
|
||||
, m_n(n)
|
||||
{
|
||||
}
|
||||
|
||||
virtual ~SymmetricMatrix()
|
||||
{
|
||||
if (m_owner)
|
||||
delete[] m_data;
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_n; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
T operator()(uint32_t i, uint32_t j) const;
|
||||
virtual T &operator()(uint32_t i, uint32_t j);
|
||||
|
||||
// erase two rows, add one at the end (for neighbour joining)
|
||||
void erase_2(uint32_t i, uint32_t j);
|
||||
|
||||
template <typename Func>
|
||||
void each(Func f)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
|
||||
for (uint32_t i = 0; i < N; ++i)
|
||||
f(m_data[i]);
|
||||
}
|
||||
|
||||
template <typename U>
|
||||
SymmetricMatrix &operator/=(U v)
|
||||
{
|
||||
uint32_t N = (m_n * (m_n + 1)) / 2;
|
||||
|
||||
for (uint32_t i = 0; i < N; ++i)
|
||||
m_data[i] /= v;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
private:
|
||||
bool m_owner;
|
||||
value_type *m_data;
|
||||
uint32_t m_n;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
inline T SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return i < j
|
||||
? m_data[(j * (j + 1)) / 2 + i]
|
||||
: m_data[(i * (i + 1)) / 2 + j];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline T &SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j)
|
||||
{
|
||||
if (i > j)
|
||||
std::swap(i, j);
|
||||
assert(j < m_n);
|
||||
return m_data[(j * (j + 1)) / 2 + i];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void SymmetricMatrix<T>::erase_2(uint32_t di, uint32_t dj)
|
||||
{
|
||||
uint32_t s = 0, d = 0;
|
||||
for (uint32_t i = 0; i < m_n; ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < i; ++j)
|
||||
{
|
||||
if (i != di and j != dj and i != dj and j != di)
|
||||
{
|
||||
if (s != d)
|
||||
m_data[d] = m_data[s];
|
||||
++d;
|
||||
}
|
||||
|
||||
++s;
|
||||
}
|
||||
}
|
||||
|
||||
--m_n;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class IdentityMatrix : public MatrixBase<T>
|
||||
{
|
||||
public:
|
||||
typedef typename MatrixBase<T>::value_type value_type;
|
||||
|
||||
IdentityMatrix(uint32_t n)
|
||||
: m_n(n)
|
||||
{
|
||||
}
|
||||
|
||||
virtual uint32_t dim_m() const { return m_n; }
|
||||
virtual uint32_t dim_n() const { return m_n; }
|
||||
|
||||
virtual value_type operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
value_type result = 0;
|
||||
if (i == j)
|
||||
result = 1;
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
uint32_t m_n;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix functions
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator*(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
Matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
|
||||
|
||||
for (uint32_t i = 0; i < result.dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < result.dim_n(); ++j)
|
||||
{
|
||||
for (uint32_t li = 0, rj = 0; li < lhs.dim_m() and rj < rhs.dim_n(); ++li, ++rj)
|
||||
result(i, j) += lhs(li, j) * rhs(i, rj);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator*(const MatrixBase<T> &lhs, T rhs)
|
||||
{
|
||||
Matrix<T> result(lhs);
|
||||
result *= rhs;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator-(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
|
||||
{
|
||||
Matrix<T> result(std::min(lhs.dim_m(), rhs.dim_m()), std::min(lhs.dim_n(), rhs.dim_n()));
|
||||
|
||||
for (uint32_t i = 0; i < result.dim_m(); ++i)
|
||||
{
|
||||
for (uint32_t j = 0; j < result.dim_n(); ++j)
|
||||
{
|
||||
result(i, j) = lhs(i, j) - rhs(i, j);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
Matrix<T> operator-(const MatrixBase<T> &lhs, T rhs)
|
||||
{
|
||||
Matrix<T> result(lhs.dim_m(), lhs.dim_n());
|
||||
result -= rhs;
|
||||
return result;
|
||||
}
|
||||
|
||||
// template <typename T>
|
||||
// symmetric_matrix<T> hammingDistance(const MatrixBase<T> &lhs, T rhs);
|
||||
|
||||
// template <typename T>
|
||||
// std::vector<T> sum(const MatrixBase<T> &m);
|
||||
@@ -1,428 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <functional>
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
#include <clipper/core/coords.h>
|
||||
#endif
|
||||
|
||||
#include <boost/math/quaternion.hpp>
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// Single precision quaternion type used by this header.
typedef boost::math::quaternion<float> Quaternion;
|
||||
|
||||
// The constant pi, used below to convert between radians and degrees.
const double
|
||||
kPI = 3.141592653589793238462643383279502884;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Point, a location with x, y and z coordinates as floating point.
|
||||
// This one is derived from a tuple<float,float,float> so
|
||||
// you can do things like:
|
||||
//
|
||||
// float x, y, z;
|
||||
// tie(x, y, z) = atom.loc();
|
||||
|
||||
template<typename F>
|
||||
struct PointF
|
||||
{
|
||||
typedef F FType;
|
||||
|
||||
FType mX, mY, mZ;
|
||||
|
||||
PointF() : mX(0), mY(0), mZ(0) {}
|
||||
PointF(FType x, FType y, FType z) : mX(x), mY(y), mZ(z) {}
|
||||
|
||||
template<typename PF>
|
||||
PointF(const PointF<PF>& pt)
|
||||
: mX(static_cast<F>(pt.mX))
|
||||
, mY(static_cast<F>(pt.mY))
|
||||
, mZ(static_cast<F>(pt.mZ)) {}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
PointF(const clipper::Coord_orth& pt): mX(pt[0]), mY(pt[1]), mZ(pt[2]) {}
|
||||
|
||||
PointF& operator=(const clipper::Coord_orth& rhs)
|
||||
{
|
||||
mX = rhs[0];
|
||||
mY = rhs[1];
|
||||
mZ = rhs[2];
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
template<typename PF>
|
||||
PointF& operator=(const PointF<PF>& rhs)
|
||||
{
|
||||
mX = static_cast<F>(rhs.mX);
|
||||
mY = static_cast<F>(rhs.mY);
|
||||
mZ = static_cast<F>(rhs.mZ);
|
||||
return *this;
|
||||
}
|
||||
|
||||
FType& getX() { return mX; }
|
||||
FType getX() const { return mX; }
|
||||
void setX(FType x) { mX = x; }
|
||||
|
||||
FType& getY() { return mY; }
|
||||
FType getY() const { return mY; }
|
||||
void setY(FType y) { mY = y; }
|
||||
|
||||
FType& getZ() { return mZ; }
|
||||
FType getZ() const { return mZ; }
|
||||
void setZ(FType z) { mZ = z; }
|
||||
|
||||
PointF& operator+=(const PointF& rhs)
|
||||
{
|
||||
mX += rhs.mX;
|
||||
mY += rhs.mY;
|
||||
mZ += rhs.mZ;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator+=(FType d)
|
||||
{
|
||||
mX += d;
|
||||
mY += d;
|
||||
mZ += d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator-=(const PointF& rhs)
|
||||
{
|
||||
mX -= rhs.mX;
|
||||
mY -= rhs.mY;
|
||||
mZ -= rhs.mZ;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator-=(FType d)
|
||||
{
|
||||
mX -= d;
|
||||
mY -= d;
|
||||
mZ -= d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator*=(FType rhs)
|
||||
{
|
||||
mX *= rhs;
|
||||
mY *= rhs;
|
||||
mZ *= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
PointF& operator/=(FType rhs)
|
||||
{
|
||||
mX /= rhs;
|
||||
mY /= rhs;
|
||||
mZ /= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
FType normalize()
|
||||
{
|
||||
auto length = mX * mX + mY * mY + mZ * mZ;
|
||||
if (length > 0)
|
||||
{
|
||||
length = std::sqrt(length);
|
||||
operator/=(length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
void rotate(const boost::math::quaternion<FType>& q)
|
||||
{
|
||||
boost::math::quaternion<FType> p(0, mX, mY, mZ);
|
||||
|
||||
p = q * p * boost::math::conj(q);
|
||||
|
||||
mX = p.R_component_2();
|
||||
mY = p.R_component_3();
|
||||
mZ = p.R_component_4();
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
operator clipper::Coord_orth() const
|
||||
{
|
||||
return clipper::Coord_orth(mX, mY, mZ);
|
||||
}
|
||||
#endif
|
||||
|
||||
operator std::tuple<const FType&, const FType&, const FType&>() const
|
||||
{
|
||||
return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
|
||||
}
|
||||
|
||||
operator std::tuple<FType&,FType&,FType&>()
|
||||
{
|
||||
return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
|
||||
}
|
||||
|
||||
bool operator==(const PointF& rhs) const
|
||||
{
|
||||
return mX == rhs.mX and mY == rhs.mY and mZ == rhs.mZ;
|
||||
}
|
||||
|
||||
// consider point as a vector... perhaps I should rename Point?
|
||||
FType lengthsq() const
|
||||
{
|
||||
return mX * mX + mY * mY + mZ * mZ;
|
||||
}
|
||||
|
||||
FType length() const
|
||||
{
|
||||
return sqrt(mX * mX + mY * mY + mZ * mZ);
|
||||
}
|
||||
};
|
||||
|
||||
// Single precision point, the type used by the declarations further below.
typedef PointF<float> Point;
|
||||
// Double precision point.
typedef PointF<double> DPoint;
|
||||
|
||||
template<typename F>
|
||||
inline std::ostream& operator<<(std::ostream& os, const PointF<F>& pt)
|
||||
{
|
||||
os << '(' << pt.mX << ',' << pt.mY << ',' << pt.mZ << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator+(const PointF<F>& lhs, const PointF<F>& rhs)
|
||||
{
|
||||
return PointF<F>(lhs.mX + rhs.mX, lhs.mY + rhs.mY, lhs.mZ + rhs.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator-(const PointF<F>& lhs, const PointF<F>& rhs)
|
||||
{
|
||||
return PointF<F>(lhs.mX - rhs.mX, lhs.mY - rhs.mY, lhs.mZ - rhs.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator-(const PointF<F>& pt)
|
||||
{
|
||||
return PointF<F>(-pt.mX, -pt.mY, -pt.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator*(const PointF<F>& pt, F f)
|
||||
{
|
||||
return PointF<F>(pt.mX * f, pt.mY * f, pt.mZ * f);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator*(F f, const PointF<F>& pt)
|
||||
{
|
||||
return PointF<F>(pt.mX * f, pt.mY * f, pt.mZ * f);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> operator/(const PointF<F>& pt, F f)
|
||||
{
|
||||
return PointF<F>(pt.mX / f, pt.mY / f, pt.mZ / f);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// several standard 3d operations
|
||||
|
||||
template<typename F>
|
||||
inline double DistanceSquared(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return
|
||||
(a.mX - b.mX) * (a.mX - b.mX) +
|
||||
(a.mY - b.mY) * (a.mY - b.mY) +
|
||||
(a.mZ - b.mZ) * (a.mZ - b.mZ);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline double Distance(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return sqrt(
|
||||
(a.mX - b.mX) * (a.mX - b.mX) +
|
||||
(a.mY - b.mY) * (a.mY - b.mY) +
|
||||
(a.mZ - b.mZ) * (a.mZ - b.mZ));
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline F DotProduct(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return a.mX * b.mX + a.mY * b.mY + a.mZ * b.mZ;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
inline PointF<F> CrossProduct(const PointF<F>& a, const PointF<F>& b)
|
||||
{
|
||||
return PointF<F>(a.mY * b.mZ - b.mY * a.mZ,
|
||||
a.mZ * b.mX - b.mZ * a.mX,
|
||||
a.mX * b.mY - b.mX * a.mY);
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double Angle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3)
|
||||
{
|
||||
PointF<F> v1 = p1 - p2;
|
||||
PointF<F> v2 = p3 - p2;
|
||||
|
||||
return std::acos(DotProduct(v1, v2) / (v1.length() * v2.length())) * 180 / kPI;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double DihedralAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
|
||||
{
|
||||
PointF<F> v12 = p1 - p2; // vector from p2 to p1
|
||||
PointF<F> v43 = p4 - p3; // vector from p3 to p4
|
||||
|
||||
PointF<F> z = p2 - p3; // vector from p3 to p2
|
||||
|
||||
PointF<F> p = CrossProduct(z, v12);
|
||||
PointF<F> x = CrossProduct(z, v43);
|
||||
PointF<F> y = CrossProduct(z, x);
|
||||
|
||||
double u = DotProduct(x, x);
|
||||
double v = DotProduct(y, y);
|
||||
|
||||
double result = 360;
|
||||
if (u > 0 and v > 0)
|
||||
{
|
||||
u = DotProduct(p, x) / sqrt(u);
|
||||
v = DotProduct(p, y) / sqrt(v);
|
||||
if (u != 0 or v != 0)
|
||||
result = atan2(v, u) * 180 / kPI;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
double CosinusAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
|
||||
{
|
||||
PointF<F> v12 = p1 - p2;
|
||||
PointF<F> v34 = p3 - p4;
|
||||
|
||||
double result = 0;
|
||||
|
||||
double x = DotProduct(v12, v12) * DotProduct(v34, v34);
|
||||
if (x > 0)
|
||||
result = DotProduct(v12, v34) / sqrt(x);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template<typename F>
|
||||
auto DistancePointToLine(const PointF<F> &l1, const PointF<F> &l2, const PointF<F> &p)
|
||||
{
|
||||
auto line = l2 - l1;
|
||||
auto p_to_l1 = p - l1;
|
||||
auto p_to_l2 = p - l2;
|
||||
auto cross = CrossProduct(p_to_l1, p_to_l2);
|
||||
return cross.length() / line.length();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// For e.g. simulated annealing, returns a new point that is moved in
|
||||
// a random direction with a distance randomly chosen from a normal
|
||||
// distribution with a stddev of offset.
|
||||
|
||||
template<typename F>
|
||||
PointF<F> Nudge(PointF<F> p, F offset);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// We use quaternions to do rotations in 3d space
|
||||
|
||||
Quaternion Normalize(Quaternion q);
|
||||
|
||||
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q);
|
||||
Point Centroid(std::vector<Point>& Points);
|
||||
Point CenterPoints(std::vector<Point>& Points);
|
||||
Quaternion AlignPoints(const std::vector<Point>& a, const std::vector<Point>& b);
|
||||
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Helper class to generate evenly divided Points on a sphere
|
||||
// we use a fibonacci sphere to calculate even distribution of the dots
|
||||
|
||||
template<int N>
|
||||
class SphericalDots
|
||||
{
|
||||
public:
|
||||
enum { P = 2 * N + 1 };
|
||||
typedef typename std::array<Point,P> array_type;
|
||||
typedef typename array_type::const_iterator iterator;
|
||||
|
||||
static SphericalDots& instance()
|
||||
{
|
||||
static SphericalDots sInstance;
|
||||
return sInstance;
|
||||
}
|
||||
|
||||
size_t size() const { return mPoints.size(); }
|
||||
const Point operator[](uint32_t inIx) const { return mPoints[inIx]; }
|
||||
iterator begin() const { return mPoints.begin(); }
|
||||
iterator end() const { return mPoints.end(); }
|
||||
|
||||
double weight() const { return mWeight; }
|
||||
|
||||
SphericalDots()
|
||||
{
|
||||
|
||||
const double
|
||||
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
|
||||
|
||||
mWeight = (4 * kPI) / P;
|
||||
|
||||
auto p = mPoints.begin();
|
||||
|
||||
for (int32_t i = -N; i <= N; ++i)
|
||||
{
|
||||
double lat = std::asin((2.0 * i) / P);
|
||||
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
|
||||
|
||||
p->mX = sin(lon) * cos(lat);
|
||||
p->mY = cos(lon) * cos(lat);
|
||||
p->mZ = sin(lat);
|
||||
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
array_type mPoints;
|
||||
double mWeight;
|
||||
};
|
||||
|
||||
typedef SphericalDots<50> SphericalDots_50;
|
||||
|
||||
}
|
||||
@@ -1,218 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Calculate DSSP-like secondary structure information
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class Structure;
|
||||
class Monomer;
|
||||
|
||||
struct Res;
|
||||
|
||||
extern const float
|
||||
kCouplingConstant, kMinHBondEnergy, kMaxHBondEnergy;
|
||||
|
||||
/// \brief Secondary structure classes; each enumerator has the value of a
/// single character code and the enum is stored in a char.
enum SecondaryStructureType : char
{
	ssLoop       = ' ', ///< loop, the blank code

	// helices
	ssAlphahelix = 'H',
	ssHelix_3    = 'G',
	ssHelix_5    = 'I',
	ssHelix_PPII = 'P',

	// bridges and strands
	ssBetabridge = 'B',
	ssStrand     = 'E',

	// turns and bends
	ssTurn       = 'T',
	ssBend       = 'S'
};
|
||||
|
||||
/// \brief The kinds of helix that can be queried, see DSSP::ResidueInfo::helix.
/// The enumerators have the implicit values 0 to 3.
enum class HelixType
{
	rh_3_10,
	rh_alpha,
	rh_pi,
	rh_pp
};
|
||||
|
||||
/// \brief Position of a residue relative to a helix, as returned by
/// DSSP::ResidueInfo::helix. The enumerators have the implicit values 0 to 4.
enum class Helix
{
	None,
	Start,
	End,
	StartAndEnd,
	Middle
};
|
||||
|
||||
//struct HBond
|
||||
//{
|
||||
// std::string labelAsymID;
|
||||
// int labelSeqID;
|
||||
// double energy;
|
||||
//};
|
||||
//
|
||||
//struct BridgePartner
|
||||
//{
|
||||
// std::string labelAsymID;
|
||||
// int labelSeqID;
|
||||
// int ladder;
|
||||
// bool parallel;
|
||||
//};
|
||||
|
||||
struct SecondaryStructure
|
||||
{
|
||||
SecondaryStructureType type;
|
||||
// HBond donor[2], acceptor[2];
|
||||
// BridgePartner beta[2];
|
||||
// int sheet;
|
||||
// bool bend;
|
||||
};
|
||||
|
||||
//void CalculateSecondaryStructure(Structure& s);
|
||||
|
||||
/// Number of bins in each of the histograms of DSSP_Statistics
const size_t
	kHistogramSize = 30;

/// \brief Counters and histograms as returned by DSSP::GetStatistics().
///
/// Every member has a default member initializer, so a default constructed
/// object starts with all counters at zero. The original left the first
/// seven counters uninitialized while zeroing the others.
struct DSSP_Statistics
{
	uint32_t nrOfResidues = 0;
	uint32_t nrOfChains = 0;
	uint32_t nrOfSSBridges = 0;
	uint32_t nrOfIntraChainSSBridges = 0;
	uint32_t nrOfHBonds = 0;
	uint32_t nrOfHBondsInAntiparallelBridges = 0;
	uint32_t nrOfHBondsInParallelBridges = 0;
	uint32_t nrOfHBondsPerDistance[11] = {};
	double accessibleSurface = 0;

	uint32_t residuesPerAlphaHelixHistogram[kHistogramSize] = {};
	uint32_t parallelBridgesPerLadderHistogram[kHistogramSize] = {};
	uint32_t antiparallelBridgesPerLadderHistogram[kHistogramSize] = {};
	uint32_t laddersPerSheetHistogram[kHistogramSize] = {};
};
|
||||
|
||||
/// \brief Kind of chain break reported by DSSP::ResidueInfo::chainBreak().
/// The enumerators have the implicit values 0 to 2.
enum class ChainBreak
{
	None,
	NewChain,
	Gap
};
|
||||
|
||||
class DSSP
|
||||
{
|
||||
public:
|
||||
DSSP(const Structure& s, int min_poly_proline_stretch_length, bool calculateSurfaceAccessibility);
|
||||
~DSSP();
|
||||
|
||||
DSSP(const DSSP&) = delete;
|
||||
DSSP& operator=(const DSSP&) = delete;
|
||||
|
||||
SecondaryStructureType operator()(const std::string& inAsymID, int inSeqID) const;
|
||||
SecondaryStructureType operator()(const Monomer& m) const;
|
||||
|
||||
double accessibility(const std::string& inAsymID, int inSeqID) const;
|
||||
double accessibility(const Monomer& m) const;
|
||||
|
||||
bool isAlphaHelixEndBeforeStart(const Monomer& m) const;
|
||||
bool isAlphaHelixEndBeforeStart(const std::string& inAsymID, int inSeqID) const;
|
||||
|
||||
DSSP_Statistics GetStatistics() const;
|
||||
|
||||
class iterator;
|
||||
using res_iterator = typename std::vector<Res>::iterator;
|
||||
|
||||
class ResidueInfo
|
||||
{
|
||||
public:
|
||||
friend class iterator;
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
bool empty() const { return mImpl == nullptr; }
|
||||
|
||||
const Monomer& residue() const;
|
||||
std::string alt_id() const;
|
||||
|
||||
/// \brief return 0 if not a break, ' ' in case of a new chain and '*' in case of a broken chain
|
||||
ChainBreak chainBreak() const;
|
||||
|
||||
/// \brief the internal number in DSSP
|
||||
int nr() const;
|
||||
|
||||
SecondaryStructureType ss() const;
|
||||
|
||||
int ssBridgeNr() const;
|
||||
|
||||
Helix helix(HelixType helixType) const;
|
||||
|
||||
bool bend() const;
|
||||
|
||||
double accessibility() const;
|
||||
|
||||
/// \brief returns resinfo, ladder and parallel
|
||||
std::tuple<ResidueInfo,int,bool> bridgePartner(int i) const;
|
||||
|
||||
int sheet() const;
|
||||
|
||||
/// \brief return resinfo and the energy of the bond
|
||||
std::tuple<ResidueInfo,double> acceptor(int i) const;
|
||||
std::tuple<ResidueInfo,double> donor(int i) const;
|
||||
|
||||
private:
|
||||
ResidueInfo(Res* res) : mImpl(res) {}
|
||||
|
||||
Res* mImpl;
|
||||
};
|
||||
|
||||
class iterator
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = ResidueInfo;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type*;
|
||||
using reference = value_type&;
|
||||
|
||||
iterator(const iterator& i);
|
||||
iterator(Res* res);
|
||||
iterator& operator=(const iterator& i);
|
||||
|
||||
reference operator*() { return mCurrent; }
|
||||
pointer operator->() { return &mCurrent; }
|
||||
|
||||
iterator& operator++();
|
||||
iterator operator++(int)
|
||||
{
|
||||
auto tmp(*this);
|
||||
this->operator++();
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool operator==(const iterator& rhs) const { return mCurrent.mImpl == rhs.mCurrent.mImpl; }
|
||||
bool operator!=(const iterator& rhs) const { return mCurrent.mImpl != rhs.mCurrent.mImpl; }
|
||||
|
||||
private:
|
||||
ResidueInfo mCurrent;
|
||||
};
|
||||
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
|
||||
bool empty() const { return begin() == end(); }
|
||||
|
||||
private:
|
||||
struct DSSPImpl* mImpl;
|
||||
};
|
||||
|
||||
|
||||
}
|
||||
@@ -1,544 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include "cif++/AtomType.hpp"
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/Point.hpp"
|
||||
|
||||
/*
|
||||
To modify a structure, you will have to use actions.
|
||||
|
||||
The currently supported actions are:
|
||||
|
||||
// - Move atom to new location
|
||||
- Remove atom
|
||||
// - Add new atom that was formerly missing
|
||||
// - Add alternate Residue
|
||||
-
|
||||
|
||||
*/
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
class Atom;
|
||||
class Residue;
|
||||
class Monomer;
|
||||
class Polymer;
|
||||
class Structure;
|
||||
class File;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Atom
|
||||
{
|
||||
public:
|
||||
Atom();
|
||||
Atom(struct AtomImpl *impl);
|
||||
Atom(const Atom &rhs);
|
||||
|
||||
Atom(cif::Datablock &db, cif::Row &row);
|
||||
|
||||
// a special constructor to create symmetry copies
|
||||
Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);
|
||||
|
||||
~Atom();
|
||||
|
||||
explicit operator bool() const { return mImpl_ != nullptr; }
|
||||
|
||||
// return a copy of this atom, with data copied instead of referenced
|
||||
Atom clone() const;
|
||||
|
||||
Atom &operator=(const Atom &rhs);
|
||||
|
||||
const std::string &id() const;
|
||||
AtomType type() const;
|
||||
|
||||
Point location() const;
|
||||
void location(Point p);
|
||||
|
||||
/// \brief Translate the position of this atom by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the position of this atom by \a q
|
||||
void rotate(Quaternion q);
|
||||
|
||||
// for direct access to underlying data, be careful!
|
||||
const cif::Row getRow() const;
|
||||
const cif::Row getRowAniso() const;
|
||||
|
||||
// Atom symmetryCopy(const Point& d, const clipper::RTop_orth& rt);
|
||||
bool isSymmetryCopy() const;
|
||||
std::string symmetry() const;
|
||||
// const clipper::RTop_orth& symop() const;
|
||||
|
||||
const Compound &comp() const;
|
||||
bool isWater() const;
|
||||
int charge() const;
|
||||
|
||||
float uIso() const;
|
||||
bool getAnisoU(float anisou[6]) const;
|
||||
float occupancy() const;
|
||||
|
||||
template <typename T>
|
||||
T property(const std::string &name) const;
|
||||
|
||||
void property(const std::string &name, const std::string &value);
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
|
||||
void property(const std::string &name, const T &value)
|
||||
{
|
||||
property(name, std::to_string(value));
|
||||
}
|
||||
|
||||
// specifications
|
||||
std::string labelAtomID() const;
|
||||
std::string labelCompID() const;
|
||||
std::string labelAsymID() const;
|
||||
std::string labelEntityID() const;
|
||||
int labelSeqID() const;
|
||||
std::string labelAltID() const;
|
||||
bool isAlternate() const;
|
||||
|
||||
std::string authAtomID() const;
|
||||
std::string authCompID() const;
|
||||
std::string authAsymID() const;
|
||||
std::string authSeqID() const;
|
||||
std::string pdbxAuthInsCode() const;
|
||||
std::string pdbxAuthAltID() const;
|
||||
|
||||
std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
|
||||
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
|
||||
|
||||
bool operator==(const Atom &rhs) const;
|
||||
|
||||
// // get clipper format Atom
|
||||
// clipper::Atom toClipper() const;
|
||||
|
||||
// Radius calculation based on integrating the density until perc of electrons is found
|
||||
void calculateRadius(float resHigh, float resLow, float perc);
|
||||
float radius() const;
|
||||
|
||||
// access data in compound for this atom
|
||||
|
||||
// convenience routine
|
||||
bool isBackBone() const
|
||||
{
|
||||
auto atomID = labelAtomID();
|
||||
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
|
||||
}
|
||||
|
||||
void swap(Atom &b)
|
||||
{
|
||||
std::swap(mImpl_, b.mImpl_);
|
||||
}
|
||||
|
||||
int compare(const Atom &b) const;
|
||||
|
||||
bool operator<(const Atom &rhs) const
|
||||
{
|
||||
return compare(rhs) < 0;
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
|
||||
|
||||
private:
|
||||
friend class Structure;
|
||||
void setID(int id);
|
||||
|
||||
AtomImpl *impl();
|
||||
const AtomImpl *impl() const;
|
||||
|
||||
struct AtomImpl *mImpl_;
|
||||
};
|
||||
|
||||
/// \brief Non-member swap for Atom, forwards to Atom::swap.
/// Atom::swap only exchanges two pointers, hence this can be noexcept, which
/// lets generic code relying on a non-throwing swap pick the efficient path.
inline void swap(mmcif::Atom &a, mmcif::Atom &b) noexcept
{
	a.swap(b);
}
|
||||
|
||||
inline double Distance(const Atom &a, const Atom &b)
|
||||
{
|
||||
return Distance(a.location(), b.location());
|
||||
}
|
||||
|
||||
inline double DistanceSquared(const Atom &a, const Atom &b)
|
||||
{
|
||||
return DistanceSquared(a.location(), b.location());
|
||||
}
|
||||
|
||||
/// A list of atoms
using AtomView = std::vector<Atom>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Residue
|
||||
{
|
||||
public:
|
||||
// constructors should be private, but that's not possible for now (needed in emplace)
|
||||
|
||||
// constructor for waters
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, const std::string &authSeqID);
|
||||
|
||||
// constructor for a residue without a sequence number
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID);
|
||||
|
||||
// constructor for a residue with a sequence number
|
||||
Residue(const Structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, int seqID, const std::string &authSeqID);
|
||||
|
||||
Residue(const Residue &rhs) = delete;
|
||||
Residue &operator=(const Residue &rhs) = delete;
|
||||
|
||||
Residue(Residue &&rhs);
|
||||
Residue &operator=(Residue &&rhs);
|
||||
|
||||
virtual ~Residue();
|
||||
|
||||
const Compound &compound() const;
|
||||
const AtomView &atoms() const;
|
||||
|
||||
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
|
||||
AtomView unique_atoms() const;
|
||||
|
||||
/// \brief The alt ID used for the unique atoms
|
||||
std::string unique_alt_id() const;
|
||||
|
||||
Atom atomByID(const std::string &atomID) const;
|
||||
|
||||
const std::string &compoundID() const { return mCompoundID; }
|
||||
const std::string &asymID() const { return mAsymID; }
|
||||
int seqID() const { return mSeqID; }
|
||||
std::string entityID() const;
|
||||
|
||||
std::string authAsymID() const;
|
||||
std::string authSeqID() const;
|
||||
std::string authInsCode() const;
|
||||
|
||||
// return a human readable PDB-like auth id (chain+seqnr+iCode)
|
||||
std::string authID() const;
|
||||
|
||||
// similar for mmCIF space
|
||||
std::string labelID() const;
|
||||
|
||||
// Is this residue a single entity?
|
||||
bool isEntity() const;
|
||||
|
||||
bool isWater() const { return mCompoundID == "HOH"; }
|
||||
|
||||
const Structure &structure() const { return *mStructure; }
|
||||
|
||||
bool empty() const { return mStructure == nullptr; }
|
||||
|
||||
bool hasAlternateAtoms() const;
|
||||
|
||||
/// \brief Return the list of unique alt ID's present in this residue
|
||||
std::set<std::string> getAlternateIDs() const;
|
||||
|
||||
/// \brief Return the list of unique atom ID's
|
||||
std::set<std::string> getAtomIDs() const;
|
||||
|
||||
/// \brief Return the list of atoms having ID \a atomID
|
||||
AtomView getAtomsByID(const std::string &atomID) const;
|
||||
|
||||
// some routines for 3d work
|
||||
std::tuple<Point, float> centerAndRadius() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const Residue &res);
|
||||
|
||||
protected:
|
||||
Residue() {}
|
||||
|
||||
friend class Polymer;
|
||||
|
||||
const Structure *mStructure = nullptr;
|
||||
std::string mCompoundID, mAsymID;
|
||||
int mSeqID = 0;
|
||||
|
||||
// Watch out, this is used only to label waters... The rest of the code relies on
|
||||
// MapLabelToAuth to get this info. Perhaps we should rename this member field.
|
||||
std::string mAuthSeqID;
|
||||
AtomView mAtoms;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a monomer models a single Residue in a protein chain
|
||||
|
||||
class Monomer : public Residue
|
||||
{
|
||||
public:
|
||||
// Monomer();
|
||||
Monomer(const Monomer &rhs) = delete;
|
||||
Monomer &operator=(const Monomer &rhs) = delete;
|
||||
|
||||
Monomer(Monomer &&rhs);
|
||||
Monomer &operator=(Monomer &&rhs);
|
||||
|
||||
Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
|
||||
const std::string &compoundID);
|
||||
|
||||
bool is_first_in_chain() const;
|
||||
bool is_last_in_chain() const;
|
||||
|
||||
// convenience
|
||||
bool has_alpha() const;
|
||||
bool has_kappa() const;
|
||||
|
||||
// Assuming this is really an amino acid...
|
||||
|
||||
float phi() const;
|
||||
float psi() const;
|
||||
float alpha() const;
|
||||
float kappa() const;
|
||||
float tco() const;
|
||||
float omega() const;
|
||||
|
||||
// torsion angles
|
||||
size_t nrOfChis() const;
|
||||
float chi(size_t i) const;
|
||||
|
||||
bool isCis() const;
|
||||
|
||||
/// \brief Returns true if the four atoms C, CA, N and O are present
|
||||
bool isComplete() const;
|
||||
|
||||
/// \brief Returns true if any of the backbone atoms has an alternate
|
||||
bool hasAlternateBackboneAtoms() const;
|
||||
|
||||
Atom CAlpha() const { return atomByID("CA"); }
|
||||
Atom C() const { return atomByID("C"); }
|
||||
Atom N() const { return atomByID("N"); }
|
||||
Atom O() const { return atomByID("O"); }
|
||||
Atom H() const { return atomByID("H"); }
|
||||
|
||||
bool isBondedTo(const Monomer &rhs) const
|
||||
{
|
||||
return this != &rhs and areBonded(*this, rhs);
|
||||
}
|
||||
|
||||
static bool areBonded(const Monomer &a, const Monomer &b, float errorMargin = 0.5f);
|
||||
static bool isCis(const Monomer &a, const Monomer &b);
|
||||
static float omega(const Monomer &a, const Monomer &b);
|
||||
|
||||
// for LEU and VAL
|
||||
float chiralVolume() const;
|
||||
|
||||
private:
|
||||
const Polymer *mPolymer;
|
||||
size_t mIndex;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Polymer : public std::vector<Monomer>
|
||||
{
|
||||
public:
|
||||
Polymer(const Structure &s, const std::string &entityID, const std::string &asymID);
|
||||
|
||||
Polymer(const Polymer &) = delete;
|
||||
Polymer &operator=(const Polymer &) = delete;
|
||||
|
||||
// Polymer(Polymer&& rhs) = delete;
|
||||
// Polymer& operator=(Polymer&& rhs) = de;
|
||||
|
||||
Monomer &getBySeqID(int seqID);
|
||||
const Monomer &getBySeqID(int seqID) const;
|
||||
|
||||
Structure *structure() const { return mStructure; }
|
||||
|
||||
std::string asymID() const { return mAsymID; }
|
||||
std::string entityID() const { return mEntityID; }
|
||||
|
||||
std::string chainID() const;
|
||||
|
||||
int Distance(const Monomer &a, const Monomer &b) const;
|
||||
|
||||
private:
|
||||
Structure *mStructure;
|
||||
std::string mEntityID;
|
||||
std::string mAsymID;
|
||||
cif::RowSet mPolySeq;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// file is a reference to the data stored in e.g. the cif file.
|
||||
// This object is not copyable.
|
||||
|
||||
class File : public std::enable_shared_from_this<File>
|
||||
{
|
||||
public:
|
||||
File();
|
||||
File(const std::filesystem::path &path);
|
||||
File(const char *data, size_t length); // good luck trying to find out what it is...
|
||||
~File();
|
||||
|
||||
File(const File &) = delete;
|
||||
File &operator=(const File &) = delete;
|
||||
|
||||
cif::Datablock& createDatablock(const std::string &name);
|
||||
|
||||
void load(const std::filesystem::path &path);
|
||||
void save(const std::filesystem::path &path);
|
||||
|
||||
Structure *model(size_t nr = 1);
|
||||
|
||||
struct FileImpl &impl() const { return *mImpl; }
|
||||
|
||||
cif::Datablock &data();
|
||||
cif::File &file();
|
||||
|
||||
private:
|
||||
struct FileImpl *mImpl;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Bit flags that can be passed when opening a Structure
enum class StructureOpenOptions
{
	SkipHydrogen = 1 << 0
};

/// \brief Test whether \a a and \a b have at least one flag in common.
///
/// Note that this returns a bool, not the combined flags. The function is
/// constexpr and noexcept so it can also be used in constant expressions, and
/// the comparison with zero makes the conversion to bool explicit.
inline constexpr bool operator&(StructureOpenOptions a, StructureOpenOptions b) noexcept
{
	return (static_cast<int>(a) bitand static_cast<int>(b)) != 0;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Structure
|
||||
{
|
||||
public:
|
||||
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
Structure &operator=(const Structure &) = delete;
|
||||
~Structure();
|
||||
|
||||
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
|
||||
Structure(const Structure &);
|
||||
|
||||
File &getFile() const;
|
||||
|
||||
const AtomView &atoms() const { return mAtoms; }
|
||||
AtomView waters() const;
|
||||
|
||||
const std::list<Polymer> &polymers() const { return mPolymers; }
|
||||
std::list<Polymer> &polymers() { return mPolymers; }
|
||||
|
||||
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
|
||||
const std::vector<Residue> &branchResidues() const { return mBranchResidues; }
|
||||
|
||||
Atom getAtomByID(std::string id) const;
|
||||
// Atom getAtomByLocation(Point pt, float maxDistance) const;
|
||||
|
||||
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
|
||||
const std::string &compID, int seqID, const std::string &altID = "");
|
||||
|
||||
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
|
||||
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0) const;
|
||||
|
||||
// map between auth and label locations
|
||||
|
||||
std::tuple<std::string, int, std::string> MapAuthToLabel(const std::string &asymID,
|
||||
const std::string &seqID, const std::string &compID, const std::string &insCode = "");
|
||||
|
||||
std::tuple<std::string, std::string, std::string, std::string> MapLabelToAuth(
|
||||
const std::string &asymID, int seqID, const std::string &compID);
|
||||
|
||||
// returns chain, seqnr, icode
|
||||
std::tuple<char, int, char> MapLabelToAuth(
|
||||
const std::string &asymID, int seqID) const;
|
||||
|
||||
// returns chain,seqnr,comp,iCode
|
||||
std::tuple<std::string, int, std::string, std::string> MapLabelToPDB(
|
||||
const std::string &asymID, int seqID, const std::string &compID,
|
||||
const std::string &authSeqID) const;
|
||||
|
||||
std::tuple<std::string, int, std::string> MapPDBToLabel(
|
||||
const std::string &asymID, int seqID, const std::string &compID, const std::string &iCode) const;
|
||||
|
||||
// Actions
|
||||
void removeAtom(Atom &a);
|
||||
void swapAtoms(Atom &a1, Atom &a2); // swap the labels for these atoms
|
||||
void moveAtom(Atom &a, Point p); // move atom to a new location
|
||||
void changeResidue(const Residue &res, const std::string &newCompound,
|
||||
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
|
||||
|
||||
/// \brief Create a new non-polymer entity, returns new ID
|
||||
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
|
||||
/// \return The ID of the created entity
|
||||
std::string createNonPolyEntity(const std::string &mon_id);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
|
||||
/// This method assumes you are copying data from one cif file to another.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of atom_site rows containing the data.
|
||||
/// \return The newly create asym ID
|
||||
std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);
|
||||
|
||||
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
|
||||
/// Will asssign new atom_id's to all atoms. Be carefull
|
||||
void sortAtoms();
|
||||
|
||||
/// \brief Translate the coordinates of all atoms in the structure by \a t
|
||||
void translate(Point t);
|
||||
|
||||
/// \brief Rotate the coordinates of all atoms in the structure by \a q
|
||||
void rotate(Quaternion t);
|
||||
|
||||
const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }
|
||||
const std::vector<Residue> &getBranchResidues() const { return mBranchResidues; }
|
||||
|
||||
void cleanupEmptyCategories();
|
||||
|
||||
private:
|
||||
friend Polymer;
|
||||
friend Residue;
|
||||
// friend residue_view;
|
||||
// friend residue_iterator;
|
||||
|
||||
cif::Category &category(const char *name) const;
|
||||
cif::Datablock &datablock() const;
|
||||
|
||||
std::string insertCompound(const std::string &compoundID, bool isEntity);
|
||||
|
||||
void loadData();
|
||||
void updateAtomIndex();
|
||||
|
||||
File &mFile;
|
||||
size_t mModelNr;
|
||||
AtomView mAtoms;
|
||||
std::vector<size_t> mAtomIndex;
|
||||
std::list<Polymer> mPolymers;
|
||||
std::vector<Residue> mNonPolymers, mBranchResidues;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
277
include/cif++/atom_type.hpp
Normal file
277
include/cif++/atom_type.hpp
Normal file
@@ -0,0 +1,277 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Lib for working with structures as contained in mmCIF and PDB files
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
/// \brief The chemical elements. The value of an enumerator is the atomic
/// number of the element, with two exceptions: Nn (0) stands for unknown and
/// D (129) is used for deuterium. Stored in a single byte.
enum atom_type : uint8_t
{
	Nn = 0, // Unknown

	// period 1
	H = 1,  // Hydrogen
	He = 2, // Helium

	// period 2
	Li = 3,  // Lithium
	Be = 4,  // Beryllium
	B = 5,   // Boron
	C = 6,   // Carbon
	N = 7,   // Nitrogen
	O = 8,   // Oxygen
	F = 9,   // Fluorine
	Ne = 10, // Neon

	// period 3
	Na = 11, // Sodium
	Mg = 12, // Magnesium
	Al = 13, // Aluminium
	Si = 14, // Silicon
	P = 15,  // Phosphorus
	S = 16,  // Sulfur
	Cl = 17, // Chlorine
	Ar = 18, // Argon

	// period 4
	K = 19,  // Potassium
	Ca = 20, // Calcium
	Sc = 21, // Scandium
	Ti = 22, // Titanium
	V = 23,  // Vanadium
	Cr = 24, // Chromium
	Mn = 25, // Manganese
	Fe = 26, // Iron
	Co = 27, // Cobalt
	Ni = 28, // Nickel
	Cu = 29, // Copper
	Zn = 30, // Zinc
	Ga = 31, // Gallium
	Ge = 32, // Germanium
	As = 33, // Arsenic
	Se = 34, // Selenium
	Br = 35, // Bromine
	Kr = 36, // Krypton

	// period 5
	Rb = 37, // Rubidium
	Sr = 38, // Strontium
	Y = 39,  // Yttrium
	Zr = 40, // Zirconium
	Nb = 41, // Niobium
	Mo = 42, // Molybdenum
	Tc = 43, // Technetium
	Ru = 44, // Ruthenium
	Rh = 45, // Rhodium
	Pd = 46, // Palladium
	Ag = 47, // Silver
	Cd = 48, // Cadmium
	In = 49, // Indium
	Sn = 50, // Tin
	Sb = 51, // Antimony
	Te = 52, // Tellurium
	I = 53,  // Iodine
	Xe = 54, // Xenon

	// period 6, including the lanthanides (58 - 71)
	Cs = 55, // Caesium
	Ba = 56, // Barium
	La = 57, // Lanthanum
	Ce = 58, // Cerium
	Pr = 59, // Praseodymium
	Nd = 60, // Neodymium
	Pm = 61, // Promethium
	Sm = 62, // Samarium
	Eu = 63, // Europium
	Gd = 64, // Gadolinium
	Tb = 65, // Terbium
	Dy = 66, // Dysprosium
	Ho = 67, // Holmium
	Er = 68, // Erbium
	Tm = 69, // Thulium
	Yb = 70, // Ytterbium
	Lu = 71, // Lutetium
	Hf = 72, // Hafnium
	Ta = 73, // Tantalum
	W = 74,  // Tungsten
	Re = 75, // Rhenium
	Os = 76, // Osmium
	Ir = 77, // Iridium
	Pt = 78, // Platinum
	Au = 79, // Gold
	Hg = 80, // Mercury
	Tl = 81, // Thallium
	Pb = 82, // Lead
	Bi = 83, // Bismuth
	Po = 84, // Polonium
	At = 85, // Astatine
	Rn = 86, // Radon

	// period 7, including the actinides (90 - 103)
	Fr = 87,  // Francium
	Ra = 88,  // Radium
	Ac = 89,  // Actinium
	Th = 90,  // Thorium
	Pa = 91,  // Protactinium
	U = 92,   // Uranium
	Np = 93,  // Neptunium
	Pu = 94,  // Plutonium
	Am = 95,  // Americium
	Cm = 96,  // Curium
	Bk = 97,  // Berkelium
	Cf = 98,  // Californium
	Es = 99,  // Einsteinium
	Fm = 100, // Fermium
	Md = 101, // Mendelevium
	No = 102, // Nobelium
	Lr = 103, // Lawrencium
	Rf = 104, // Rutherfordium
	Db = 105, // Dubnium
	Sg = 106, // Seaborgium
	Bh = 107, // Bohrium
	Hs = 108, // Hassium
	Mt = 109, // Meitnerium
	Ds = 110, // Darmstadtium
	Rg = 111, // Roentgenium
	Cn = 112, // Copernicium
	Nh = 113, // Nihonium
	Fl = 114, // Flerovium
	Mc = 115, // Moscovium
	Lv = 116, // Livermorium
	Ts = 117, // Tennessine
	Og = 118, // Oganesson

	// special value, not an atomic number
	D = 129, // Deuterium
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// atom_type_info
|
||||
|
||||
/// \brief Selects one of the per-element radii kept in atom_type_info::radii.
///
/// The enumerators double as array indices (see atom_type_traits::radius),
/// so neither their order nor their values may change.
enum class radius_type
{
	calculated,         // index 0
	empirical,          // index 1
	covalent_empirical, // index 2

	single_bond,        // index 3, the default used by atom_type_traits::radius
	double_bond,        // index 4
	triple_bond,        // index 5

	van_der_waals,      // index 6

	type_count          // not a radius: the number of enumerators above, must stay last
};

/// \brief The number of radius kinds, i.e. the length of a per-element radii array.
constexpr size_t kRadiusTypeCount = static_cast<size_t>(radius_type::type_count);
|
||||
|
||||
/// \brief The kind of ionic radius atom_type_traits::ionic_radius should return.
enum class ionic_radius_type
{
	effective, // selects effective_ionic_radius(), the default
	crystal    // selects crystal_ionic_radius()
};
|
||||
|
||||
struct atom_type_info
|
||||
{
|
||||
atom_type type;
|
||||
std::string name;
|
||||
std::string symbol;
|
||||
float weight;
|
||||
bool metal;
|
||||
float radii[kRadiusTypeCount];
|
||||
};
|
||||
|
||||
extern const atom_type_info kKnownAtoms[];
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// atom_type_traits
|
||||
|
||||
class atom_type_traits
|
||||
{
|
||||
public:
|
||||
atom_type_traits(atom_type a);
|
||||
atom_type_traits(const std::string &symbol);
|
||||
|
||||
atom_type type() const { return m_info->type; }
|
||||
std::string name() const { return m_info->name; }
|
||||
std::string symbol() const { return m_info->symbol; }
|
||||
float weight() const { return m_info->weight; }
|
||||
|
||||
bool is_metal() const { return m_info->metal; }
|
||||
|
||||
static bool is_element(const std::string &symbol);
|
||||
static bool is_metal(const std::string &symbol);
|
||||
|
||||
float radius(radius_type type = radius_type::single_bond) const
|
||||
{
|
||||
if (type >= radius_type::type_count)
|
||||
throw std::invalid_argument("invalid radius requested");
|
||||
return m_info->radii[static_cast<size_t>(type)] / 100.f;
|
||||
}
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a solid crystal
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float crystal_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom in a non-solid environment
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float effective_ionic_radius(int charge) const;
|
||||
|
||||
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
|
||||
///
|
||||
/// \param charge The charge of the ion
|
||||
/// \return The radius of the ion
|
||||
float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
|
||||
{
|
||||
return type == ionic_radius_type::effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
|
||||
}
|
||||
|
||||
// data type encapsulating the Waasmaier & Kirfel scattering factors
|
||||
// in a simplified form (only a and b).
|
||||
// Added the electrion scattering factors as well
|
||||
struct SFData
|
||||
{
|
||||
double a[6], b[6];
|
||||
};
|
||||
|
||||
// to get the Cval and Siva values, use this constant as charge:
|
||||
enum
|
||||
{
|
||||
kWKSFVal = -99
|
||||
};
|
||||
|
||||
const SFData &wksf(int charge = 0) const;
|
||||
const SFData &elsf() const;
|
||||
|
||||
private:
|
||||
const struct atom_type_info *m_info;
|
||||
};
|
||||
|
||||
} // namespace pdbx
|
||||
587
include/cif++/category.hpp
Normal file
587
include/cif++/category.hpp
Normal file
@@ -0,0 +1,587 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/iterator.hpp>
|
||||
#include <cif++/row.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
// TODO: implement all of:
|
||||
// https://en.cppreference.com/w/cpp/named_req/Container
|
||||
// https://en.cppreference.com/w/cpp/named_req/SequenceContainer
|
||||
// and more?
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class category
|
||||
{
|
||||
public:
|
||||
friend class row_handle;
|
||||
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
using value_type = row_handle;
|
||||
using reference = value_type;
|
||||
using const_reference = const value_type;
|
||||
using iterator = iterator_impl<category>;
|
||||
using const_iterator = iterator_impl<const category>;
|
||||
|
||||
category() = default;
|
||||
|
||||
category(std::string_view name);
|
||||
|
||||
category(const category &rhs);
|
||||
|
||||
category(category &&rhs);
|
||||
|
||||
category &operator=(const category &rhs);
|
||||
|
||||
category &operator=(category &&rhs);
|
||||
|
||||
~category();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief The name of this category, returned by reference to the stored string
const std::string &name() const
{
	return m_name;
}
|
||||
|
||||
iset fields() const;
|
||||
|
||||
std::set<uint16_t> key_field_indices() const;
|
||||
|
||||
void set_validator(const validator *v, datablock &db);
|
||||
void update_links(datablock &db);
|
||||
|
||||
const validator *get_validator() const { return m_validator; }
|
||||
const category_validator *get_cat_validator() const { return m_cat_validator; }
|
||||
|
||||
bool is_valid() const;
|
||||
bool validate_links() const;
|
||||
|
||||
bool operator==(const category &rhs) const;
|
||||
bool operator!=(const category &rhs) const
|
||||
{
|
||||
return not operator==(rhs);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
reference front()
|
||||
{
|
||||
return {*this, *m_head};
|
||||
}
|
||||
|
||||
const_reference front() const
|
||||
{
|
||||
return {const_cast<category &>(*this), const_cast<row &>(*m_head)};
|
||||
}
|
||||
|
||||
reference back()
|
||||
{
|
||||
return {*this, *m_tail};
|
||||
}
|
||||
|
||||
const_reference back() const
|
||||
{
|
||||
return {const_cast<category &>(*this), const_cast<row &>(*m_tail)};
|
||||
}
|
||||
|
||||
iterator begin()
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
iterator end()
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
const_iterator begin() const
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
const_iterator end() const
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
const_iterator cbegin() const
|
||||
{
|
||||
return {*this, m_head};
|
||||
}
|
||||
|
||||
const_iterator cend() const
|
||||
{
|
||||
return {*this, nullptr};
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
{
|
||||
return std::distance(cbegin(), cend());
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_head == nullptr;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// A category can have a key, as defined by the validator/dictionary
|
||||
|
||||
/// @brief The key type
|
||||
using key_type = row_initializer;
|
||||
|
||||
/// @brief Return a row_handle for the row specified by \a key
|
||||
/// @param key The value for the key, fields specified in the dictionary should have a value
|
||||
/// @return The row found in the index, or an undefined row_handle
|
||||
row_handle operator[](const key_type &key);
|
||||
|
||||
/// \brief Const overload, forwards to the non-const operator[] and returns
/// its result as a const handle
const row_handle operator[](const key_type &key) const
{
	auto &self = const_cast<category &>(*this);
	return self[key];
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
iterator_proxy<const category, Ts...> rows(Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return iterator_proxy<const category, Ts...>(*this, begin(), {names...});
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
iterator_proxy<category, Ts...> rows(Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return iterator_proxy<category, Ts...>(*this, begin(), {names...});
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Search from begin() for the rows matching \a cond
conditional_iterator_proxy<category> find(condition &&cond)
{
	return find(begin(), std::move(cond));
}

/// \brief Return a proxy built from this category, the position \a pos and
/// the condition \a cond. The condition is moved into the proxy.
conditional_iterator_proxy<category> find(iterator pos, condition &&cond)
{
	return {*this, pos, std::move(cond)};
}

/// \brief Search from cbegin() for the rows matching \a cond, const variant
conditional_iterator_proxy<const category> find(condition &&cond) const
{
	return find(cbegin(), std::move(cond));
}

/// \brief Return a const proxy built from this category, the position
/// \a pos and the condition \a cond. The condition is moved into the proxy.
conditional_iterator_proxy<const category> find(const_iterator pos, condition &&cond) const
{
	using proxy_type = conditional_iterator_proxy<const category>;
	return proxy_type{*this, pos, std::move(cond)};
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<category, Ts...> find(condition &&cond, Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<const category, Ts...> find(condition &&cond, Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<category, Ts...> find(const_iterator pos, condition &&cond, Ns... names)
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Ns>
|
||||
conditional_iterator_proxy<const category, Ts...> find(const_iterator pos, condition &&cond, Ns... names) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
|
||||
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// if you only expect a single row
|
||||
|
||||
/// \brief Return the one row matching \a cond, searching from begin()
row_handle find1(condition &&cond)
{
	return find1(begin(), std::move(cond));
}

/// \brief Return the one row matching \a cond in the search started at \a pos
///
/// \return The matching row if there is exactly one match, otherwise a
///         default constructed row_handle
row_handle find1(iterator pos, condition &&cond)
{
	auto matches = find(pos, std::move(cond));

	if (matches.size() != 1)
		return row_handle{};

	return *matches.begin();
}

/// \brief Return the one row matching \a cond, searching from cbegin()
const row_handle find1(condition &&cond) const
{
	return find1(cbegin(), std::move(cond));
}

/// \brief Return the one row matching \a cond in the search started at \a pos
///
/// \return The matching row if there is exactly one match, otherwise a
///         default constructed row_handle
const row_handle find1(const_iterator pos, condition &&cond) const
{
	auto matches = find(pos, std::move(cond));

	if (matches.size() != 1)
		return row_handle{};

	return *matches.begin();
}
|
||||
|
||||
template <typename T>
|
||||
T find1(condition &&cond, const char *column) const
|
||||
{
|
||||
return find1<T>(cbegin(), std::forward<condition>(cond), column);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T find1(const_iterator pos, condition &&cond, const char *column) const
|
||||
{
|
||||
auto h = find<T>(pos, std::forward<condition>(cond), column);
|
||||
|
||||
return h.size() == 1 ? *h.begin() : T{};
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
|
||||
std::tuple<Ts...> find1(condition &&cond, Cs... columns) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
|
||||
// static_assert(std::is_same_v<Cs, const char*>..., "The column names should be const char");
|
||||
return find1<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Cs>(columns)...);
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
|
||||
std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... columns) const
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
|
||||
auto h = find<Ts...>(pos, std::forward<condition>(cond), std::forward<Cs>(columns)...);
|
||||
|
||||
return h.size() == 1 ? *h.begin() : std::tuple<Ts...>{};
|
||||
}
|
||||
|
||||
/// \brief Test whether at least one row matches \a cond
///
/// \param cond The condition, it is prepared for this category before use
/// \return false if \a cond converts to false, true if a matching row is
///         found, false otherwise
bool exists(condition &&cond) const
{
	if (not cond)
		return false;

	cond.prepare(*this);

	// The condition may be able to report a single result by itself, in
	// that case there is no need to visit the rows.
	auto single_hit = cond.single();
	if (single_hit.has_value() and *single_hit)
		return true;

	for (auto rh : *this)
	{
		if (cond(rh))
			return true;
	}

	return false;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool has_children(row_handle r) const;
|
||||
bool has_parents(row_handle r) const;
|
||||
|
||||
std::vector<row_handle> get_children(row_handle r, const category &childCat) const;
|
||||
std::vector<row_handle> get_parents(row_handle r, const category &parentCat) const;
|
||||
std::vector<row_handle> get_linked(row_handle r, const category &cat) const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// void insert(const_iterator pos, const row_initializer &row)
|
||||
// {
|
||||
// insert_impl(pos, row);
|
||||
// }
|
||||
|
||||
// void insert(const_iterator pos, row_initializer &&row)
|
||||
// {
|
||||
// insert_impl(pos, std::move(row));
|
||||
// }
|
||||
|
||||
iterator erase(iterator pos);
|
||||
/// \brief Erase the row \a rh refers to, by forwarding to erase(iterator)
/// with an iterator built from the handle's row pointer
void erase(row_handle rh)
{
	this->erase(iterator(*this, rh.m_row));
}
|
||||
|
||||
size_t erase(condition &&cond);
|
||||
size_t erase(condition &&cond, std::function<void(row_handle)> &&visit);
|
||||
|
||||
/// \brief Add a row constructed from the items in \a ri
///
/// \return The result of the iterator based emplace
iterator emplace(row_initializer &&ri)
{
	auto first = ri.begin();
	auto last = ri.end();

	return this->emplace(first, last);
}
|
||||
|
||||
template <typename ItemIter>
|
||||
iterator emplace(ItemIter b, ItemIter e)
|
||||
{
|
||||
row *r = this->create_row();
|
||||
|
||||
try
|
||||
{
|
||||
for (auto i = b; i != e; ++i)
|
||||
{
|
||||
// item_value *new_item = this->create_item(*i);
|
||||
r->append(add_column(i->name()), { i->value() });
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
if (r != nullptr)
|
||||
this->delete_row(r);
|
||||
throw;
|
||||
}
|
||||
|
||||
return insert_impl(cend(), r);
|
||||
}
|
||||
|
||||
void clear();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief generate a new, unique ID. Pass it an ID generating function
|
||||
/// based on a sequence number. This function will be called until the
|
||||
/// result is unique in the context of this category
|
||||
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
|
||||
std::string get_unique_id(const std::string &prefix)
|
||||
{
|
||||
return get_unique_id([prefix](int nr)
|
||||
{ return prefix + std::to_string(nr + 1); });
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Rename a single column in the rows that match \a cond to value \a value
|
||||
/// making sure the linked categories are updated according to the link.
|
||||
/// That means, child categories are updated if the links are absolute
|
||||
/// and unique. If they are not, the child category rows are split.
|
||||
|
||||
void update_value(condition &&cond, std::string_view tag, std::string_view value)
|
||||
{
|
||||
auto rs = find(std::move(cond));
|
||||
std::vector<row_handle> rows;
|
||||
std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
|
||||
update_value(rows, tag, value);
|
||||
}
|
||||
|
||||
void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief Return the index number for \a column_name
|
||||
|
||||
uint16_t get_column_ix(std::string_view column_name) const
|
||||
{
|
||||
uint16_t result;
|
||||
|
||||
for (result = 0; result < m_columns.size(); ++result)
|
||||
{
|
||||
if (iequals(column_name, m_columns[result].m_name))
|
||||
break;
|
||||
}
|
||||
|
||||
if (VERBOSE > 0 and result == m_columns.size() and m_cat_validator != nullptr) // validate the name, if it is known at all (since it was not found)
|
||||
{
|
||||
auto iv = m_cat_validator->get_validator_for_item(column_name);
|
||||
if (iv == nullptr)
|
||||
std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << std::endl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string_view get_column_name(uint16_t ix) const
|
||||
{
|
||||
if (ix >= m_columns.size())
|
||||
throw std::out_of_range("column index is out of range");
|
||||
|
||||
return m_columns[ix].m_name;
|
||||
}
|
||||
|
||||
uint16_t add_column(std::string_view column_name)
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
size_t result = get_column_ix(column_name);
|
||||
|
||||
if (result == m_columns.size())
|
||||
{
|
||||
const item_validator *item_validator = nullptr;
|
||||
|
||||
if (m_cat_validator != nullptr)
|
||||
{
|
||||
item_validator = m_cat_validator->get_validator_for_item(column_name);
|
||||
if (item_validator == nullptr)
|
||||
m_validator->report_error("tag " + std::string(column_name) + " not allowed in category " + m_name, false);
|
||||
}
|
||||
|
||||
m_columns.emplace_back(column_name, item_validator);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool has_column(std::string_view name) const
|
||||
{
|
||||
return get_column_ix(name) < m_columns.size();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void sort(std::function<int(row_handle,row_handle)> f);
|
||||
void reorder_by_index();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::vector<std::string> get_tag_order() const;
|
||||
|
||||
void write(std::ostream &os) const;
|
||||
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingColumns = true);
|
||||
|
||||
private:
|
||||
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
|
||||
|
||||
public:
|
||||
friend std::ostream &operator<<(std::ostream &os, const category &cat)
|
||||
{
|
||||
cat.write(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true);
|
||||
|
||||
private:
|
||||
void erase_orphans(condition &&cond, category &parent);
|
||||
|
||||
using allocator_type = std::allocator<void>;
|
||||
|
||||
/// \brief Return a value initialised allocator of type allocator_type
constexpr allocator_type get_allocator() const
{
	return allocator_type{};
}
|
||||
|
||||
using char_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<char>;
|
||||
using char_allocator_traits = std::allocator_traits<char_allocator_type>;
|
||||
|
||||
using row_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<row>;
|
||||
using row_allocator_traits = std::allocator_traits<row_allocator_type>;
|
||||
|
||||
/// \brief Allocate storage for one row using the row allocator
///
/// No row is constructed in the storage, see create_row for that.
row_allocator_traits::pointer get_row()
{
	row_allocator_type row_alloc(get_allocator());

	return row_allocator_traits::allocate(row_alloc, 1);
}
|
||||
|
||||
/// \brief Allocate storage for one row with get_row and default construct
/// a row in it
///
/// \return Pointer to the new row
row *create_row()
{
	auto storage = this->get_row();

	row_allocator_type row_alloc(get_allocator());
	row_allocator_traits::construct(row_alloc, storage);

	return storage;
}
|
||||
|
||||
row *clone_row(const row &r);
|
||||
|
||||
void delete_row(row *r);
|
||||
|
||||
row_handle create_copy(row_handle r);
|
||||
|
||||
struct item_column
|
||||
{
|
||||
std::string m_name;
|
||||
const item_validator *m_validator;
|
||||
|
||||
item_column(std::string_view name, const item_validator *validator)
|
||||
: m_name(name)
|
||||
, m_validator(validator)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
struct link
|
||||
{
|
||||
link(category *linked, const link_validator *v)
|
||||
: linked(linked)
|
||||
, v(v)
|
||||
{
|
||||
}
|
||||
|
||||
category *linked;
|
||||
const link_validator *v;
|
||||
};
|
||||
|
||||
// proxy methods for every insertion
|
||||
iterator insert_impl(const_iterator pos, row *n);
|
||||
iterator erase_impl(const_iterator pos);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
condition get_parents_condition(row_handle rh, const category &parentCat) const;
|
||||
condition get_children_condition(row_handle rh, const category &childCat) const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void swap_item(size_t column_ix, row_handle &a, row_handle &b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string m_name;
|
||||
std::vector<item_column> m_columns;
|
||||
const validator *m_validator = nullptr;
|
||||
const category_validator *m_cat_validator = nullptr;
|
||||
std::vector<link> m_parent_links, m_child_links;
|
||||
bool m_cascade = true;
|
||||
uint32_t m_last_unique_num = 0;
|
||||
class category_index *m_index = nullptr;
|
||||
row *m_head = nullptr, *m_tail = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,7 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
/// \file This file contains the definition for the class Compound, encapsulating
|
||||
/// \file This file contains the definition for the class compound, encapsulating
|
||||
/// the information found for compounds in the CCD.
|
||||
|
||||
#include <map>
|
||||
@@ -34,20 +34,20 @@
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "cif++/AtomType.hpp"
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
#include <cif++/atom_type.hpp>
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class Compound;
|
||||
struct CompoundAtom;
|
||||
class CompoundFactoryImpl;
|
||||
class compound;
|
||||
struct compound_atom;
|
||||
class compound_factory_impl;
|
||||
|
||||
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
|
||||
enum class BondType
|
||||
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
|
||||
enum class bond_type
|
||||
{
|
||||
sing, // 'single bond'
|
||||
doub, // 'double bond'
|
||||
@@ -59,32 +59,32 @@ enum class BondType
|
||||
pi, // 'pi bond'
|
||||
};
|
||||
|
||||
std::string to_string(BondType bondType);
|
||||
BondType from_string(const std::string& bondType);
|
||||
std::string to_string(bond_type bondType);
|
||||
bond_type from_string(const std::string &bondType);
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about an atom in a chemical compound.
|
||||
/// This is a subset of the available information. Contact the author if you need more fields.
|
||||
|
||||
struct CompoundAtom
|
||||
struct compound_atom
|
||||
{
|
||||
std::string id;
|
||||
AtomType typeSymbol;
|
||||
atom_type type_symbol;
|
||||
int charge = 0;
|
||||
bool aromatic = false;
|
||||
bool leavingAtom = false;
|
||||
bool stereoConfig = false;
|
||||
bool leaving_atom = false;
|
||||
bool stereo_config = false;
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
/// \brief struct containing information about the bonds
|
||||
|
||||
struct CompoundBond
|
||||
struct compound_bond
|
||||
{
|
||||
std::string atomID[2];
|
||||
BondType type;
|
||||
bool aromatic = false, stereoConfig = false;
|
||||
std::string atom_id[2];
|
||||
bond_type type;
|
||||
bool aromatic = false, stereo_config = false;
|
||||
};
|
||||
|
||||
/// --------------------------------------------------------------------
|
||||
@@ -95,62 +95,56 @@ struct CompoundBond
|
||||
/// compound definitions by calling the addExtraComponents function and
|
||||
/// pass it a valid CCD formatted file.
|
||||
|
||||
class Compound
|
||||
class compound
|
||||
{
|
||||
public:
|
||||
|
||||
// accessors
|
||||
|
||||
std::string id() const { return mID; }
|
||||
std::string name() const { return mName; }
|
||||
std::string type() const { return mType; }
|
||||
std::string formula() const { return mFormula; }
|
||||
float formulaWeight() const { return mFormulaWeight; }
|
||||
int formalCharge() const { return mFormalCharge; }
|
||||
std::string id() const { return m_id; }
|
||||
std::string name() const { return m_name; }
|
||||
std::string type() const { return m_type; }
|
||||
std::string group() const { return m_group; }
|
||||
std::string formula() const { return m_formula; }
|
||||
float formula_weight() const { return m_formula_weight; }
|
||||
int formal_charge() const { return m_formal_charge; }
|
||||
|
||||
const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
|
||||
const std::vector<CompoundBond> &bonds() const { return mBonds; }
|
||||
const std::vector<compound_atom> &atoms() const { return m_atoms; }
|
||||
const std::vector<compound_bond> &bonds() const { return m_bonds; }
|
||||
|
||||
CompoundAtom getAtomByID(const std::string &atomID) const;
|
||||
compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
|
||||
|
||||
bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
|
||||
// float chiralVolume(const std::string &centreID) const;
|
||||
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
|
||||
|
||||
bool isWater() const
|
||||
bool is_water() const
|
||||
{
|
||||
return mID == "HOH" or mID == "H2O" or mID == "WAT";
|
||||
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
|
||||
}
|
||||
|
||||
private:
|
||||
friend class compound_factory_impl;
|
||||
friend class CCD_compound_factory_impl;
|
||||
friend class CCP4_compound_factory_impl;
|
||||
|
||||
friend class CompoundFactoryImpl;
|
||||
friend class CCDCompoundFactoryImpl;
|
||||
friend class CCP4CompoundFactoryImpl;
|
||||
compound(cif::datablock &db);
|
||||
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
|
||||
|
||||
Compound(cif::Datablock &db);
|
||||
Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type);
|
||||
|
||||
std::string mID;
|
||||
std::string mName;
|
||||
std::string mType;
|
||||
std::string mFormula;
|
||||
float mFormulaWeight = 0;
|
||||
int mFormalCharge = 0;
|
||||
std::vector<CompoundAtom> mAtoms;
|
||||
std::vector<CompoundBond> mBonds;
|
||||
std::string m_id;
|
||||
std::string m_name;
|
||||
std::string m_type;
|
||||
std::string m_group;
|
||||
std::string m_formula;
|
||||
float m_formula_weight = 0;
|
||||
int m_formal_charge = 0;
|
||||
std::vector<compound_atom> m_atoms;
|
||||
std::vector<compound_bond> m_bonds;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Factory class for Compound and Link objects
|
||||
// Factory class for compound and Link objects
|
||||
|
||||
CIFPP_EXPORT extern const std::map<std::string, char> kAAMap, kBaseMap;
|
||||
|
||||
class CompoundFactory
|
||||
class compound_factory
|
||||
{
|
||||
public:
|
||||
|
||||
/// \brief Initialise a singleton instance.
|
||||
///
|
||||
/// If you have a multithreaded application and want to have different
|
||||
@@ -159,37 +153,39 @@ class CompoundFactory
|
||||
/// flag to true.
|
||||
|
||||
static void init(bool useThreadLocalInstanceOnly);
|
||||
static CompoundFactory &instance();
|
||||
static compound_factory &instance();
|
||||
static void clear();
|
||||
|
||||
void setDefaultDictionary(const std::filesystem::path &inDictFile);
|
||||
void pushDictionary(const std::filesystem::path &inDictFile);
|
||||
void popDictionary();
|
||||
void set_default_dictionary(const std::filesystem::path &inDictFile);
|
||||
void push_dictionary(const std::filesystem::path &inDictFile);
|
||||
void pop_dictionary();
|
||||
|
||||
bool isKnownPeptide(const std::string &res_name) const;
|
||||
bool isKnownBase(const std::string &res_name) const;
|
||||
bool is_known_peptide(const std::string &res_name) const;
|
||||
bool is_known_base(const std::string &res_name) const;
|
||||
|
||||
/// \brief Create the Compound object for \a id
|
||||
/// \brief Create the compound object for \a id
|
||||
///
|
||||
/// This will create the Compound instance for \a id if it doesn't exist already.
|
||||
/// This will create the compound instance for \a id if it doesn't exist already.
|
||||
/// The result is owned by this factory and should not be deleted by the user.
|
||||
/// \param id The Compound ID, a three letter code usually
|
||||
/// \param id The compound ID, a three letter code usually
|
||||
/// \result The compound, or nullptr if it could not be created (missing info)
|
||||
const Compound *create(std::string id);
|
||||
const compound *create(std::string id);
|
||||
|
||||
~CompoundFactory();
|
||||
~compound_factory();
|
||||
|
||||
static const std::map<std::string, char> kAAMap, kBaseMap;
|
||||
|
||||
private:
|
||||
CompoundFactory();
|
||||
compound_factory();
|
||||
|
||||
CompoundFactory(const CompoundFactory &) = delete;
|
||||
CompoundFactory &operator=(const CompoundFactory &) = delete;
|
||||
compound_factory(const compound_factory &) = delete;
|
||||
compound_factory &operator=(const compound_factory &) = delete;
|
||||
|
||||
static std::unique_ptr<CompoundFactory> sInstance;
|
||||
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
|
||||
static bool sUseThreadLocalInstance;
|
||||
static std::unique_ptr<compound_factory> s_instance;
|
||||
static thread_local std::unique_ptr<compound_factory> tl_instance;
|
||||
static bool s_use_thread_local_instance;
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> mImpl;
|
||||
std::shared_ptr<compound_factory_impl> m_impl;
|
||||
};
|
||||
|
||||
} // namespace mmcif
|
||||
} // namespace pdbx
|
||||
745
include/cif++/condition.hpp
Normal file
745
include/cif++/condition.hpp
Normal file
@@ -0,0 +1,745 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <functional>
|
||||
#include <iostream>
|
||||
#include <regex>
|
||||
#include <utility>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// let's make life easier
|
||||
|
||||
// Returns the collection of field names of \a cat; the any_* conditions in this
// header iterate over it and index a row with each element.
iset get_category_fields(const category &cat);
|
||||
// Returns the index of column \a col in \a cat; the key_* conditions store the
// result in m_item_ix during prepare() and use it to index rows.
uint16_t get_column_ix(const category &cat, std::string_view col);
|
||||
// The result is stored as the m_icase flag of the key_* conditions and passed on
// to compare(); presumably true when the column type is case-insensitive -- TODO confirm.
bool is_column_type_uchar(const category &cat, std::string_view col);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// some more templates to be able to do querying
|
||||
|
||||
namespace detail
|
||||
{
|
||||
struct condition_impl
|
||||
{
|
||||
virtual ~condition_impl() {}
|
||||
|
||||
virtual condition_impl *prepare(const category &c) { return this; }
|
||||
virtual bool test(row_handle r) const = 0;
|
||||
virtual void str(std::ostream &os) const = 0;
|
||||
virtual std::optional<row_handle> single() const { return {}; };
|
||||
};
|
||||
|
||||
struct all_condition_impl : public condition_impl
|
||||
{
|
||||
bool test(row_handle r) const override { return true; }
|
||||
void str(std::ostream &os) const override { os << "*"; }
|
||||
};
|
||||
|
||||
// Forward declarations: class condition names these three node types as friends,
// and their constructors take over the m_impl pointer of the conditions passed in.
struct or_condition_impl;
|
||||
struct and_condition_impl;
|
||||
struct not_condition_impl;
|
||||
} // namespace detail
|
||||
|
||||
class condition
|
||||
{
|
||||
public:
|
||||
using condition_impl = detail::condition_impl;
|
||||
|
||||
condition()
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
}
|
||||
|
||||
explicit condition(condition_impl *impl)
|
||||
: m_impl(impl)
|
||||
{
|
||||
}
|
||||
|
||||
condition(const condition &) = delete;
|
||||
|
||||
condition(condition &&rhs) noexcept
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
}
|
||||
|
||||
condition &operator=(const condition &) = delete;
|
||||
|
||||
condition &operator=(condition &&rhs) noexcept
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
return *this;
|
||||
}
|
||||
|
||||
~condition()
|
||||
{
|
||||
delete m_impl;
|
||||
m_impl = nullptr;
|
||||
}
|
||||
|
||||
void prepare(const category &c);
|
||||
|
||||
bool operator()(row_handle r) const
|
||||
{
|
||||
assert(this->m_impl != nullptr);
|
||||
assert(this->m_prepared);
|
||||
return m_impl ? m_impl->test(r) : false;
|
||||
}
|
||||
|
||||
explicit operator bool() { return not empty(); }
|
||||
bool empty() const { return m_impl == nullptr; }
|
||||
|
||||
std::optional<row_handle> single() const
|
||||
{
|
||||
return m_impl ? m_impl->single() : std::optional<row_handle>();
|
||||
}
|
||||
|
||||
friend condition operator||(condition &&a, condition &&b);
|
||||
friend condition operator&&(condition &&a, condition &&b);
|
||||
|
||||
friend struct detail::or_condition_impl;
|
||||
friend struct detail::and_condition_impl;
|
||||
friend struct detail::not_condition_impl;
|
||||
|
||||
void swap(condition &rhs)
|
||||
{
|
||||
std::swap(m_impl, rhs.m_impl);
|
||||
std::swap(m_prepared, rhs.m_prepared);
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const condition &cond)
|
||||
{
|
||||
if (cond.m_impl)
|
||||
cond.m_impl->str(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void optimise(condition_impl *&impl);
|
||||
|
||||
condition_impl *m_impl;
|
||||
bool m_prepared = false;
|
||||
};
|
||||
|
||||
namespace detail
|
||||
{
|
||||
struct key_is_empty_condition_impl : public condition_impl
|
||||
{
|
||||
key_is_empty_condition_impl(const std::string &item_tag)
|
||||
: m_item_tag(item_tag)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return r[m_item_ix].empty();
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << " IS NULL";
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
};
|
||||
|
||||
struct key_equals_condition_impl : public condition_impl
|
||||
{
|
||||
key_equals_condition_impl(item &&i)
|
||||
: m_item_tag(i.name())
|
||||
, m_value(i.value())
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return m_single_hit.has_value() ?
|
||||
*m_single_hit == r :
|
||||
r[m_item_ix].compare(m_value, m_icase) == 0;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value;
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
return m_single_hit;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
bool m_icase = false;
|
||||
std::string m_value;
|
||||
std::optional<row_handle> m_single_hit;
|
||||
};
|
||||
|
||||
struct key_equals_or_empty_condition_impl : public condition_impl
|
||||
{
|
||||
key_equals_or_empty_condition_impl(key_equals_condition_impl *equals, key_is_empty_condition_impl *empty)
|
||||
: m_item_tag(equals->m_item_tag)
|
||||
, m_value(equals->m_value)
|
||||
, m_icase(equals->m_icase)
|
||||
, m_single_hit(equals->m_single_hit)
|
||||
{
|
||||
assert(empty->m_item_ix == equals->m_item_ix);
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
m_icase = is_column_type_uchar(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
bool result = false;
|
||||
if (m_single_hit.has_value())
|
||||
result = *m_single_hit == r;
|
||||
else
|
||||
result = r[m_item_ix].empty() or r[m_item_ix].compare(m_value, m_icase) == 0;
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_tag << " IS NULL";
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
return m_single_hit;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
std::string m_value;
|
||||
bool m_icase = false;
|
||||
std::optional<row_handle> m_single_hit;
|
||||
};
|
||||
|
||||
struct key_compare_condition_impl : public condition_impl
|
||||
{
|
||||
template <typename COMP>
|
||||
key_compare_condition_impl(const std::string &item_tag, COMP &&comp, const std::string &s)
|
||||
: m_item_tag(item_tag)
|
||||
, m_compare(std::move(comp))
|
||||
, m_str(s)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
m_icase = is_column_type_uchar(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return m_compare(r, m_icase);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << (m_icase ? "^ " : " ") << m_str;
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix = 0;
|
||||
bool m_icase = false;
|
||||
std::function<bool(row_handle, bool)> m_compare;
|
||||
std::string m_str;
|
||||
};
|
||||
|
||||
struct key_matches_condition_impl : public condition_impl
|
||||
{
|
||||
key_matches_condition_impl(const std::string &item_tag, const std::regex &rx)
|
||||
: m_item_tag(item_tag)
|
||||
, m_item_ix(0)
|
||||
, mRx(rx)
|
||||
{
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
m_item_ix = get_column_ix(c, m_item_tag);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
std::string_view txt = r[m_item_ix].text();
|
||||
return std::regex_match(txt.begin(), txt.end(), mRx);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << m_item_tag << " =~ expression";
|
||||
}
|
||||
|
||||
std::string m_item_tag;
|
||||
size_t m_item_ix;
|
||||
std::regex mRx;
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct any_is_condition_impl : public condition_impl
|
||||
{
|
||||
typedef T valueType;
|
||||
|
||||
any_is_condition_impl(const valueType &value)
|
||||
: mValue(value)
|
||||
{
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
auto &c = r.get_category();
|
||||
|
||||
bool result = false;
|
||||
for (auto &f : get_category_fields(c))
|
||||
{
|
||||
try
|
||||
{
|
||||
if (r[f].compare(mValue) == 0)
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "<any> == " << mValue;
|
||||
}
|
||||
|
||||
valueType mValue;
|
||||
};
|
||||
|
||||
struct any_matches_condition_impl : public condition_impl
|
||||
{
|
||||
any_matches_condition_impl(const std::regex &rx)
|
||||
: mRx(rx)
|
||||
{
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
auto &c = r.get_category();
|
||||
|
||||
bool result = false;
|
||||
for (auto &f : get_category_fields(c))
|
||||
{
|
||||
try
|
||||
{
|
||||
std::string_view txt = r[f].text();
|
||||
if (std::regex_match(txt.begin(), txt.end(), mRx))
|
||||
{
|
||||
result = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "<any> =~ expression";
|
||||
}
|
||||
|
||||
std::regex mRx;
|
||||
};
|
||||
|
||||
// TODO: Optimize and_condition by having a list of sub items.
|
||||
// That way you can also collapse multiple _is_ conditions in
|
||||
// case they make up an indexed tuple.
|
||||
struct and_condition_impl : public condition_impl
|
||||
{
|
||||
and_condition_impl(condition &&a, condition &&b)
|
||||
{
|
||||
mSub.emplace_back(std::exchange(a.m_impl, nullptr));
|
||||
mSub.emplace_back(std::exchange(b.m_impl, nullptr));
|
||||
}
|
||||
|
||||
~and_condition_impl()
|
||||
{
|
||||
for (auto sub : mSub)
|
||||
delete sub;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
if (sub->test(r))
|
||||
continue;
|
||||
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << '(';
|
||||
|
||||
bool first = true;
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
os << " AND ";
|
||||
|
||||
sub->str(os);
|
||||
}
|
||||
|
||||
os << ')';
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
std::optional<row_handle> result;
|
||||
|
||||
for (auto sub : mSub)
|
||||
{
|
||||
auto s = sub->single();
|
||||
|
||||
if (not result.has_value())
|
||||
{
|
||||
result = s;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (s == result)
|
||||
continue;
|
||||
|
||||
result.reset();
|
||||
break;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<condition_impl *> mSub;
|
||||
};
|
||||
|
||||
struct or_condition_impl : public condition_impl
|
||||
{
|
||||
or_condition_impl(condition &&a, condition &&b)
|
||||
: mA(nullptr)
|
||||
, mB(nullptr)
|
||||
{
|
||||
std::swap(mA, a.m_impl);
|
||||
std::swap(mB, b.m_impl);
|
||||
}
|
||||
|
||||
~or_condition_impl()
|
||||
{
|
||||
delete mA;
|
||||
delete mB;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override;
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return mA->test(r) or mB->test(r);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << '(';
|
||||
mA->str(os);
|
||||
os << ") OR (";
|
||||
mB->str(os);
|
||||
os << ')';
|
||||
}
|
||||
|
||||
virtual std::optional<row_handle> single() const override
|
||||
{
|
||||
auto sa = mA->single();
|
||||
auto sb = mB->single();
|
||||
|
||||
if (sa.has_value() and sb.has_value() and sa != sb)
|
||||
sa.reset();
|
||||
else if (not sa.has_value())
|
||||
sa = sb;
|
||||
|
||||
return sa;
|
||||
}
|
||||
|
||||
condition_impl *mA;
|
||||
condition_impl *mB;
|
||||
};
|
||||
|
||||
struct not_condition_impl : public condition_impl
|
||||
{
|
||||
not_condition_impl(condition &&a)
|
||||
: mA(nullptr)
|
||||
{
|
||||
std::swap(mA, a.m_impl);
|
||||
}
|
||||
|
||||
~not_condition_impl()
|
||||
{
|
||||
delete mA;
|
||||
}
|
||||
|
||||
condition_impl *prepare(const category &c) override
|
||||
{
|
||||
mA = mA->prepare(c);
|
||||
return this;
|
||||
}
|
||||
|
||||
bool test(row_handle r) const override
|
||||
{
|
||||
return not mA->test(r);
|
||||
}
|
||||
|
||||
void str(std::ostream &os) const override
|
||||
{
|
||||
os << "NOT (";
|
||||
mA->str(os);
|
||||
os << ')';
|
||||
}
|
||||
|
||||
condition_impl *mA;
|
||||
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
/// \brief Combine two conditions into one that requires both.
///
/// When one of the operands is empty, the other one is returned as is.
inline condition operator&&(condition &&a, condition &&b)
{
	if (not a.m_impl)
		return condition(std::move(b));

	if (not b.m_impl)
		return condition(std::move(a));

	return condition(new detail::and_condition_impl(std::move(a), std::move(b)));
}
|
||||
|
||||
/// \brief Combine two conditions into one that requires at least one of them.
///
/// When one of the operands is empty, the other one is returned as is.
inline condition operator||(condition &&a, condition &&b)
{
	if (not a.m_impl)
		return condition(std::move(b));

	if (not b.m_impl)
		return condition(std::move(a));

	return condition(new detail::or_condition_impl(std::move(a), std::move(b)));
}
|
||||
|
||||
/// \brief Tag type used to express a test for an empty item.
struct empty_type
{
};

/// \brief A helper to make it possible to have conditions like ("id"_key == cif::null)

inline constexpr empty_type null{};
|
||||
|
||||
/// \brief Wrapper around the name of an item, the left hand side of the
/// comparison operators that create conditions. Keys cannot be copied.
struct key
{
	explicit key(const std::string &itemTag)
		: m_item_tag{ itemTag }
	{
	}

	explicit key(const char *itemTag)
		: m_item_tag{ itemTag }
	{
	}

	key(const key &) = delete;
	key &operator=(const key &) = delete;

	std::string m_item_tag; ///< Name of the item
};
|
||||
|
||||
template <typename T>
|
||||
condition operator==(const key &key, const T &v)
|
||||
{
|
||||
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, v }));
|
||||
}
|
||||
|
||||
/// \brief Create a condition comparing the item named by \a key with a C string.
///
/// A null pointer or an empty string results in a test for an empty item.
inline condition operator==(const key &key, const char *value)
{
	if (value == nullptr or *value == 0)
		return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));

	return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
}
|
||||
|
||||
// inline condition_t operator==(const key& key, const detail::ItemReference& v)
|
||||
// {
|
||||
// if (v.empty())
|
||||
// return condition_t(new detail::key_is_empty_condition_impl(key.m_item_tag));
|
||||
// else
|
||||
// return condition_t(new detail::key_compare_condition_impl(key.m_item_tag, [tag = key.m_item_tag, v](const category& c, const row& r, bool icase)
|
||||
// { return r[tag].template compare<(v, icase) == 0; }));
|
||||
// }
|
||||
|
||||
template <typename T>
|
||||
condition operator!=(const key &key, const T &v)
|
||||
{
|
||||
return condition(new detail::not_condition_impl(operator==(key, v)));
|
||||
}
|
||||
|
||||
/// \brief Create a condition that holds when the item named by \a key does not equal the C string \a v.
///
/// This is the exact negation of operator==(const key &, const char *):
/// a null pointer or an empty string therefore results in the negation of
/// the test for an empty item, instead of the negation of a comparison with
/// an empty string value.
inline condition operator!=(const key &key, const char *v)
{
	// v is a const char *, so this selects the non-template overload that
	// handles null and empty strings.
	return condition(new detail::not_condition_impl(operator==(key, v)));
}
|
||||
|
||||
template <typename T>
|
||||
condition operator>(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " > " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) > 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator>=(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " >= " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) >= 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator<(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " < " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) < 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
condition operator<=(const key &key, const T &v)
|
||||
{
|
||||
std::ostringstream s;
|
||||
s << " <= " << v;
|
||||
|
||||
return condition(new detail::key_compare_condition_impl(
|
||||
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
|
||||
{ return r[tag].template compare<T>(v, icase) <= 0; },
|
||||
s.str()));
|
||||
}
|
||||
|
||||
/// \brief Create a condition that holds when the text of the item named by \a key matches \a rx.
inline condition operator==(const key &key, const std::regex &rx)
{
	auto impl = new detail::key_matches_condition_impl(key.m_item_tag, rx);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create a condition that holds when the item named by \a key is empty.
inline condition operator==(const key &key, const empty_type &)
{
	auto impl = new detail::key_is_empty_condition_impl(key.m_item_tag);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create the negation of condition \a rhs, taking over its implementation.
inline condition operator!(condition &&rhs)
{
	auto impl = new detail::not_condition_impl(std::move(rhs));
	return condition(impl);
}
|
||||
|
||||
/// \brief Tag type standing for 'any field' on the left hand side of a comparison.
struct any_type
{
};

/// \brief The one instance of any_type, for conditions like (cif::any == value).
inline constexpr any_type any{};
|
||||
|
||||
template <typename T>
|
||||
condition operator==(const any_type &, const T &v)
|
||||
{
|
||||
return condition(new detail::any_is_condition_impl<T>(v));
|
||||
}
|
||||
|
||||
/// \brief Create a condition that holds when the text of any field of a row matches \a rx.
inline condition operator==(const any_type &, const std::regex &rx)
{
	auto impl = new detail::any_matches_condition_impl(rx);
	return condition(impl);
}
|
||||
|
||||
/// \brief Create a condition that accepts every row.
inline condition all()
{
	auto impl = new detail::all_condition_impl();
	return condition(impl);
}
|
||||
|
||||
namespace literals
|
||||
{
|
||||
inline key operator""_key(const char *text, size_t length)
|
||||
{
|
||||
return key(std::string(text, length));
|
||||
}
|
||||
} // namespace literals
|
||||
|
||||
} // namespace cif
|
||||
100
include/cif++/datablock.hpp
Normal file
100
include/cif++/datablock.hpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class datablock : public std::list<category>
|
||||
{
|
||||
public:
|
||||
datablock() = default;
|
||||
|
||||
datablock(std::string_view name)
|
||||
: m_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
datablock(const datablock &) = default;
|
||||
|
||||
datablock(datablock &&) = default;
|
||||
|
||||
datablock &operator=(const datablock &) = default;
|
||||
datablock &operator=(datablock &&) = default;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
const std::string &name() const { return m_name; }
|
||||
|
||||
void set_name(std::string_view name)
|
||||
{
|
||||
m_name = name;
|
||||
}
|
||||
|
||||
void set_validator(const validator *v);
|
||||
|
||||
const validator *get_validator() const;
|
||||
|
||||
bool is_valid() const;
|
||||
bool validate_links() const;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
category &operator[](std::string_view name);
|
||||
const category &operator[](std::string_view name) const;
|
||||
|
||||
category *get(std::string_view name);
|
||||
const category *get(std::string_view name) const;
|
||||
|
||||
std::tuple<iterator, bool> emplace(std::string_view name);
|
||||
|
||||
std::vector<std::string> get_tag_order() const;
|
||||
void write(std::ostream &os) const;
|
||||
void write(std::ostream &os, const std::vector<std::string> &tag_order);
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const datablock &db)
|
||||
{
|
||||
db.write(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool operator==(const datablock &rhs) const;
|
||||
|
||||
private:
|
||||
std::string m_name;
|
||||
const validator *m_validator = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
36
include/cif++/dictionary_parser.hpp
Normal file
36
include/cif++/dictionary_parser.hpp
Normal file
@@ -0,0 +1,36 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
/// \brief Read a dictionary from \a is and return it as a validator.
/// \param name Name passed along for the dictionary; presumably the name the resulting validator is known by -- TODO confirm
/// \param is   Stream the dictionary text is read from
validator parse_dictionary(std::string_view name, std::istream &is);
|
||||
|
||||
} // namespace cif
|
||||
122
include/cif++/file.hpp
Normal file
122
include/cif++/file.hpp
Normal file
@@ -0,0 +1,122 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <list>
|
||||
|
||||
#include <cif++/datablock.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class file : public std::list<datablock>
|
||||
{
|
||||
public:
|
||||
file() = default;
|
||||
|
||||
explicit file(const std::filesystem::path &p)
|
||||
{
|
||||
load(p);
|
||||
}
|
||||
|
||||
explicit file(std::istream &is)
|
||||
{
|
||||
load(is);
|
||||
}
|
||||
|
||||
explicit file(const char *data, size_t length)
|
||||
{
|
||||
struct membuf : public std::streambuf
|
||||
{
|
||||
membuf(char *text, size_t length)
|
||||
{
|
||||
this->setg(text, text, text + length);
|
||||
}
|
||||
} buffer(const_cast<char *>(data), length);
|
||||
|
||||
std::istream is(&buffer);
|
||||
load(is);
|
||||
}
|
||||
|
||||
file(const file &) = default;
|
||||
file(file &&) = default;
|
||||
file &operator=(const file &) = default;
|
||||
file &operator=(file &&) = default;
|
||||
|
||||
void set_validator(const validator *v);
|
||||
|
||||
const validator *get_validator() const
|
||||
{
|
||||
return m_validator;
|
||||
}
|
||||
|
||||
bool is_valid() const;
|
||||
bool is_valid();
|
||||
bool validate_links() const;
|
||||
|
||||
void load_dictionary();
|
||||
void load_dictionary(std::string_view name);
|
||||
|
||||
bool contains(std::string_view name) const;
|
||||
|
||||
datablock &front()
|
||||
{
|
||||
assert(not empty());
|
||||
return std::list<datablock>::front();
|
||||
}
|
||||
|
||||
const datablock &front() const
|
||||
{
|
||||
assert(not empty());
|
||||
return std::list<datablock>::front();
|
||||
}
|
||||
|
||||
datablock &operator[](std::string_view name);
|
||||
const datablock &operator[](std::string_view name) const;
|
||||
|
||||
std::tuple<iterator, bool> emplace(std::string_view name);
|
||||
|
||||
void load(const std::filesystem::path &p);
|
||||
void load(std::istream &is);
|
||||
|
||||
void save(const std::filesystem::path &p) const;
|
||||
void save(std::ostream &os) const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const file &f)
|
||||
{
|
||||
f.save(os);
|
||||
return os;
|
||||
}
|
||||
|
||||
private:
|
||||
const validator *m_validator = nullptr;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
221
include/cif++/format.hpp
Normal file
221
include/cif++/format.hpp
Normal file
@@ -0,0 +1,221 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
|
||||
/// \file format.hpp
|
||||
/// File containing a basic reimplementation of boost::format
|
||||
/// but then a bit more simplistic. Still this allowed me to move my code
|
||||
/// from using boost::format to something without external dependency easily.
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
namespace detail
{
	/// \brief Adapter that stores a copy of a format() argument and hands out
	/// the value that is passed on to snprintf.
	///
	/// The primary template returns the stored value unchanged.
	template <typename T>
	struct to_varg
	{
		using type = T;

		to_varg(const T &v)
			: m_value(v)
		{
		}

		type operator*() { return m_value; }

		T m_value;
	};

	// template <>
	// struct to_varg<char>
	// {
	// 	using type = const char *;

	// 	to_varg(const char &v)
	// 		: m_value({ v })
	// 	{
	// 	}

	// 	type operator*() { return m_value.c_str(); }

	// 	std::string m_value;
	// };

	/// \brief Specialisation for C strings: the text is copied into a
	/// std::string and a pointer to that copy is handed out.
	template <>
	struct to_varg<const char *>
	{
		using type = const char *;

		to_varg(const char *v)
			: m_value(v)
		{
		}

		type operator*() { return m_value.c_str(); }

		std::string m_value;
	};

	/// \brief Specialisation for std::string: hands out a pointer to the
	/// characters of the stored copy.
	template <>
	struct to_varg<std::string>
	{
		using type = const char *;

		to_varg(const std::string &v)
			: m_value(v)
		{
		}

		type operator*() { return m_value.c_str(); }

		std::string m_value;
	};

} // namespace detail

/// \brief A printf style format string together with copies of its arguments.
///
/// The object is neither copyable nor assignable since m_vargs may contain
/// pointers into the strings owned by m_args.
template <typename... Args>
class format_plus_arg
{
  public:
	using args_vector_type = std::tuple<detail::to_varg<Args>...>;
	using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;

	format_plus_arg(const format_plus_arg &) = delete;
	format_plus_arg &operator=(const format_plus_arg &) = delete;

	/// \brief Store the format string \a fmt and a copy of each argument
	format_plus_arg(std::string_view fmt, Args... args)
		: m_fmt(fmt)
		, m_args(std::forward<Args>(args)...)
	{
		auto ix = std::make_index_sequence<sizeof...(Args)>();
		copy_vargs(ix);
	}

	/// \brief Return the formatted text.
	///
	/// The result is not limited to the size of the internal stack buffer;
	/// an empty string is returned when snprintf reports an error.
	std::string str() const
	{
		const char *fmt = m_fmt.c_str();
		return std::apply(
			[fmt](const auto &...values)
			{ return format_plus_arg::render(fmt, values...); },
			m_vargs);
	}

	/// \brief Write the formatted text to \a os using unformatted output
	friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f)
	{
		const std::string text = f.str();
		os.write(text.data(), static_cast<std::streamsize>(text.size()));
		return os;
	}

  private:
	/// \brief Run snprintf, first into a stack buffer and, if the result
	/// does not fit, a second time into a string of the required size.
	template <typename... Vs>
	static std::string render(const char *fmt, Vs... values)
	{
		char buffer[1024];

		// snprintf returns the length the complete output would have,
		// which may exceed the buffer size, or a negative value on error.
		const int needed = snprintf(buffer, sizeof(buffer), fmt, values...);
		if (needed < 0)
			return {};

		const auto length = static_cast<std::string::size_type>(needed);
		if (length < sizeof(buffer))
			return { buffer, length };

		// one extra byte for the terminating zero written by snprintf
		std::string result(length + 1, '\0');
		snprintf(result.data(), result.size(), fmt, values...);
		result.resize(length);
		return result;
	}

	/// \brief Fill m_vargs with the values handed out by the wrappers in m_args
	template <size_t... I>
	void copy_vargs(std::index_sequence<I...>)
	{
		((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
	}

	std::string m_fmt;
	args_vector_type m_args;
	vargs_vector_type m_vargs;
};

/// \brief Create a format_plus_arg for the printf style format string \a fmt
/// and the arguments \a args. Use str() or operator<< to obtain the text.
template <typename... Args>
constexpr auto format(std::string_view fmt, Args... args)
{
	return format_plus_arg(fmt, std::forward<Args>(args)...);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// A streambuf that fills out lines with spaces up until a specified width
|
||||
|
||||
class fill_out_streambuf : public std::streambuf
{
  public:
	using base_type = std::streambuf;
	using int_type = base_type::int_type;
	using char_type = base_type::char_type;
	using traits_type = base_type::traits_type;

	/// \brief Remember the stream \a os and its current stream buffer, lines
	/// will be padded to \a width characters.
	///
	/// The constructor does not install this buffer in \a os, that is left
	/// to the caller.
	fill_out_streambuf(std::ostream &os, int width = 80)
		: m_os(os)
		, m_upstream(os.rdbuf())
		, m_width(width)
	{
	}

	/// \brief Put the stream buffer found at construction back into the stream
	~fill_out_streambuf() override
	{
		m_os.rdbuf(m_upstream);
	}

	/// \brief Forward the character \a ic to the upstream buffer, a newline
	/// is preceded by the spaces needed to reach the requested width.
	///
	/// \return eof when writing upstream failed, a value other than eof otherwise
	int_type overflow(int_type ic = traits_type::eof()) override
	{
		// eof is not a character, there is nothing to write in that case
		if (traits_type::eq_int_type(ic, traits_type::eof()))
			return traits_type::not_eof(ic);

		const char ch = traits_type::to_char_type(ic);

		int_type result = ic;

		if (ch == '\n')
		{
			for (int i = m_column_count; result != traits_type::eof() and i < m_width; ++i)
				result = m_upstream->sputc(' ');
		}

		if (result != traits_type::eof())
			result = m_upstream->sputc(ch);

		if (result != traits_type::eof())
		{
			if (ch == '\n')
			{
				m_column_count = 0;
				++m_line_count;
			}
			else
				++m_column_count;
		}

		return result;
	}

	/// \brief The stream buffer all output is forwarded to
	std::streambuf *get_upstream() const { return m_upstream; }

	/// \brief The number of newlines written so far
	int get_line_count() const { return m_line_count; }

  private:
	std::ostream &m_os;
	std::streambuf *m_upstream;
	int m_width;
	int m_line_count = 0;
	int m_column_count = 0;
};
|
||||
|
||||
} // namespace cif
|
||||
46
include/cif++/forward_decl.hpp
Normal file
46
include/cif++/forward_decl.hpp
Normal file
@@ -0,0 +1,46 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
class category;
|
||||
class datablock;
|
||||
class file;
|
||||
class parser;
|
||||
|
||||
class row;
|
||||
class row_handle;
|
||||
|
||||
class item;
|
||||
class item_handle;
|
||||
|
||||
} // namespace cif
|
||||
566
include/cif++/item.hpp
Normal file
566
include/cif++/item.hpp
Normal file
@@ -0,0 +1,566 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <charconv>
|
||||
#include <cstring>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <optional>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
/// \file item.hpp
|
||||
/// This file contains the declaration of item but also the item_value and item_handle
|
||||
/// These handle the storage of and access to the data for a single data field.
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief item is a transient class that is used to pass data into rows
|
||||
/// but it also takes care of formatting data.
|
||||
class item
|
||||
{
|
||||
public:
|
||||
/// \brief Default constructor, empty item
|
||||
item() = default;
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a single character string with content \a value
|
||||
item(std::string_view name, char value)
|
||||
: m_name(name)
|
||||
, m_value({ value })
|
||||
{
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted floating point value \a value with
|
||||
/// precision \a precision
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
item(std::string_view name, const T &value, int precision)
|
||||
: m_name(name)
|
||||
{
|
||||
using namespace std;
|
||||
using namespace cif;
|
||||
|
||||
char buffer[32];
|
||||
|
||||
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::fixed, precision);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a formatted floating point value \a value with
|
||||
/// so-called general formatting
|
||||
template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
using namespace std;
|
||||
using namespace cif;
|
||||
|
||||
char buffer[32];
|
||||
|
||||
auto r = to_chars(buffer, buffer + sizeof(buffer) - 1, value, chars_format::general);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted integral value \a value
|
||||
template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T,bool>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
char buffer[32];
|
||||
|
||||
auto r = std::to_chars(buffer, buffer + sizeof(buffer) - 1, value);
|
||||
if (r.ec != std::errc())
|
||||
throw std::runtime_error("Could not format number");
|
||||
|
||||
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
|
||||
*r.ptr = 0;
|
||||
m_value.assign(buffer, r.ptr - buffer);
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content a the formatted boolean value \a value
|
||||
template <typename T, std::enable_if_t<std::is_same_v<T,bool>, int> = 0>
|
||||
item(const std::string_view name, const T &value)
|
||||
: m_name(name)
|
||||
{
|
||||
m_value.assign(value ? "y" : "n");
|
||||
}
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content value \a value
|
||||
item(const std::string_view name, const std::string_view value)
|
||||
: m_name(name)
|
||||
, m_value(value)
|
||||
{
|
||||
}
|
||||
|
||||
item(const item &rhs) = default;
|
||||
|
||||
item(item &&rhs) noexcept = default;
|
||||
|
||||
item &operator=(const item &rhs) = default;
|
||||
|
||||
item &operator=(item &&rhs) noexcept = default;
|
||||
|
||||
std::string_view name() const { return m_name; }
|
||||
std::string_view value() const { return m_value; }
|
||||
|
||||
/// \brief replace the content of the stored value with \a v
|
||||
void value(std::string_view v) { m_value = v; }
|
||||
|
||||
/// \brief empty means either null or unknown
|
||||
bool empty() const { return m_value.empty(); }
|
||||
|
||||
/// \brief returns true if the field contains '.'
|
||||
bool is_null() const { return m_value == "."; }
|
||||
|
||||
/// \brief returns true if the field contains '?'
|
||||
bool is_unknown() const { return m_value == "?"; }
|
||||
|
||||
/// \brief the length of the value string
|
||||
size_t length() const { return m_value.length(); }
|
||||
|
||||
/// \brief support for structured binding
|
||||
template<size_t N>
|
||||
decltype(auto) get() const
|
||||
{
|
||||
if constexpr (N == 0) return name();
|
||||
else if constexpr (N == 1) return value();
|
||||
}
|
||||
|
||||
private:
|
||||
std::string_view m_name;
|
||||
std::string m_value;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief the internal storage for items in a category
|
||||
///
|
||||
/// Internal storage, strictly forward linked list with minimal space
|
||||
/// requirements. Strings of size 7 or shorter are stored internally.
|
||||
/// Typically, more than 99% of the strings in an mmCIF file are less
|
||||
/// than 8 bytes in length.
|
||||
|
||||
struct item_value
{
	/// \brief Default constructor, the value has length zero
	item_value() = default;

	/// \brief Construct from \a text. Text shorter than kBufferSize is kept
	/// in the local buffer, longer text is copied to memory allocated with new[].
	item_value(std::string_view text)
		: m_length(text.length())
	{
		if (m_length >= kBufferSize)
		{
			m_data = new char[m_length + 1];
			std::copy(text.begin(), text.end(), m_data);
			m_data[m_length] = 0;
		}
		else
		{
			std::copy(text.begin(), text.end(), m_local_data);
			m_local_data[m_length] = 0;
		}
	}

	/// \brief Move constructor, \a rhs is left with length zero.
	///
	/// Only scalars are exchanged, therefore the operation is noexcept.
	/// The content of the union is transferred through m_data regardless
	/// of which member was used to store the text.
	item_value(item_value &&rhs) noexcept
		: m_length(std::exchange(rhs.m_length, 0))
		, m_data(std::exchange(rhs.m_data, nullptr))
	{
	}

	/// \brief Move assignment, implemented by swapping the content of
	/// this and \a rhs so that the old content is released by \a rhs
	item_value &operator=(item_value &&rhs) noexcept
	{
		if (this != &rhs)
		{
			m_length = std::exchange(rhs.m_length, m_length);
			m_data = std::exchange(rhs.m_data, m_data);
		}
		return *this;
	}

	/// \brief Destructor, releases the allocated memory if the text was
	/// too long for the local buffer
	~item_value()
	{
		if (m_length >= kBufferSize)
			delete[] m_data;
		m_data = nullptr;
		m_length = 0;
	}

	item_value(const item_value &) = delete;
	item_value &operator=(const item_value &) = delete;

	/// \brief returns true if the length of the stored text is not zero
	explicit operator bool() const
	{
		return m_length != 0;
	}

	size_t m_length = 0; ///< Length of the stored text, without terminating zero
	union
	{
		char m_local_data[8]; ///< Used when m_length < kBufferSize
		char *m_data;         ///< Used when m_length >= kBufferSize
	};

	static constexpr size_t kBufferSize = sizeof(m_local_data);

	// By using std::string_view instead of c_str we obtain a
	// nice performance gain since we avoid many calls to strlen.
	constexpr inline std::string_view text() const
	{
		return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
	}
};
|
||||
|
||||
// static_assert(sizeof(item_value) == 24, "sizeof(item_value) should be 24 bytes");
|
||||
static_assert(sizeof(item_value) == 16, "sizeof(item_value) should be 16 bytes");
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Transient object to access stored data
|
||||
|
||||
/// \brief This is item_handle, it is used to access the data stored in item_value.
|
||||
|
||||
struct item_handle
|
||||
{
|
||||
public:
|
||||
// conversion helper class
|
||||
template <typename T, typename = void>
|
||||
struct item_value_as;
|
||||
|
||||
template <typename T>
|
||||
item_handle &operator=(const T &value)
|
||||
{
|
||||
item v{ "", value };
|
||||
assign_value(v);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename... Ts>
|
||||
void os(const Ts &...v)
|
||||
{
|
||||
std::ostringstream ss;
|
||||
((ss << v), ...);
|
||||
this->operator=(ss.str());
|
||||
}
|
||||
|
||||
void swap(item_handle &b);
|
||||
|
||||
template <typename T = std::string>
|
||||
auto as() const -> T
|
||||
{
|
||||
using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
|
||||
return item_value_as<value_type>::convert(*this);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
auto value_or(const T &dv) const
|
||||
{
|
||||
return empty() ? dv : this->as<T>();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
int compare(const T &value, bool icase = true) const
|
||||
{
|
||||
return item_value_as<T>::compare(*this, value, icase);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool operator==(const T &value) const
|
||||
{
|
||||
// TODO: icase or not icase?
|
||||
return item_value_as<T>::compare(*this, value, true) == 0;
|
||||
}
|
||||
|
||||
// We may not have C++20 yet...
|
||||
template <typename T>
|
||||
bool operator!=(const T &value) const
|
||||
{
|
||||
return not operator==(value);
|
||||
}
|
||||
|
||||
// empty means either null or unknown
|
||||
bool empty() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.empty() or (txt.length() == 1 and (txt.front() == '.' or txt.front() == '?'));
|
||||
}
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
// is_null means the field contains '.'
|
||||
bool is_null() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.length() == 1 and txt.front() == '.';
|
||||
}
|
||||
|
||||
// is_unknown means the field contains '?'
|
||||
bool is_unknown() const
|
||||
{
|
||||
auto txt = text();
|
||||
return txt.length() == 1 and txt.front() == '?';
|
||||
}
|
||||
|
||||
std::string_view text() const;
|
||||
|
||||
item_handle(uint16_t column, row_handle &row)
|
||||
: m_column(column)
|
||||
, m_row_handle(row)
|
||||
{
|
||||
}
|
||||
|
||||
static const item_handle s_null_item;
|
||||
|
||||
friend void swap(item_handle a, item_handle b)
|
||||
{
|
||||
a.swap(b);
|
||||
}
|
||||
|
||||
private:
|
||||
item_handle();
|
||||
|
||||
uint16_t m_column;
|
||||
row_handle &m_row_handle;
|
||||
|
||||
void assign_value(const item &value);
|
||||
};
|
||||
|
||||
// So sad that older gcc implementations of from_chars did not support floats yet...
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> and not std::is_same_v<T, bool>>>
|
||||
{
|
||||
using value_type = std::remove_reference_t<std::remove_cv_t<T>>;
|
||||
|
||||
static value_type convert(const item_handle &ref)
|
||||
{
|
||||
value_type result = {};
|
||||
|
||||
if (not ref.empty())
|
||||
{
|
||||
auto txt = ref.text();
|
||||
|
||||
std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), result);
|
||||
|
||||
if (r.ec != std::errc())
|
||||
{
|
||||
result = {};
|
||||
if (cif::VERBOSE)
|
||||
{
|
||||
if (r.ec == std::errc::invalid_argument)
|
||||
std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
|
||||
else if (r.ec == std::errc::result_out_of_range)
|
||||
std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const T &value, bool icase)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
auto txt = ref.text();
|
||||
|
||||
if (txt.empty())
|
||||
result = 1;
|
||||
else
|
||||
{
|
||||
value_type v = {};
|
||||
|
||||
std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), v);
|
||||
|
||||
if (r.ec != std::errc())
|
||||
{
|
||||
if (cif::VERBOSE)
|
||||
{
|
||||
if (r.ec == std::errc::invalid_argument)
|
||||
std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
|
||||
else if (r.ec == std::errc::result_out_of_range)
|
||||
std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
|
||||
}
|
||||
result = 1;
|
||||
}
|
||||
else if (v < value)
|
||||
result = -1;
|
||||
else if (v > value)
|
||||
result = 1;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<std::optional<T>>
|
||||
{
|
||||
static std::optional<T> convert(const item_handle &ref)
|
||||
{
|
||||
std::optional<T> result;
|
||||
if (ref)
|
||||
result = ref.as<T>();
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, std::optional<T> value, bool icase)
|
||||
{
|
||||
if (ref.empty() and not value)
|
||||
return 0;
|
||||
|
||||
if (ref.empty())
|
||||
return -1;
|
||||
else if (not value)
|
||||
return 1;
|
||||
else
|
||||
return ref.compare(*value, icase);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
|
||||
{
|
||||
static bool convert(const item_handle &ref)
|
||||
{
|
||||
bool result = false;
|
||||
if (not ref.empty())
|
||||
result = iequals(ref.text(), "y");
|
||||
return result;
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, bool value, bool icase)
|
||||
{
|
||||
bool rv = convert(ref);
|
||||
return value && rv ? 0
|
||||
: (rv < value ? -1 : 1);
|
||||
}
|
||||
};
|
||||
|
||||
template <size_t N>
|
||||
struct item_handle::item_value_as<char[N]>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const char (&value)[N], bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, const char *>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const char *value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string_view>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const std::string_view &value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string>>>
|
||||
{
|
||||
static std::string convert(const item_handle &ref)
|
||||
{
|
||||
if (ref.empty())
|
||||
return {};
|
||||
return { ref.text().data(), ref.text().size() };
|
||||
}
|
||||
|
||||
static int compare(const item_handle &ref, const std::string &value, bool icase)
|
||||
{
|
||||
return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
|
||||
namespace std
|
||||
{
|
||||
|
||||
template<> struct tuple_size<::cif::item>
|
||||
: public std::integral_constant<std::size_t, 2> {};
|
||||
|
||||
template<> struct tuple_element<0, ::cif::item>
|
||||
{
|
||||
using type = decltype(std::declval<::cif::item>().name());
|
||||
};
|
||||
|
||||
template<> struct tuple_element<1, ::cif::item>
|
||||
{
|
||||
using type = decltype(std::declval<::cif::item>().value());
|
||||
};
|
||||
|
||||
}
|
||||
676
include/cif++/iterator.hpp
Normal file
676
include/cif++/iterator.hpp
Normal file
@@ -0,0 +1,676 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
class iterator_impl
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
|
||||
static constexpr size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using tuple_type = std::tuple<Ts...>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = tuple_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2, typename... T2s>
|
||||
iterator_impl(const iterator_impl<C2, T2s...> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(iterator_impl<IRowType, Ts...> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type *>(rhs.m_current))
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, N> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_column_ix(cix)
|
||||
{
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
m_column_ix = i.m_column_ix;
|
||||
m_value = i.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_value;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
m_value = get(std::make_index_sequence<N>());
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
template <std::size_t... Is>
|
||||
tuple_type get(std::index_sequence<Is...>) const
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
{
|
||||
row_handle rh{*m_category, *m_current};
|
||||
return tuple_type{rh[m_column_ix[Is]].template as<Ts>()...};
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
value_type m_value;
|
||||
std::array<size_t, N> m_column_ix;
|
||||
};
|
||||
|
||||
template<typename Category>
|
||||
class iterator_impl<Category>
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = row_handle;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = row_handle;
|
||||
using reference = row_handle;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2>
|
||||
iterator_impl(const iterator_impl<C2> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type*>(rhs.m_current))
|
||||
{
|
||||
}
|
||||
|
||||
iterator_impl(Category &cat, row *current)
|
||||
: m_category(const_cast<category_type *>(&cat))
|
||||
, m_current(current)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 0> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return {*m_category, *m_current};
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_current;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
};
|
||||
|
||||
|
||||
template<typename Category, typename T>
|
||||
class iterator_impl<Category, T>
|
||||
{
|
||||
public:
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
friend class category;
|
||||
|
||||
using category_type = std::remove_cv_t<Category>;
|
||||
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
|
||||
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = T;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_impl() = default;
|
||||
|
||||
iterator_impl(const iterator_impl &rhs) = default;
|
||||
|
||||
template <typename C2, typename T2>
|
||||
iterator_impl(const iterator_impl<C2, T2> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(iterator_impl<IRowType, T> &rhs)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(const_cast<row_type *>(rhs.m_current))
|
||||
, m_value(rhs.m_value)
|
||||
, m_column_ix(rhs.m_column_ix)
|
||||
{
|
||||
m_value = get(m_current);
|
||||
}
|
||||
|
||||
template <typename IRowType>
|
||||
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 1> &cix)
|
||||
: m_category(rhs.m_category)
|
||||
, m_current(rhs.m_current)
|
||||
, m_column_ix(cix[0])
|
||||
{
|
||||
m_value = get();
|
||||
}
|
||||
|
||||
iterator_impl &operator=(const iterator_impl &i)
|
||||
{
|
||||
m_category = i.m_category;
|
||||
m_current = i.m_current;
|
||||
m_column_ix = i.m_column_ix;
|
||||
m_value = i.m_value;
|
||||
return *this;
|
||||
}
|
||||
|
||||
virtual ~iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return m_value;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &m_value;
|
||||
}
|
||||
|
||||
operator const row_handle() const
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
operator row_handle()
|
||||
{
|
||||
return { *m_category, *m_current };
|
||||
}
|
||||
|
||||
iterator_impl &operator++()
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
m_current = m_current->m_next;
|
||||
|
||||
m_value = get();
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_impl operator++(int)
|
||||
{
|
||||
iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
|
||||
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current == rhs.m_current;
|
||||
}
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
|
||||
{
|
||||
return m_current != rhs.m_current;
|
||||
}
|
||||
|
||||
private:
|
||||
value_type get() const
|
||||
{
|
||||
if (m_current != nullptr)
|
||||
{
|
||||
row_handle rh{*m_category, *m_current};
|
||||
return rh[m_column_ix].template as<T>();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
category_type *m_category = nullptr;
|
||||
row_type *m_current = nullptr;
|
||||
value_type m_value;
|
||||
size_t m_column_ix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// iterator proxy
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
class iterator_proxy
|
||||
{
|
||||
public:
|
||||
static constexpr const size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = Category;
|
||||
using row_type = std::conditional_t<std::is_const_v<category_type>, const row, row>;
|
||||
|
||||
using iterator = iterator_impl<category_type, Ts...>;
|
||||
using row_iterator = iterator_impl<category_type>;
|
||||
|
||||
iterator_proxy(category_type &cat, row_iterator pos, char const *const columns[N]);
|
||||
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> columns);
|
||||
|
||||
iterator_proxy(iterator_proxy &&p);
|
||||
iterator_proxy &operator=(iterator_proxy &&p);
|
||||
|
||||
iterator_proxy(const iterator_proxy &) = delete;
|
||||
iterator_proxy &operator=(const iterator_proxy &) = delete;
|
||||
|
||||
iterator begin() const { return iterator(m_begin, m_column_ix); }
|
||||
iterator end() const { return iterator(m_end, m_column_ix); }
|
||||
|
||||
bool empty() const { return m_begin == m_end; }
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
size_t size() const { return std::distance(begin(), end()); }
|
||||
|
||||
// row front() { return *begin(); }
|
||||
// row back() { return *(std::prev(end())); }
|
||||
|
||||
category_type &category() const { return *m_category; }
|
||||
|
||||
void swap(iterator_proxy &rhs)
|
||||
{
|
||||
std::swap(m_category, rhs.m_category);
|
||||
std::swap(m_begin, rhs.m_begin);
|
||||
std::swap(m_end, rhs.m_end);
|
||||
std::swap(m_column_ix, rhs.m_column_ix);
|
||||
}
|
||||
|
||||
private:
|
||||
category_type *m_category;
|
||||
row_iterator m_begin, m_end;
|
||||
std::array<size_t, N> m_column_ix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// conditional iterator proxy
|
||||
|
||||
template <typename CategoryType, typename... Ts>
|
||||
class conditional_iterator_proxy
|
||||
{
|
||||
public:
|
||||
static constexpr const size_t N = sizeof...(Ts);
|
||||
|
||||
using category_type = std::remove_cv_t<CategoryType>;
|
||||
|
||||
using base_iterator = iterator_impl<CategoryType, Ts...>;
|
||||
using value_type = typename base_iterator::value_type;
|
||||
using row_type = typename base_iterator::row_type;
|
||||
using row_iterator = iterator_impl<CategoryType>;
|
||||
|
||||
class conditional_iterator_impl
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::forward_iterator_tag;
|
||||
using value_type = conditional_iterator_proxy::value_type;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type;
|
||||
|
||||
conditional_iterator_impl(CategoryType &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix);
|
||||
conditional_iterator_impl(const conditional_iterator_impl &i) = default;
|
||||
conditional_iterator_impl &operator=(const conditional_iterator_impl &i) = default;
|
||||
|
||||
virtual ~conditional_iterator_impl() = default;
|
||||
|
||||
reference operator*()
|
||||
{
|
||||
return *mBegin;
|
||||
}
|
||||
|
||||
pointer operator->()
|
||||
{
|
||||
return &*mBegin;
|
||||
}
|
||||
|
||||
conditional_iterator_impl &operator++()
|
||||
{
|
||||
while (mBegin != mEnd)
|
||||
{
|
||||
if (++mBegin == mEnd)
|
||||
break;
|
||||
|
||||
if (m_condition->operator()(mBegin))
|
||||
break;
|
||||
}
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
conditional_iterator_impl operator++(int)
|
||||
{
|
||||
conditional_iterator_impl result(*this);
|
||||
this->operator++();
|
||||
return result;
|
||||
}
|
||||
|
||||
bool operator==(const conditional_iterator_impl &rhs) const { return mBegin == rhs.mBegin; }
|
||||
bool operator!=(const conditional_iterator_impl &rhs) const { return mBegin != rhs.mBegin; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin == rhs; }
|
||||
|
||||
template <typename IRowType, typename... ITs>
|
||||
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin != rhs; }
|
||||
|
||||
private:
|
||||
CategoryType *mCat;
|
||||
base_iterator mBegin, mEnd;
|
||||
const condition *m_condition;
|
||||
};
|
||||
|
||||
using iterator = conditional_iterator_impl;
|
||||
using reference = typename iterator::reference;
|
||||
|
||||
template <typename... Ns>
|
||||
conditional_iterator_proxy(CategoryType &cat, row_iterator pos, condition &&cond, Ns... names);
|
||||
|
||||
conditional_iterator_proxy(conditional_iterator_proxy &&p);
|
||||
conditional_iterator_proxy &operator=(conditional_iterator_proxy &&p);
|
||||
|
||||
conditional_iterator_proxy(const conditional_iterator_proxy &) = delete;
|
||||
conditional_iterator_proxy &operator=(const conditional_iterator_proxy &) = delete;
|
||||
|
||||
iterator begin() const;
|
||||
iterator end() const;
|
||||
|
||||
bool empty() const;
|
||||
|
||||
explicit operator bool() const { return not empty(); }
|
||||
|
||||
size_t size() const { return std::distance(begin(), end()); }
|
||||
|
||||
row_handle front() { return *begin(); }
|
||||
// row_handle back() { return *begin(); }
|
||||
|
||||
CategoryType &category() const { return *m_cat; }
|
||||
|
||||
void swap(conditional_iterator_proxy &rhs);
|
||||
|
||||
private:
|
||||
CategoryType *m_cat;
|
||||
condition m_condition;
|
||||
row_iterator mCBegin, mCEnd;
|
||||
std::array<size_t, N> mCix;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N])
|
||||
: m_category(&cat)
|
||||
, m_begin(pos)
|
||||
, m_end(cat.end())
|
||||
{
|
||||
for (size_t i = 0; i < N; ++i)
|
||||
m_column_ix[i] = m_category->get_column_ix(columns[i]);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> columns)
|
||||
: m_category(&cat)
|
||||
, m_begin(pos)
|
||||
, m_end(cat.end())
|
||||
{
|
||||
// static_assert(columns.size() == N, "The list of column names should be exactly the same as the list of requested columns");
|
||||
|
||||
std::size_t i = 0;
|
||||
for (auto column : columns)
|
||||
m_column_ix[i++] = m_category->get_column_ix(column);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditional_iterator_impl(
|
||||
Category &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix)
|
||||
: mCat(&cat)
|
||||
, mBegin(pos, cix)
|
||||
, mEnd(cat.end(), cix)
|
||||
, m_condition(&cond)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(conditional_iterator_proxy &&p)
|
||||
: m_cat(nullptr)
|
||||
, mCBegin(p.mCBegin)
|
||||
, mCEnd(p.mCEnd)
|
||||
, mCix(p.mCix)
|
||||
{
|
||||
std::swap(m_cat, p.m_cat);
|
||||
std::swap(mCix, p.mCix);
|
||||
m_condition.swap(p.m_condition);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
template <typename... Ns>
|
||||
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category &cat, row_iterator pos, condition &&cond, Ns... names)
|
||||
: m_cat(&cat)
|
||||
, m_condition(std::move(cond))
|
||||
, mCBegin(pos)
|
||||
, mCEnd(cat.end())
|
||||
{
|
||||
static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types");
|
||||
|
||||
m_condition.prepare(cat);
|
||||
|
||||
while (mCBegin != mCEnd and not m_condition(*mCBegin))
|
||||
++mCBegin;
|
||||
|
||||
size_t i = 0;
|
||||
((mCix[i++] = m_cat->get_column_ix(names)), ...);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
conditional_iterator_proxy<Category, Ts...> &conditional_iterator_proxy<Category, Ts...>::operator=(conditional_iterator_proxy &&p)
|
||||
{
|
||||
swap(p);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::begin() const
|
||||
{
|
||||
return iterator(*m_cat, mCBegin, m_condition, mCix);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::end() const
|
||||
{
|
||||
return iterator(*m_cat, mCEnd, m_condition, mCix);
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
bool conditional_iterator_proxy<Category, Ts...>::empty() const
|
||||
{
|
||||
return mCBegin == mCEnd;
|
||||
}
|
||||
|
||||
template <typename Category, typename... Ts>
|
||||
void conditional_iterator_proxy<Category, Ts...>::swap(conditional_iterator_proxy &rhs)
|
||||
{
|
||||
std::swap(m_cat, rhs.m_cat);
|
||||
m_condition.swap(rhs.m_condition);
|
||||
std::swap(mCBegin, rhs.mCBegin);
|
||||
std::swap(mCEnd, rhs.mCEnd);
|
||||
std::swap(mCix, rhs.mCix);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
79
include/cif++/list.hpp
Normal file
79
include/cif++/list.hpp
Normal file
@@ -0,0 +1,79 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
#include <memory>
#include <utility>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Base for a singly linked list whose nodes are allocated through an
/// allocator that is rebound to the node type.
///
/// This class only offers the building blocks: the node type, helpers to
/// create and destroy nodes and the head and tail pointers. Linking nodes is
/// left to derived classes.
///
/// \tparam Allocator The allocator type, rebound internally to allocate list_item objects
template <typename Allocator = std::allocator<void>>
class list
{
  public:
	using allocator_type = Allocator;

	list() = default;

	/// \brief Construct a list that will use a copy of \a alloc to allocate its nodes
	explicit list(const allocator_type &alloc)
		: m_allocator(alloc)
	{
	}

	/// \brief Return a copy of the allocator used by this list
	allocator_type get_allocator() const
	{
		return m_allocator;
	}

  protected:
	/// \brief A node in the list, it only knows about its successor
	struct list_item
	{
		list_item *m_next = nullptr;
	};

	using list_item_allocator_type = typename std::allocator_traits<Allocator>::template rebind_alloc<list_item>;
	using list_item_allocator_traits = std::allocator_traits<list_item_allocator_type>;

	/// \brief Allocate, but do not construct, storage for a single list_item
	typename list_item_allocator_traits::pointer get_item()
	{
		list_item_allocator_type ia(get_allocator());
		return list_item_allocator_traits::allocate(ia, 1);
	}

	/// \brief Allocate a list_item and construct it from \a args
	///
	/// The first parameter is accepted but not used by this implementation.
	///
	/// \return Pointer to the new item, to be released with delete_list_item
	template <typename... Arguments>
	list_item *create_list_item(uint16_t /*column_ix*/, Arguments &&...args)
	{
		auto p = this->get_item();
		list_item_allocator_type ia(get_allocator());
		list_item_allocator_traits::construct(ia, p, std::forward<Arguments>(args)...);
		return p;
	}

	/// \brief Destroy \a iv and hand its storage back to the allocator
	void delete_list_item(list_item *iv)
	{
		list_item_allocator_type ia(get_allocator());
		list_item_allocator_traits::destroy(ia, iv);
		list_item_allocator_traits::deallocate(ia, iv, 1);
	}

	list_item *m_head = nullptr, *m_tail = nullptr;

  private:
	allocator_type m_allocator; ///< Source for the rebound node allocators
};
|
||||
|
||||
|
||||
} // namespace cif
|
||||
922
include/cif++/model.hpp
Normal file
922
include/cif++/model.hpp
Normal file
@@ -0,0 +1,922 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <numeric>
|
||||
|
||||
#include <cif++/atom_type.hpp>
|
||||
|
||||
#if __cpp_lib_format
|
||||
#include <format>
|
||||
#endif
|
||||
|
||||
namespace cif::mm
|
||||
{
|
||||
|
||||
class atom;
|
||||
class residue;
|
||||
class monomer;
|
||||
class polymer;
|
||||
class structure;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class atom
|
||||
{
|
||||
private:
|
||||
struct atom_impl : public std::enable_shared_from_this<atom_impl>
|
||||
{
|
||||
atom_impl(datablock &db, std::string_view id)
|
||||
: m_db(db)
|
||||
, m_cat(db["atom_site"])
|
||||
, m_id(id)
|
||||
{
|
||||
auto r = row();
|
||||
if (r)
|
||||
tie(m_location.m_x, m_location.m_y, m_location.m_z) = r.get("Cartn_x", "Cartn_y", "Cartn_z");
|
||||
}
|
||||
|
||||
// constructor for a symmetry copy of an atom
|
||||
atom_impl(const atom_impl &impl, const point &loc, const std::string &sym_op)
|
||||
: atom_impl(impl)
|
||||
{
|
||||
m_location = loc;
|
||||
m_symop = sym_op;
|
||||
}
|
||||
|
||||
atom_impl(const atom_impl &i) = default;
|
||||
|
||||
void prefetch();
|
||||
|
||||
int compare(const atom_impl &b) const;
|
||||
|
||||
// bool getAnisoU(float anisou[6]) const;
|
||||
|
||||
int get_charge() const;
|
||||
|
||||
void moveTo(const point &p);
|
||||
|
||||
// const compound *compound() const;
|
||||
|
||||
std::string get_property(std::string_view name) const;
|
||||
int get_property_int(std::string_view name) const;
|
||||
float get_property_float(std::string_view name) const;
|
||||
|
||||
void set_property(const std::string_view name, const std::string &value);
|
||||
|
||||
row_handle row()
|
||||
{
|
||||
return m_cat[{{"id", m_id}}];
|
||||
}
|
||||
|
||||
const row_handle row() const
|
||||
{
|
||||
return m_cat[{{"id", m_id}}];
|
||||
}
|
||||
|
||||
row_handle row_aniso()
|
||||
{
|
||||
auto cat = m_db.get("atom_site_anisotrop");
|
||||
return cat ? cat->find1(key("id") == m_id) : row_handle{};
|
||||
}
|
||||
|
||||
const row_handle row_aniso() const
|
||||
{
|
||||
auto cat = m_db.get("atom_site_anisotrop");
|
||||
return cat ? cat->find1(key("id") == m_id) : row_handle{};
|
||||
}
|
||||
|
||||
const datablock &m_db;
|
||||
category &m_cat;
|
||||
std::string m_id;
|
||||
point m_location;
|
||||
std::string m_symop = "1_555";
|
||||
};
|
||||
|
||||
public:
|
||||
atom() {}
|
||||
|
||||
atom(std::shared_ptr<atom_impl> impl)
|
||||
: m_impl(impl)
|
||||
{
|
||||
}
|
||||
|
||||
atom(const atom &rhs)
|
||||
: m_impl(rhs.m_impl)
|
||||
{
|
||||
}
|
||||
|
||||
atom(datablock &db, row_handle &row)
|
||||
: atom(std::make_shared<atom_impl>(db, row["id"].as<std::string>()))
|
||||
{
|
||||
}
|
||||
|
||||
// a special constructor to create symmetry copies
|
||||
atom(const atom &rhs, const point &symmmetry_location, const std::string &symmetry_operation)
|
||||
: atom(std::make_shared<atom_impl>(*rhs.m_impl, symmmetry_location, symmetry_operation))
|
||||
{
|
||||
}
|
||||
|
||||
explicit operator bool() const { return (bool)m_impl; }
|
||||
|
||||
// // return a copy of this atom, with data copied instead of referenced
|
||||
// atom clone() const
|
||||
// {
|
||||
// auto copy = std::make_shared<atom_impl>(*m_impl);
|
||||
// copy->mClone = true;
|
||||
// return atom(copy);
|
||||
// }
|
||||
|
||||
atom &operator=(const atom &rhs) = default;
|
||||
|
||||
// template <typename T>
|
||||
// T get_property(const std::string_view name) const;
|
||||
|
||||
std::string get_property(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property(name);
|
||||
}
|
||||
|
||||
int get_property_int(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property_int(name);
|
||||
}
|
||||
|
||||
float get_property_float(std::string_view name) const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
|
||||
return m_impl->get_property_float(name);
|
||||
}
|
||||
|
||||
void set_property(const std::string_view name, const std::string &value)
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
m_impl->set_property(name, value);
|
||||
}
|
||||
|
||||
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
|
||||
void set_property(const std::string_view name, const T &value)
|
||||
{
|
||||
set_property(name, std::to_string(value));
|
||||
}
|
||||
|
||||
const std::string &id() const { return impl().m_id; }
|
||||
|
||||
cif::atom_type get_type() const { return atom_type_traits(get_property("type_symbol")).type(); }
|
||||
|
||||
point get_location() const { return impl().m_location; }
|
||||
void set_location(point p)
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::logic_error("Error trying to modify an uninitialized atom");
|
||||
m_impl->moveTo(p);
|
||||
}
|
||||
|
||||
/// \brief Translate the position of this atom by \a t
|
||||
void translate(point t)
|
||||
{
|
||||
set_location(get_location() + t);
|
||||
}
|
||||
|
||||
/// \brief Rotate the position of this atom by \a q
|
||||
void rotate(quaternion q)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc.rotate(q);
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
/// \brief Translate and rotate the position of this atom by \a t and \a q
|
||||
void translate_and_rotate(point t, quaternion q)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc += t;
|
||||
loc.rotate(q);
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates this atom by \a t1 , \a q and \a t2
|
||||
void translate_rotate_and_translate(point t1, quaternion q, point t2)
|
||||
{
|
||||
auto loc = get_location();
|
||||
loc += t1;
|
||||
loc.rotate(q);
|
||||
loc += t2;
|
||||
set_location(loc);
|
||||
}
|
||||
|
||||
// for direct access to underlying data, be careful!
|
||||
const row_handle get_row() const { return impl().row(); }
|
||||
const row_handle get_row_aniso() const { return impl().row_aniso(); }
|
||||
|
||||
bool is_symmetry_copy() const { return impl().m_symop != "1_555"; }
|
||||
std::string symmetry() const { return impl().m_symop; }
|
||||
|
||||
// const compound &compound() const;
|
||||
|
||||
bool is_water() const
|
||||
{
|
||||
auto comp_id = get_label_comp_id();
|
||||
return comp_id == "HOH" or comp_id == "H2O" or comp_id == "WAT";
|
||||
}
|
||||
|
||||
int get_charge() const { return impl().get_charge(); }
|
||||
|
||||
// float uIso() const;
|
||||
// bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }
|
||||
|
||||
float get_occupancy() const { return get_property_float("occupancy"); }
|
||||
|
||||
// specifications
|
||||
|
||||
std::string get_label_asym_id() const { return get_property("label_asym_id"); }
|
||||
int get_label_seq_id() const { return get_property_int("label_seq_id"); }
|
||||
std::string get_label_atom_id() const { return get_property("label_atom_id"); }
|
||||
std::string get_label_alt_id() const { return get_property("label_alt_id"); }
|
||||
std::string get_label_comp_id() const { return get_property("label_comp_id"); }
|
||||
std::string get_label_entity_id() const { return get_property("label_entity_id"); }
|
||||
|
||||
std::string get_auth_asym_id() const { return get_property("auth_asym_id"); }
|
||||
std::string get_auth_seq_id() const { return get_property("auth_seq_id"); }
|
||||
std::string get_auth_atom_id() const { return get_property("auth_atom_id"); }
|
||||
std::string get_auth_alt_id() const { return get_property("auth_alt_id"); }
|
||||
std::string get_auth_comp_id() const { return get_property("auth_comp_id"); }
|
||||
std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); }
|
||||
|
||||
bool is_alternate() const { return not get_label_alt_id().empty(); }
|
||||
|
||||
// std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
|
||||
|
||||
std::string pdb_id() const
|
||||
{
|
||||
return get_label_comp_id() + '_' + get_auth_asym_id() + '_' + get_auth_seq_id() + get_pdb_ins_code();
|
||||
}
|
||||
|
||||
bool operator==(const atom &rhs) const
|
||||
{
|
||||
if (m_impl == rhs.m_impl)
|
||||
return true;
|
||||
|
||||
if (not(m_impl and rhs.m_impl))
|
||||
return false;
|
||||
|
||||
return &m_impl->m_db == &rhs.m_impl->m_db and m_impl->m_id == rhs.m_impl->m_id;
|
||||
}
|
||||
|
||||
bool operator!=(const atom &rhs) const
|
||||
{
|
||||
return not operator==(rhs);
|
||||
}
|
||||
|
||||
// // access data in compound for this atom
|
||||
|
||||
// convenience routine
|
||||
bool is_back_bone() const
|
||||
{
|
||||
auto atomID = get_label_atom_id();
|
||||
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
|
||||
}
|
||||
|
||||
void swap(atom &b)
|
||||
{
|
||||
std::swap(m_impl, b.m_impl);
|
||||
}
|
||||
|
||||
int compare(const atom &b) const { return impl().compare(*b.m_impl); }
|
||||
|
||||
bool operator<(const atom &rhs) const
|
||||
{
|
||||
return compare(rhs) < 0;
|
||||
}
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const atom &atom);
|
||||
|
||||
// /// \brief Synchronize data with underlying cif data
|
||||
// void sync()
|
||||
// {
|
||||
// if (m_impl)
|
||||
// m_impl->prefetch();
|
||||
// }
|
||||
|
||||
private:
|
||||
friend class structure;
|
||||
|
||||
const atom_impl &impl() const
|
||||
{
|
||||
if (not m_impl)
|
||||
throw std::runtime_error("Uninitialized atom, not found?");
|
||||
return *m_impl;
|
||||
}
|
||||
|
||||
std::shared_ptr<atom_impl> m_impl;
|
||||
};
|
||||
|
||||
// template <>
|
||||
// inline std::string atom::get_property<std::string>(const std::string_view name) const
|
||||
// {
|
||||
// return get_property(name);
|
||||
// }
|
||||
|
||||
// template <>
|
||||
// inline int atom::get_property<int>(const std::string_view name) const
|
||||
// {
|
||||
// auto v = impl().get_property(name);
|
||||
// return v.empty() ? 0 : stoi(v);
|
||||
// }
|
||||
|
||||
// template <>
|
||||
// inline float atom::get_property<float>(const std::string_view name) const
|
||||
// {
|
||||
// return stof(impl().get_property(name));
|
||||
// }
|
||||
|
||||
/// \brief Exchange the contents of atoms \a a and \a b, forwards to atom::swap
inline void swap(atom &a, atom &b)
{
	a.swap(b);
}
|
||||
|
||||
inline float distance(const atom &a, const atom &b)
|
||||
{
|
||||
return distance(a.get_location(), b.get_location());
|
||||
}
|
||||
|
||||
inline float distance_squared(const atom &a, const atom &b)
|
||||
{
|
||||
return distance_squared(a.get_location(), b.get_location());
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief The entity types distinguished in this model
///
/// The values are spelled out here, they are the same as the values the
/// enumerators received implicitly before.
enum class EntityType
{
	polymer = 0,
	NonPolymer = 1,
	Macrolide = 2,
	Water = 3,
	Branched = 4
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class residue
|
||||
{
|
||||
public:
|
||||
friend class structure;
|
||||
|
||||
// constructor
|
||||
residue(const structure &structure, const std::string &compoundID,
|
||||
const std::string &asymID, int seqID,
|
||||
const std::string &authAsymID, const std::string &authSeqID,
|
||||
const std::string &pdbInsCode)
|
||||
: m_structure(&structure)
|
||||
, m_compound_id(compoundID)
|
||||
, m_asym_id(asymID)
|
||||
, m_seq_id(seqID)
|
||||
, m_auth_asym_id(authAsymID)
|
||||
, m_auth_seq_id(authSeqID)
|
||||
, m_pdb_ins_code(pdbInsCode)
|
||||
{
|
||||
}
|
||||
|
||||
residue(const residue &rhs) = delete;
|
||||
residue &operator=(const residue &rhs) = delete;
|
||||
|
||||
residue(residue &&rhs) = default;
|
||||
residue &operator=(residue &&rhs) = default;
|
||||
|
||||
virtual ~residue() = default;
|
||||
|
||||
std::string get_entity_id() const;
|
||||
|
||||
EntityType entity_type() const;
|
||||
|
||||
const std::string &get_asym_id() const { return m_asym_id; }
|
||||
int get_seq_id() const { return m_seq_id; }
|
||||
|
||||
const std::string get_auth_asym_id() const { return m_auth_asym_id; }
|
||||
const std::string get_auth_seq_id() const { return m_auth_seq_id; }
|
||||
std::string get_pdb_ins_code() const { return m_pdb_ins_code; }
|
||||
|
||||
const std::string &get_compound_id() const { return m_compound_id; }
|
||||
void set_compound_id(const std::string &id) { m_compound_id = id; }
|
||||
|
||||
const structure *get_structure() const { return m_structure; }
|
||||
|
||||
// const compound &compound() const;
|
||||
|
||||
std::vector<atom> &atoms()
|
||||
{
|
||||
return m_atoms;
|
||||
}
|
||||
|
||||
const std::vector<atom> &atoms() const
|
||||
{
|
||||
return m_atoms;
|
||||
}
|
||||
|
||||
void add_atom(atom &atom);
|
||||
|
||||
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
|
||||
std::vector<atom> unique_atoms() const;
|
||||
|
||||
/// \brief The alt ID used for the unique atoms
|
||||
std::string unique_alt_id() const;
|
||||
|
||||
atom get_atom_by_atom_id(const std::string &atomID) const;
|
||||
|
||||
// Is this residue a single entity?
|
||||
bool is_entity() const;
|
||||
bool is_water() const { return m_compound_id == "HOH"; }
|
||||
// bool empty() const { return m_structure == nullptr; }
|
||||
|
||||
bool has_alternate_atoms() const;
|
||||
|
||||
/// \brief Return the list of unique alt ID's present in this residue
|
||||
std::set<std::string> get_alternate_ids() const;
|
||||
|
||||
/// \brief Return the list of unique atom ID's
|
||||
std::set<std::string> get_atom_ids() const;
|
||||
|
||||
/// \brief Return the list of atoms having ID \a atomID
|
||||
std::vector<atom> get_atoms_by_id(const std::string &atomID) const;
|
||||
|
||||
// some routines for 3d work
|
||||
std::tuple<point, float> center_and_radius() const;
|
||||
|
||||
friend std::ostream &operator<<(std::ostream &os, const residue &res);
|
||||
|
||||
bool operator==(const residue &rhs) const
|
||||
{
|
||||
return this == &rhs or (m_structure == rhs.m_structure and
|
||||
m_seq_id == rhs.m_seq_id and
|
||||
m_asym_id == rhs.m_asym_id and
|
||||
m_compound_id == rhs.m_compound_id and
|
||||
m_auth_seq_id == rhs.m_auth_seq_id);
|
||||
}
|
||||
|
||||
protected:
|
||||
residue() {}
|
||||
|
||||
const structure *m_structure = nullptr;
|
||||
std::string m_compound_id, m_asym_id;
|
||||
int m_seq_id = 0;
|
||||
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
|
||||
std::vector<atom> m_atoms;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a monomer models a single residue in a protein chain
|
||||
|
||||
class monomer : public residue
|
||||
{
|
||||
public:
|
||||
// monomer();
|
||||
monomer(const monomer &rhs) = delete;
|
||||
monomer &operator=(const monomer &rhs) = delete;
|
||||
|
||||
monomer(monomer &&rhs);
|
||||
monomer &operator=(monomer &&rhs);
|
||||
|
||||
monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
|
||||
const std::string &pdbInsCode, const std::string &compoundID);
|
||||
|
||||
bool is_first_in_chain() const;
|
||||
bool is_last_in_chain() const;
|
||||
|
||||
// convenience
|
||||
bool has_alpha() const;
|
||||
bool has_kappa() const;
|
||||
|
||||
// Assuming this is really an amino acid...
|
||||
|
||||
float phi() const;
|
||||
float psi() const;
|
||||
float alpha() const;
|
||||
float kappa() const;
|
||||
float tco() const;
|
||||
float omega() const;
|
||||
|
||||
// torsion angles
|
||||
size_t nr_of_chis() const;
|
||||
float chi(size_t i) const;
|
||||
|
||||
bool is_cis() const;
|
||||
|
||||
/// \brief Returns true if the four atoms C, CA, N and O are present
|
||||
bool is_complete() const;
|
||||
|
||||
/// \brief Returns true if any of the backbone atoms has an alternate
|
||||
bool has_alternate_backbone_atoms() const;
|
||||
|
||||
atom CAlpha() const { return get_atom_by_atom_id("CA"); }
|
||||
atom C() const { return get_atom_by_atom_id("C"); }
|
||||
atom N() const { return get_atom_by_atom_id("N"); }
|
||||
atom O() const { return get_atom_by_atom_id("O"); }
|
||||
atom H() const { return get_atom_by_atom_id("H"); }
|
||||
|
||||
bool is_bonded_to(const monomer &rhs) const
|
||||
{
|
||||
return this != &rhs and are_bonded(*this, rhs);
|
||||
}
|
||||
|
||||
static bool are_bonded(const monomer &a, const monomer &b, float errorMargin = 0.5f);
|
||||
static bool is_cis(const monomer &a, const monomer &b);
|
||||
static float omega(const monomer &a, const monomer &b);
|
||||
|
||||
// for LEU and VAL
|
||||
float chiral_volume() const;
|
||||
|
||||
bool operator==(const monomer &rhs) const
|
||||
{
|
||||
return m_polymer == rhs.m_polymer and m_index == rhs.m_index;
|
||||
}
|
||||
|
||||
private:
|
||||
const polymer *m_polymer;
|
||||
size_t m_index;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class polymer : public std::vector<monomer>
|
||||
{
|
||||
public:
|
||||
polymer(const structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
|
||||
|
||||
polymer(const polymer &) = delete;
|
||||
polymer &operator=(const polymer &) = delete;
|
||||
|
||||
// monomer &getBySeqID(int seqID);
|
||||
// const monomer &getBySeqID(int seqID) const;
|
||||
|
||||
const structure *get_structure() const { return m_structure; }
|
||||
|
||||
std::string get_asym_id() const { return m_asym_id; }
|
||||
std::string get_auth_asym_id() const { return m_auth_asym_id; } // The PDB chain ID, actually
|
||||
std::string get_entity_id() const { return m_entity_id; }
|
||||
|
||||
// int Distance(const monomer &a, const monomer &b) const;
|
||||
|
||||
private:
|
||||
const structure *m_structure;
|
||||
std::string m_entity_id;
|
||||
std::string m_asym_id;
|
||||
std::string m_auth_asym_id;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// sugar and branch, to describe glycosylation sites
|
||||
|
||||
class branch;
|
||||
|
||||
class sugar : public residue
|
||||
{
|
||||
public:
|
||||
sugar(const branch &branch, const std::string &compoundID,
|
||||
const std::string &asymID, int authSeqID);
|
||||
|
||||
sugar(sugar &&rhs);
|
||||
sugar &operator=(sugar &&rhs);
|
||||
|
||||
int num() const { return std::stoi(m_auth_seq_id); }
|
||||
std::string name() const;
|
||||
|
||||
/// \brief Return the atom the C1 is linked to
|
||||
atom get_link() const { return m_link; }
|
||||
void set_link(atom link) { m_link = link; }
|
||||
|
||||
size_t get_link_nr() const
|
||||
{
|
||||
size_t result = 0;
|
||||
if (m_link)
|
||||
result = m_link.get_property_int("auth_seq_id");
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
const branch *m_branch;
|
||||
atom m_link;
|
||||
};
|
||||
|
||||
class branch : public std::vector<sugar>
|
||||
{
|
||||
public:
|
||||
branch(structure &structure, const std::string &asymID);
|
||||
|
||||
void link_atoms();
|
||||
|
||||
std::string name() const;
|
||||
float weight() const;
|
||||
std::string get_asym_id() const { return m_asym_id; }
|
||||
|
||||
structure &get_structure() { return *m_structure; }
|
||||
const structure &get_structure() const { return *m_structure; }
|
||||
|
||||
sugar &getSugarByNum(int nr);
|
||||
const sugar &getSugarByNum(int nr) const;
|
||||
|
||||
private:
|
||||
friend sugar;
|
||||
|
||||
std::string name(const sugar &s) const;
|
||||
|
||||
structure *m_structure;
|
||||
std::string m_asym_id;
|
||||
};
|
||||
|
||||
// // --------------------------------------------------------------------
|
||||
// // file is a reference to the data stored in e.g. the cif file.
|
||||
// // This object is not copyable.
|
||||
|
||||
// class File : public file
|
||||
// {
|
||||
// public:
|
||||
// File() {}
|
||||
|
||||
// // File(const std::filesystem::path &path)
|
||||
// // {
|
||||
// // load(path);
|
||||
// // }
|
||||
|
||||
// // File(const char *data, size_t length)
|
||||
// // {
|
||||
// // load(data, length);
|
||||
// // }
|
||||
|
||||
// File(const File &) = delete;
|
||||
// File &operator=(const File &) = delete;
|
||||
|
||||
// // void load(const std::filesystem::path &p) override;
|
||||
// // void save(const std::filesystem::path &p) override;
|
||||
|
||||
// // using file::load;
|
||||
// // using file::save;
|
||||
|
||||
// datablock &data() { return front(); }
|
||||
// };
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Bit flags passed to the structure constructors; test a flag with operator&.
/// A value initialised StructureOpenOptions{} has no flag set.
enum class StructureOpenOptions
{
	SkipHydrogen = 1 << 0 ///< bit 0
};
|
||||
|
||||
/// \brief True when \a a and \a b have at least one bit in common
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
	const int common = static_cast<int>(a) & static_cast<int>(b);
	return common != 0;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class structure
|
||||
{
|
||||
public:
|
||||
structure(file &p, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
|
||||
structure(datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
|
||||
|
||||
structure(structure &&s) = default;
|
||||
|
||||
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
|
||||
// NOTE: removed, simply create a new structure for each thread
|
||||
structure(const structure &) = delete;
|
||||
|
||||
structure &operator=(const structure &) = delete;
|
||||
// Structure &operator=(Structure &&s) = default;
|
||||
|
||||
~structure() = default;
|
||||
|
||||
size_t get_model_nr() const { return m_model_nr; }
|
||||
|
||||
const std::vector<atom> &atoms() const { return m_atoms; }
|
||||
// std::vector<atom> &atoms() { return m_atoms; }
|
||||
|
||||
EntityType get_entity_type_for_entity_id(const std::string entityID) const;
|
||||
EntityType get_entity_type_for_asym_id(const std::string asymID) const;
|
||||
|
||||
// std::vector<atom> waters() const;
|
||||
|
||||
const std::list<polymer> &polymers() const { return m_polymers; }
|
||||
std::list<polymer> &polymers() { return m_polymers; }
|
||||
|
||||
polymer &get_polymer_by_asym_id(const std::string &asymID);
|
||||
|
||||
const polymer &get_polymer_by_asym_id(const std::string &asymID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_polymer_by_asym_id(asymID);
|
||||
}
|
||||
|
||||
const std::list<branch> &branches() const { return m_branches; }
|
||||
std::list<branch> &branches() { return m_branches; }
|
||||
|
||||
branch &get_branch_by_asym_id(const std::string &asymID);
|
||||
const branch &get_branch_by_asym_id(const std::string &asymID) const;
|
||||
|
||||
const std::vector<residue> &non_polymers() const { return m_non_polymers; }
|
||||
|
||||
atom get_atom_by_id(const std::string &id) const;
|
||||
// atom getAtomByLocation(point pt, float maxDistance) const;
|
||||
|
||||
atom get_atom_by_label(const std::string &atomID, const std::string &asymID,
|
||||
const std::string &compID, int seqID, const std::string &altID = "");
|
||||
|
||||
// /// \brief Return the atom closest to point \a p
|
||||
atom get_atom_by_position(point p) const;
|
||||
|
||||
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
|
||||
atom get_atom_by_position_and_type(point p, std::string_view type, std::string_view res_type) const;
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
residue &get_residue(const std::string &asymID)
|
||||
{
|
||||
return get_residue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a non-poly residue for an asym with id \a asymID
|
||||
const residue &get_residue(const std::string &asymID) const
|
||||
{
|
||||
return get_residue(asymID, 0, "");
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
|
||||
residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get a the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
|
||||
const residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_residue(asymID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
|
||||
|
||||
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
|
||||
const residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
|
||||
{
|
||||
return const_cast<structure *>(this)->get_residue(asymID, compID, seqID, authSeqID);
|
||||
}
|
||||
|
||||
/// \brief Get a the residue for atom \a atom
|
||||
residue &get_residue(const atom &atom)
|
||||
{
|
||||
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
|
||||
}
|
||||
|
||||
/// \brief Get a the residue for atom \a atom
|
||||
const residue &get_residue(const atom &atom) const
|
||||
{
|
||||
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
|
||||
}
|
||||
|
||||
// Actions
|
||||
void remove_atom(atom &a)
|
||||
{
|
||||
remove_atom(a, true);
|
||||
}
|
||||
|
||||
void swap_atoms(atom a1, atom a2); // swap the labels for these atoms
|
||||
void move_atom(atom a, point p); // move atom to a new location
|
||||
void change_residue(residue &res, const std::string &newcompound,
|
||||
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
|
||||
|
||||
/// \brief Remove a residue, can be monomer or nonpoly
|
||||
///
|
||||
/// \param asym_id The asym ID
|
||||
/// \param seq_id The sequence ID
|
||||
void remove_residue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
|
||||
{
|
||||
remove_residue(get_residue(asym_id, seq_id, auth_seq_id));
|
||||
}
|
||||
|
||||
/// \brief Create a new non-polymer entity, returns new ID
|
||||
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
|
||||
/// \return The ID of the created entity
|
||||
std::string create_non_poly_entity(const std::string &mon_id);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
|
||||
/// This method assumes you are copying data from one cif file to another.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of atom_site rows containing the data.
|
||||
/// \return The newly create asym ID
|
||||
std::string create_non_poly(const std::string &entity_id, const std::vector<atom> &atoms);
|
||||
|
||||
/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
|
||||
/// This method creates new atom records filled with info from the info.
|
||||
///
|
||||
/// \param entity_id The entity ID of the new nonpoly
|
||||
/// \param atoms The array of sets of item data containing the data for the atoms.
|
||||
/// \return The newly create asym ID
|
||||
std::string create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms);
|
||||
|
||||
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a atoms
|
||||
branch &create_branch(std::vector<row_initializer> atoms);
|
||||
|
||||
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
|
||||
///
|
||||
/// \param asym_id The asym id of the branch to extend
|
||||
/// \param atom_info Array containing the info for the atoms to construct for the new sugar
|
||||
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
|
||||
/// \param link_atom The atom id of the atom linked in the sugar
|
||||
branch &extend_branch(const std::string &asym_id, std::vector<row_initializer> atom_info,
|
||||
int link_sugar, const std::string &link_atom);
|
||||
|
||||
/// \brief Remove \a branch
|
||||
void remove_branch(branch &branch);
|
||||
|
||||
/// \brief Remove residue \a res
|
||||
///
|
||||
/// \param res The residue to remove
|
||||
void remove_residue(residue &res);
|
||||
|
||||
/// \brief Translate the coordinates of all atoms in the structure by \a t
|
||||
void translate(point t);
|
||||
|
||||
/// \brief Rotate the coordinates of all atoms in the structure by \a q
|
||||
void rotate(quaternion t);
|
||||
|
||||
/// \brief Translate and rotate the coordinates of all atoms in the structure by \a t and \a q
|
||||
void translate_and_rotate(point t, quaternion q);
|
||||
|
||||
/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
|
||||
void translate_rotate_and_translate(point t1, quaternion q, point t2);
|
||||
|
||||
void cleanup_empty_categories();
|
||||
|
||||
/// \brief Direct access to underlying data
|
||||
category &get_category(std::string_view name) const
|
||||
{
|
||||
return m_db[name];
|
||||
}
|
||||
|
||||
datablock &get_datablock() const
|
||||
{
|
||||
return m_db;
|
||||
}
|
||||
|
||||
void validate_atoms() const;
|
||||
|
||||
private:
|
||||
friend polymer;
|
||||
friend residue;
|
||||
|
||||
std::string insert_compound(const std::string &compoundID, bool is_entity);
|
||||
|
||||
std::string create_entity_for_branch(branch &branch);
|
||||
|
||||
void load_data();
|
||||
|
||||
void load_atoms_for_model(StructureOpenOptions options);
|
||||
|
||||
template <typename... Args>
|
||||
atom &emplace_atom(Args... args)
|
||||
{
|
||||
return emplace_atom(atom{ std::forward<Args>(args)... });
|
||||
}
|
||||
|
||||
atom &emplace_atom(atom &&atom);
|
||||
|
||||
void remove_atom(atom &a, bool removeFromResidue);
|
||||
void remove_sugar(sugar &sugar);
|
||||
|
||||
datablock &m_db;
|
||||
size_t m_model_nr;
|
||||
std::vector<atom> m_atoms;
|
||||
std::vector<size_t> m_atom_index;
|
||||
std::list<polymer> m_polymers;
|
||||
std::list<branch> m_branches;
|
||||
std::vector<residue> m_non_polymers;
|
||||
};
|
||||
|
||||
} // namespace cif::mm
|
||||
289
include/cif++/parser.hpp
Normal file
289
include/cif++/parser.hpp
Normal file
@@ -0,0 +1,289 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Exception thrown by the parser; what() reads
/// "parse error at line <line_nr>: <message>"
class parse_error : public std::runtime_error
{
  public:
	parse_error(uint32_t line_nr, const std::string &message)
		: std::runtime_error("parse error at line " + std::to_string(line_nr) + ": " + message)
	{
	}
};

// --------------------------------------------------------------------

// TODO: Need to implement support for transformed long lines

/// \brief Base class for CIF parsers. It holds the tokenizer state and the
/// character classification helpers; derived classes receive the parsed
/// content through the pure virtual produce_... methods.
class sac_parser
{
  public:
	using datablock_index = std::map<std::string, std::size_t>;

	sac_parser(std::istream &is, bool init = true);

	virtual ~sac_parser() = default;

	/// \brief Bits stored per character in kCharTraitsTable
	enum CharTraitsMask : uint8_t
	{
		kOrdinaryMask = 1 << 0,
		kNonBlankMask = 1 << 1,
		kTextLeadMask = 1 << 2,
		kAnyPrintMask = 1 << 3
	};

	/// \brief True for characters std::isspace accepts and for '#'
	static bool is_white(int ch)
	{
		return std::isspace(ch) or ch == '#';
	}

	// The four tests below accept only characters in the range 0x20..0x7f
	// (is_non_blank excludes 0x20 itself) and then look up the requested bit
	// in kCharTraitsTable, which is indexed by ch - 0x20.

	static constexpr bool is_ordinary(int ch)
	{
		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
	}

	static constexpr bool is_non_blank(int ch)
	{
		return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
	}

	static constexpr bool is_text_lead(int ch)
	{
		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
	}

	/// \brief Like the other tests, but a tab is accepted as well
	static constexpr bool is_any_print(int ch)
	{
		return ch == '\t' or
		       (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
	}

	/// \brief Decide whether \a text can be written as a value without quotes.
	///
	/// The first character must be ordinary, all following characters must be
	/// non-blank, and the text must not look like a reserved word.
	///
	/// \param text  The candidate value
	/// \return      false for an empty \a text and for anything that needs quoting
	static bool is_unquoted_string(std::string_view text)
	{
		// An empty value has no first character to inspect (reading it would be
		// undefined behaviour) and can never be written bare.
		if (text.empty())
			return false;

		bool result = is_ordinary(text.front());
		for (auto s = text.begin() + 1; result and s != text.end(); ++s)
			result = is_non_blank(*s);

		// but be careful it does not contain e.g. stop_
		if (result)
		{
			// The tail after the underscore is optional (.*) so that the bare
			// keywords loop_, stop_, global_, data_ and save_ are rejected too.
			static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.*)", std::regex_constants::icase);
			result = not std::regex_match(text.begin(), text.end(), reservedRx);
		}

		return result;
	}

  protected:
	// One entry of CharTraitsMask bits per character, starting at 0x20. Only
	// the first 96 entries (0x20..0x7f) are given, the remainder is zero.
	static constexpr uint8_t kCharTraitsTable[128] = {
		//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, //	3
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	4
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, //	5
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	6
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  //	7
	};

	enum class CIFToken
	{
		Unknown,

		Eof,

		DATA,
		LOOP,
		GLOBAL,
		SAVE,
		STOP,
		Tag,
		Value
	};

	/// \brief Printable name for \a token
	static constexpr const char *get_token_name(CIFToken token)
	{
		switch (token)
		{
			case CIFToken::Unknown: return "Unknown";
			case CIFToken::Eof: return "Eof";
			case CIFToken::DATA: return "DATA";
			case CIFToken::LOOP: return "LOOP";
			case CIFToken::GLOBAL: return "GLOBAL";
			case CIFToken::SAVE: return "SAVE";
			case CIFToken::STOP: return "STOP";
			case CIFToken::Tag: return "Tag";
			case CIFToken::Value: return "Value";
			default: return "Invalid token parameter";
		}
	}

	enum class CIFValue
	{
		Int,
		Float,
		Numeric,
		String,
		TextField,
		Inapplicable,
		Unknown
	};

	/// \brief Printable name for \a type
	static constexpr const char *get_value_name(CIFValue type)
	{
		switch (type)
		{
			case CIFValue::Int: return "Int";
			case CIFValue::Float: return "Float";
			case CIFValue::Numeric: return "Numeric";
			case CIFValue::String: return "String";
			case CIFValue::TextField: return "TextField";
			case CIFValue::Inapplicable: return "Inapplicable";
			case CIFValue::Unknown: return "Unknown";
			default: return "Invalid type parameter";
		}
	}

	// get_next_char takes a char from the buffer, or if it is empty
	// from the istream. This function also does carriage/linefeed
	// translation.
	int get_next_char();

	void retract();

	int restart(int start);

	CIFToken get_next_token();

	void match(CIFToken token);

  public:
	bool parse_single_datablock(const std::string &datablock);

	datablock_index index_datablocks();

	bool parse_single_datablock(const std::string &datablock, const datablock_index &index);

	void parse_file();

  protected:
	void parse_global();

	void parse_datablock();

	virtual void parse_save_frame();

	/// \brief Report a fatal problem at the current line
	/// \throws parse_error always
	void error(const std::string &msg)
	{
		throw parse_error(m_line_nr, msg);
	}

	/// \brief Write a warning for the current line to std::cerr
	void warning(const std::string &msg)
	{
		// note the space after "line", without it the number is glued to the word
		std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
	}

	// production methods, these are pure virtual here

	virtual void produce_datablock(const std::string &name) = 0;
	virtual void produce_category(const std::string &name) = 0;
	virtual void produce_row() = 0;
	virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;

  protected:
	enum State
	{
		Start,
		White,
		Esc,
		Comment,
		QuestionMark,
		Dot,
		QuotedString,
		QuotedStringQuote,
		UnquotedString,
		Tag,
		TextField,
		Float = 100,
		Int = 110,
		Value = 300,
		DATA,
		SAVE
	};

	std::istream &m_source;

	// Parser state
	bool m_validate;
	uint32_t m_line_nr;
	bool m_bol;
	CIFToken m_lookahead;
	std::string m_token_value;
	CIFValue mTokenType;
	std::stack<int> m_buffer;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class parser : public sac_parser
|
||||
{
|
||||
public:
|
||||
parser(std::istream &is, file &file)
|
||||
: sac_parser(is)
|
||||
, m_file(file)
|
||||
{
|
||||
}
|
||||
|
||||
void produce_datablock(const std::string &name) override;
|
||||
|
||||
void produce_category(const std::string &name) override;
|
||||
|
||||
void produce_row() override;
|
||||
|
||||
void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
|
||||
|
||||
protected:
|
||||
file &m_file;
|
||||
datablock *m_datablock = nullptr;
|
||||
category *m_category = nullptr;
|
||||
row_handle m_row;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,14 +26,17 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
|
||||
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile);
|
||||
void write_header_lines(std::ostream &os, const datablock &data);
|
||||
|
||||
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
|
||||
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
|
||||
|
||||
} // namespace cif::pdb
|
||||
44
include/cif++/pdb/io.hpp
Normal file
44
include/cif++/pdb/io.hpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
file read(std::istream &is);
|
||||
file read(const std::filesystem::path &file);
|
||||
|
||||
void write(std::ostream &os, const datablock &db);
|
||||
|
||||
inline void write(std::ostream &os, const file &f)
|
||||
{
|
||||
write(os, f.front());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,7 +26,10 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
@@ -57,4 +60,6 @@ struct PDBRecord
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void ReadPDBFile(std::istream &pdbFile, cif::File &cifFile);
|
||||
void ReadPDBFile(std::istream &pdbFile, file &cifFile);
|
||||
|
||||
} // namespace pdbx
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,10 +26,13 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "cif++/PDB2Cif.hpp"
|
||||
#include <cif++/pdb/pdb2cif.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif::pdb
|
||||
{
|
||||
|
||||
struct TemplateLine;
|
||||
|
||||
class Remark3Parser
|
||||
@@ -37,37 +40,36 @@ class Remark3Parser
|
||||
public:
|
||||
virtual ~Remark3Parser() {}
|
||||
|
||||
static bool parse(const std::string& expMethod, PDBRecord* r, cif::Datablock& db);
|
||||
static bool parse(const std::string &expMethod, PDBRecord *r, cif::datablock &db);
|
||||
|
||||
virtual std::string program();
|
||||
virtual std::string version();
|
||||
|
||||
protected:
|
||||
|
||||
Remark3Parser(const std::string& name, const std::string& expMethod, PDBRecord* r, cif::Datablock& db,
|
||||
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
|
||||
Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db,
|
||||
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
|
||||
|
||||
virtual float parse();
|
||||
std::string nextLine();
|
||||
|
||||
bool match(const char* expr, int nextState);
|
||||
void storeCapture(const char* category, std::initializer_list<const char*> items, bool createNew = false);
|
||||
void storeRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
void updateRefineLsRestr(const char* type, std::initializer_list<const char*> values);
|
||||
bool match(const char *expr, int nextState);
|
||||
void storeCapture(const char *category, std::initializer_list<const char *> items, bool createNew = false);
|
||||
void storeRefineLsRestr(const char *type, std::initializer_list<const char *> values);
|
||||
void updateRefineLsRestr(const char *type, std::initializer_list<const char *> values);
|
||||
|
||||
virtual void fixup() {}
|
||||
|
||||
std::string mName;
|
||||
std::string mExpMethod;
|
||||
PDBRecord* mRec;
|
||||
cif::Datablock mDb;
|
||||
std::string mLine;
|
||||
std::smatch mM;
|
||||
uint32_t mState;
|
||||
std::string mName;
|
||||
std::string mExpMethod;
|
||||
PDBRecord *mRec;
|
||||
cif::datablock mDb;
|
||||
std::string mLine;
|
||||
std::smatch mM;
|
||||
uint32_t mState;
|
||||
|
||||
const TemplateLine* mTemplate;
|
||||
uint32_t mTemplateCount;
|
||||
std::regex mProgramVersion;
|
||||
const TemplateLine *mTemplate;
|
||||
uint32_t mTemplateCount;
|
||||
std::regex mProgramVersion;
|
||||
};
|
||||
|
||||
|
||||
} // namespace pdbx
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,32 +26,30 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
|
||||
extern const int
|
||||
kResidueNrWildcard,
|
||||
kNoSeqNum;
|
||||
|
||||
struct TLSSelection;
|
||||
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
|
||||
struct tls_selection;
|
||||
struct tls_residue;
|
||||
|
||||
struct TLSResidue;
|
||||
|
||||
struct TLSSelection
|
||||
struct tls_selection
|
||||
{
|
||||
virtual ~TLSSelection() {}
|
||||
virtual void CollectResidues(cif::Datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0;
|
||||
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::Datablock& db, bool pdbNamespace) const;
|
||||
virtual ~tls_selection() {}
|
||||
virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
|
||||
std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
|
||||
};
|
||||
|
||||
// Low level: get the selections
|
||||
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
|
||||
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
|
||||
|
||||
}
|
||||
} // namespace cif
|
||||
736
include/cif++/point.hpp
Normal file
736
include/cif++/point.hpp
Normal file
@@ -0,0 +1,736 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
#include <cmath>
#include <complex>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <ostream>
#include <tuple>
#include <utility>
#include <valarray>
#include <vector>
|
||||
|
||||
#if __has_include(<clipper/core/coords.h>)
|
||||
#define HAVE_LIBCLIPPER 1
|
||||
#include <clipper/core/coords.h>
|
||||
#endif
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// The constant pi in double precision.
///
/// Declared inline constexpr so that the header defines one single entity for
/// all translation units and so that the value can be used in constant
/// expressions (a plain const double cannot).
inline constexpr double kPI = 3.141592653589793238462643383279502884;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// A stripped down quaternion implementation, based on boost::math::quaternion
|
||||
// We use quaternions to do rotations in 3d space
|
||||
|
||||
/// \brief A stripped down quaternion, based on boost::math::quaternion.
///
/// The four components are called a (the real part), b, c and d. All
/// constructors taking components are explicit. Operations mixing
/// quaternions of different value types go through the public get_a() ..
/// get_d() accessors, since the data members of another specialisation
/// are not accessible.
template <typename T>
class quaternion_type
{
  public:
	using value_type = T;

	/// Construct from up to four components, missing ones are value-initialised
	constexpr explicit quaternion_type(value_type const &value_a = value_type(), value_type const &value_b = value_type(), value_type const &value_c = value_type(), value_type const &value_d = value_type())
		: a(value_a)
		, b(value_b)
		, c(value_c)
		, d(value_d)
	{
	}

	/// Construct from two complex numbers, \a z0 gives (a, b) and \a z1 gives (c, d)
	constexpr explicit quaternion_type(std::complex<value_type> const &z0, std::complex<value_type> const &z1 = std::complex<value_type>())
		: a(z0.real())
		, b(z0.imag())
		, c(z1.real())
		, d(z1.imag())
	{
	}

	constexpr quaternion_type(quaternion_type const &) = default;
	constexpr quaternion_type(quaternion_type &&) = default;

	/// Converting constructor, each component is static_cast to value_type
	template <typename X>
	constexpr explicit quaternion_type(quaternion_type<X> const &rhs)
		: a(static_cast<value_type>(rhs.get_a()))
		, b(static_cast<value_type>(rhs.get_b()))
		, c(static_cast<value_type>(rhs.get_c()))
		, d(static_cast<value_type>(rhs.get_d()))
	{
	}

	// accessors
	//
	// Note: Like complex number, quaternions do have a meaningful notion of "real part",
	// but unlike them there is no meaningful notion of "imaginary part".
	// Instead there is an "unreal part" which itself is a quaternion, and usually
	// nothing simpler (as opposed to the complex number case).
	// However, for practicality, there are accessors for the other components
	// (these are necessary for the templated copy constructor, for instance).

	constexpr value_type real() const
	{
		return a;
	}

	/// The quaternion (0, b, c, d)
	constexpr quaternion_type unreal() const
	{
		// The constructor is explicit, so a braced return value would be ill-formed
		return quaternion_type(value_type(0), b, c, d);
	}

	constexpr void swap(quaternion_type &o)
	{
		std::swap(a, o.a);
		std::swap(b, o.b);
		std::swap(c, o.c);
		std::swap(d, o.d);
	}

	// assignment operators

	template <typename X>
	constexpr quaternion_type &operator=(quaternion_type<X> const &rhs)
	{
		a = static_cast<value_type>(rhs.get_a());
		b = static_cast<value_type>(rhs.get_b());
		c = static_cast<value_type>(rhs.get_c());
		d = static_cast<value_type>(rhs.get_d());

		return *this;
	}

	constexpr quaternion_type &operator=(quaternion_type const &rhs)
	{
		a = rhs.a;
		b = rhs.b;
		c = rhs.c;
		d = rhs.d;

		return *this;
	}

	/// Assign a real number, the components b, c and d become zero
	constexpr quaternion_type &operator=(value_type const &rhs)
	{
		a = rhs;

		b = c = d = static_cast<value_type>(0);

		return *this;
	}

	/// Assign a complex number, the components c and d become zero
	constexpr quaternion_type &operator=(std::complex<value_type> const &rhs)
	{
		a = rhs.real();
		b = rhs.imag();

		c = d = static_cast<value_type>(0);

		return *this;
	}

	// other assignment-related operators
	//
	// NOTE: Quaternion multiplication is *NOT* commutative;
	// symbolically, "q *= rhs;" means "q = q * rhs;"
	// and "q /= rhs;" means "q = q * inverse_of(rhs);"

	constexpr quaternion_type &operator+=(value_type const &rhs)
	{
		a += rhs;
		return *this;
	}

	constexpr quaternion_type &operator+=(std::complex<value_type> const &rhs)
	{
		a += std::real(rhs);
		b += std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator+=(quaternion_type<X> const &rhs)
	{
		a += rhs.get_a();
		b += rhs.get_b();
		c += rhs.get_c();
		d += rhs.get_d();
		return *this;
	}

	constexpr quaternion_type &operator-=(value_type const &rhs)
	{
		a -= rhs;
		return *this;
	}

	constexpr quaternion_type &operator-=(std::complex<value_type> const &rhs)
	{
		a -= std::real(rhs);
		b -= std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator-=(quaternion_type<X> const &rhs)
	{
		a -= rhs.get_a();
		b -= rhs.get_b();
		c -= rhs.get_c();
		d -= rhs.get_d();
		return *this;
	}

	constexpr quaternion_type &operator*=(value_type const &rhs)
	{
		a *= rhs;
		b *= rhs;
		c *= rhs;
		d *= rhs;
		return *this;
	}

	constexpr quaternion_type &operator*=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();

		// all new components depend on the old ones, so compute first, store later
		const value_type na = a * ar - b * br;
		const value_type nb = a * br + b * ar;
		const value_type nc = c * ar + d * br;
		const value_type nd = -c * br + d * ar;

		a = na;
		b = nb;
		c = nc;
		d = nd;
		return *this;
	}

	/// The (non commutative) product of \a lhs and \a rhs
	constexpr friend quaternion_type operator*(const quaternion_type &lhs, const quaternion_type &rhs)
	{
		auto result = lhs;
		result *= rhs;
		return result;
	}

	template <typename X>
	constexpr quaternion_type &operator*=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		const value_type na = a * ar - b * br - c * cr - d * dr;
		const value_type nb = a * br + b * ar + c * dr - d * cr;
		const value_type nc = a * cr - b * dr + c * ar + d * br;
		const value_type nd = a * dr + b * cr - c * br + d * ar;

		a = na;
		b = nb;
		c = nc;
		d = nd;
		return *this;
	}

	constexpr quaternion_type &operator/=(value_type const &rhs)
	{
		a /= rhs;
		b /= rhs;
		c /= rhs;
		d /= rhs;
		return *this;
	}

	constexpr quaternion_type &operator/=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();
		value_type denominator = ar * ar + br * br;

		const value_type na = (+a * ar + b * br) / denominator;
		const value_type nb = (-a * br + b * ar) / denominator;
		const value_type nc = (+c * ar - d * br) / denominator;
		const value_type nd = (+c * br + d * ar) / denominator;

		a = na;
		b = nb;
		c = nc;
		d = nd;
		return *this;
	}

	template <typename X>
	constexpr quaternion_type &operator/=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		value_type denominator = ar * ar + br * br + cr * cr + dr * dr;

		const value_type na = (+a * ar + b * br + c * cr + d * dr) / denominator;
		const value_type nb = (-a * br + b * ar - c * dr + d * cr) / denominator;
		const value_type nc = (-a * cr + b * dr + c * ar - d * br) / denominator;
		const value_type nd = (-a * dr - b * cr + c * br + d * ar) / denominator;

		a = na;
		b = nb;
		c = nc;
		d = nd;
		return *this;
	}

	/// \brief Return \a q scaled to unit length
	///
	/// When the length of \a q is not larger than 0.001 the result
	/// is the quaternion (1, 0, 0, 0) instead.
	constexpr friend quaternion_type normalize(quaternion_type q)
	{
		const value_type length = std::sqrt(q.a * q.a + q.b * q.b + q.c * q.c + q.d * q.d);

		if (length > 0.001)
			q /= length;
		else
			q = quaternion_type(1, 0, 0, 0);

		return q;
	}

	/// The conjugate of \a q, i.e. (a, -b, -c, -d)
	constexpr friend quaternion_type conj(quaternion_type q)
	{
		return quaternion_type{ +q.a, -q.b, -q.c, -q.d };
	}

	constexpr value_type get_a() const { return a; }
	constexpr value_type get_b() const { return b; }
	constexpr value_type get_c() const { return c; }
	constexpr value_type get_d() const { return d; }

  private:
	value_type a, b, c, d;
};
|
||||
|
||||
template <typename T>
|
||||
inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1, T const &phi2)
|
||||
{
|
||||
T cos_phi1 = std::cos(phi1);
|
||||
T cos_phi2 = std::cos(phi2);
|
||||
|
||||
T a = std::cos(theta) * cos_phi1 * cos_phi2;
|
||||
T b = std::sin(theta) * cos_phi1 * cos_phi2;
|
||||
T c = std::sin(phi1) * cos_phi2;
|
||||
T d = std::sin(phi2);
|
||||
|
||||
quaternion_type result(a, b, c, d);
|
||||
result *= rho;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
using quaternion = quaternion_type<float>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// point, a location with x, y and z coordinates as floating point.
// A point is convertible to a tuple of references to its
// coordinates so you can do things like:
//
// float x, y, z;
// tie(x, y, z) = atom.loc();
|
||||
|
||||
template <typename F>
|
||||
struct point_type
|
||||
{
|
||||
using value_type = F;
|
||||
|
||||
value_type m_x, m_y, m_z;
|
||||
|
||||
constexpr point_type()
|
||||
: m_x(0)
|
||||
, m_y(0)
|
||||
, m_z(0)
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type(value_type x, value_type y, value_type z)
|
||||
: m_x(x)
|
||||
, m_y(y)
|
||||
, m_z(z)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename PF>
|
||||
constexpr point_type(const point_type<PF> &pt)
|
||||
: m_x(static_cast<F>(pt.m_x))
|
||||
, m_y(static_cast<F>(pt.m_y))
|
||||
, m_z(static_cast<F>(pt.m_z))
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type(const std::tuple<value_type, value_type, value_type> &pt)
|
||||
: point_type(std::get<0>(pt), std::get<1>(pt), std::get<2>(pt))
|
||||
{
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
constexpr point_type(const clipper::Coord_orth &pt)
|
||||
: m_x(pt[0])
|
||||
, m_y(pt[1])
|
||||
, m_z(pt[2])
|
||||
{
|
||||
}
|
||||
|
||||
constexpr point_type &operator=(const clipper::Coord_orth &rhs)
|
||||
{
|
||||
m_x = rhs[0];
|
||||
m_y = rhs[1];
|
||||
m_z = rhs[2];
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
template <typename PF>
|
||||
constexpr point_type &operator=(const point_type<PF> &rhs)
|
||||
{
|
||||
m_x = static_cast<F>(rhs.m_x);
|
||||
m_y = static_cast<F>(rhs.m_y);
|
||||
m_z = static_cast<F>(rhs.m_z);
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr value_type &get_x() { return m_x; }
|
||||
constexpr value_type get_x() const { return m_x; }
|
||||
constexpr void set_x(value_type x) { m_x = x; }
|
||||
|
||||
constexpr value_type &get_y() { return m_y; }
|
||||
constexpr value_type get_y() const { return m_y; }
|
||||
constexpr void set_y(value_type y) { m_y = y; }
|
||||
|
||||
constexpr value_type &get_z() { return m_z; }
|
||||
constexpr value_type get_z() const { return m_z; }
|
||||
constexpr void set_z(value_type z) { m_z = z; }
|
||||
|
||||
constexpr point_type &operator+=(const point_type &rhs)
|
||||
{
|
||||
m_x += rhs.m_x;
|
||||
m_y += rhs.m_y;
|
||||
m_z += rhs.m_z;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator+=(value_type d)
|
||||
{
|
||||
m_x += d;
|
||||
m_y += d;
|
||||
m_z += d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator-=(const point_type &rhs)
|
||||
{
|
||||
m_x -= rhs.m_x;
|
||||
m_y -= rhs.m_y;
|
||||
m_z -= rhs.m_z;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator-=(value_type d)
|
||||
{
|
||||
m_x -= d;
|
||||
m_y -= d;
|
||||
m_z -= d;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator*=(value_type rhs)
|
||||
{
|
||||
m_x *= rhs;
|
||||
m_y *= rhs;
|
||||
m_z *= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr point_type &operator/=(value_type rhs)
|
||||
{
|
||||
m_x /= rhs;
|
||||
m_y /= rhs;
|
||||
m_z /= rhs;
|
||||
return *this;
|
||||
}
|
||||
|
||||
constexpr value_type normalize()
|
||||
{
|
||||
auto length = m_x * m_x + m_y * m_y + m_z * m_z;
|
||||
if (length > 0)
|
||||
{
|
||||
length = std::sqrt(length);
|
||||
operator/=(length);
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
constexpr void rotate(const quaternion &q)
|
||||
{
|
||||
quaternion_type<value_type> p(0, m_x, m_y, m_z);
|
||||
|
||||
p = q * p * conj(q);
|
||||
|
||||
m_x = p.get_b();
|
||||
m_y = p.get_c();
|
||||
m_z = p.get_d();
|
||||
}
|
||||
|
||||
#if HAVE_LIBCLIPPER
|
||||
operator clipper::Coord_orth() const
|
||||
{
|
||||
return clipper::Coord_orth(m_x, m_y, m_z);
|
||||
}
|
||||
#endif
|
||||
|
||||
constexpr operator std::tuple<const value_type &, const value_type &, const value_type &>() const
|
||||
{
|
||||
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
|
||||
}
|
||||
|
||||
constexpr operator std::tuple<value_type &, value_type &, value_type &>()
|
||||
{
|
||||
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
|
||||
}
|
||||
|
||||
constexpr bool operator==(const point_type &rhs) const
|
||||
{
|
||||
return m_x == rhs.m_x and m_y == rhs.m_y and m_z == rhs.m_z;
|
||||
}
|
||||
|
||||
// consider point as a vector... perhaps I should rename point?
|
||||
constexpr value_type length_sq() const
|
||||
{
|
||||
return m_x * m_x + m_y * m_y + m_z * m_z;
|
||||
}
|
||||
|
||||
constexpr value_type length() const
|
||||
{
|
||||
return std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z);
|
||||
}
|
||||
};
|
||||
|
||||
using point = point_type<float>;
|
||||
|
||||
template <typename F>
|
||||
inline constexpr std::ostream &operator<<(std::ostream &os, const point_type<F> &pt)
|
||||
{
|
||||
os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
|
||||
return os;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator+(const point_type<F> &lhs, const point_type<F> &rhs)
|
||||
{
|
||||
return point_type<F>(lhs.m_x + rhs.m_x, lhs.m_y + rhs.m_y, lhs.m_z + rhs.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator-(const point_type<F> &lhs, const point_type<F> &rhs)
|
||||
{
|
||||
return point_type<F>(lhs.m_x - rhs.m_x, lhs.m_y - rhs.m_y, lhs.m_z - rhs.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator-(const point_type<F> &pt)
|
||||
{
|
||||
return point_type<F>(-pt.m_x, -pt.m_y, -pt.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator*(const point_type<F> &pt, F f)
|
||||
{
|
||||
return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator*(F f, const point_type<F> &pt)
|
||||
{
|
||||
return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> operator/(const point_type<F> &pt, F f)
|
||||
{
|
||||
return point_type<F>(pt.m_x / f, pt.m_y / f, pt.m_z / f);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// several standard 3d operations
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto distance_squared(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return (a.m_x - b.m_x) * (a.m_x - b.m_x) +
|
||||
(a.m_y - b.m_y) * (a.m_y - b.m_y) +
|
||||
(a.m_z - b.m_z) * (a.m_z - b.m_z);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto distance(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return std::sqrt(
|
||||
(a.m_x - b.m_x) * (a.m_x - b.m_x) +
|
||||
(a.m_y - b.m_y) * (a.m_y - b.m_y) +
|
||||
(a.m_z - b.m_z) * (a.m_z - b.m_z));
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr auto dot_product(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return a.m_x * b.m_x + a.m_y * b.m_y + a.m_z * b.m_z;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
inline constexpr point_type<F> cross_product(const point_type<F> &a, const point_type<F> &b)
|
||||
{
|
||||
return point_type<F>(a.m_y * b.m_z - b.m_y * a.m_z,
|
||||
a.m_z * b.m_x - b.m_z * a.m_x,
|
||||
a.m_x * b.m_y - b.m_x * a.m_y);
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
|
||||
{
|
||||
point_type<F> v1 = p1 - p2;
|
||||
point_type<F> v2 = p3 - p2;
|
||||
|
||||
return std::acos(dot_product(v1, v2) / (v1.length() * v2.length())) * 180 / kPI;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
|
||||
{
|
||||
point_type<F> v12 = p1 - p2; // vector from p2 to p1
|
||||
point_type<F> v43 = p4 - p3; // vector from p3 to p4
|
||||
|
||||
point_type<F> z = p2 - p3; // vector from p3 to p2
|
||||
|
||||
point_type<F> p = cross_product(z, v12);
|
||||
point_type<F> x = cross_product(z, v43);
|
||||
point_type<F> y = cross_product(z, x);
|
||||
|
||||
auto u = dot_product(x, x);
|
||||
auto v = dot_product(y, y);
|
||||
|
||||
F result = 360;
|
||||
if (u > 0 and v > 0)
|
||||
{
|
||||
u = dot_product(p, x) / std::sqrt(u);
|
||||
v = dot_product(p, y) / std::sqrt(v);
|
||||
if (u != 0 or v != 0)
|
||||
result = std::atan2(v, u) * static_cast<F>(180 / kPI);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
|
||||
{
|
||||
point_type<F> v12 = p1 - p2;
|
||||
point_type<F> v34 = p3 - p4;
|
||||
|
||||
auto x = dot_product(v12, v12) * dot_product(v34, v34);
|
||||
|
||||
return x > 0 ? dot_product(v12, v34) / std::sqrt(x) : 0;
|
||||
}
|
||||
|
||||
template <typename F>
|
||||
constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<F> &l2, const point_type<F> &p)
|
||||
{
|
||||
auto line = l2 - l1;
|
||||
auto p_to_l1 = p - l1;
|
||||
auto p_to_l2 = p - l2;
|
||||
auto cross = cross_product(p_to_l1, p_to_l2);
|
||||
return cross.length() / line.length();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// For e.g. simulated annealing, returns a new point that is moved in
|
||||
// a random direction with a distance randomly chosen from a normal
|
||||
// distribution with a stddev of offset.
|
||||
|
||||
point nudge(point p, float offset);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
quaternion construct_from_angle_axis(float angle, point axis);
|
||||
std::tuple<double, point> quaternion_to_angle_axis(quaternion q);
|
||||
|
||||
point centroid(const std::vector<point> &Points);
|
||||
point center_points(std::vector<point> &Points);
|
||||
|
||||
/// \brief Returns how the two sets of points \a a and \a b can be aligned
|
||||
///
|
||||
/// \param a The first set of points
|
||||
/// \param b The second set of points
|
||||
/// \result The quaternion which should be applied to the points in \a a to
|
||||
/// obtain the best superposition.
|
||||
quaternion align_points(const std::vector<point> &a, const std::vector<point> &b);
|
||||
|
||||
/// \brief The RMSd for the points in \a a and \a b
|
||||
double RMSd(const std::vector<point> &a, const std::vector<point> &b);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Helper class to generate evenly divided points on a sphere
|
||||
// we use a fibonacci sphere to calculate even distribution of the dots
|
||||
|
||||
template <int N>
|
||||
class spherical_dots
|
||||
{
|
||||
public:
|
||||
|
||||
constexpr static int P = 2 * N * 1;
|
||||
|
||||
using array_type = typename std::array<point, P>;
|
||||
using iterator = typename array_type::const_iterator;
|
||||
|
||||
static spherical_dots &instance()
|
||||
{
|
||||
static spherical_dots sInstance;
|
||||
return sInstance;
|
||||
}
|
||||
|
||||
size_t size() const { return m_points.size(); }
|
||||
const point operator[](uint32_t inIx) const { return m_points[inIx]; }
|
||||
iterator begin() const { return m_points.begin(); }
|
||||
iterator end() const { return m_points.end(); }
|
||||
|
||||
double weight() const { return m_weight; }
|
||||
|
||||
spherical_dots()
|
||||
{
|
||||
const double
|
||||
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
|
||||
|
||||
m_weight = (4 * kPI) / P;
|
||||
|
||||
auto p = m_points.begin();
|
||||
|
||||
for (int32_t i = -N; i <= N; ++i)
|
||||
{
|
||||
double lat = std::asin((2.0 * i) / P);
|
||||
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
|
||||
|
||||
p->m_x = std::sin(lon) * std::cos(lat);
|
||||
p->m_y = std::cos(lon) * std::cos(lat);
|
||||
p->m_z = std::sin(lat);
|
||||
|
||||
++p;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
array_type m_points;
|
||||
double m_weight;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
305
include/cif++/row.hpp
Normal file
305
include/cif++/row.hpp
Normal file
@@ -0,0 +1,305 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cif++/item.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// some helper classes to help create tuple result types
|
||||
template <typename... C>
|
||||
struct get_row_result
|
||||
{
|
||||
static constexpr size_t N = sizeof...(C);
|
||||
|
||||
get_row_result(const row_handle &r, std::array<size_t, N> &&columns)
|
||||
: m_row(r)
|
||||
, m_columns(std::move(columns))
|
||||
{
|
||||
}
|
||||
|
||||
const item_handle operator[](size_t ix) const
|
||||
{
|
||||
return m_row[m_columns[ix]];
|
||||
}
|
||||
|
||||
template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0>
|
||||
operator std::tuple<Ts...>() const
|
||||
{
|
||||
return get<Ts...>(std::index_sequence_for<Ts...>{});
|
||||
}
|
||||
|
||||
template <typename... Ts, std::size_t... Is>
|
||||
std::tuple<Ts...> get(std::index_sequence<Is...>) const
|
||||
{
|
||||
return std::tuple<Ts...>{ m_row[m_columns[Is]].template as<Ts>()... };
|
||||
}
|
||||
|
||||
const row_handle &m_row;
|
||||
std::array<size_t, N> m_columns;
|
||||
};
|
||||
|
||||
// We want to be able to tie a set of variables to a get_row_result (see
// cif::tie). For this we use a tie_wrap: a thin wrapper around a tuple of
// references whose assignment operator does the conversion.
template <typename... Ts>
struct tie_wrap
{
	/// \param args The variables (usually references) to bind
	tie_wrap(Ts... args)
		: m_value(args...)
	{
	}

	/// \brief Assign the values contained in \a rr to the bound variables
	///
	/// \a rr should be convertible to a std::tuple of the (non reference)
	/// types of the bound variables. A get_row_result will do that
	/// conversion, but only if the types are compatible. That means the
	/// number of parameters to the get() of the row should be equal to the
	/// number of items in the tuple you are trying to tie.
	///
	/// The argument is taken by const reference so that both temporaries
	/// and named results can be assigned.
	template <typename RR>
	void operator=(const RR &rr)
	{
		using RType = std::tuple<typename std::remove_reference<Ts>::type...>;

		m_value = static_cast<RType>(rr);
	}

	std::tuple<Ts...> m_value; ///< The bound variables
};
|
||||
|
||||
} // namespace detail
|
||||
|
||||
template <typename... Ts>
|
||||
auto tie(Ts &...v)
|
||||
{
|
||||
return detail::tie_wrap<Ts &...>(std::forward<Ts &>(v)...);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief the row class, this one is not directly accessible from the outside
|
||||
|
||||
class row : public std::vector<item_value>
|
||||
{
|
||||
public:
|
||||
row() = default;
|
||||
|
||||
item_value* get(size_t ix)
|
||||
{
|
||||
return ix < size() ? &at(ix) : nullptr;
|
||||
}
|
||||
|
||||
const item_value* get(size_t ix) const
|
||||
{
|
||||
return ix < size() ? &at(ix) : nullptr;
|
||||
}
|
||||
|
||||
private:
|
||||
friend class category;
|
||||
friend class category_index;
|
||||
|
||||
template <typename, typename...>
|
||||
friend class iterator_impl;
|
||||
|
||||
void append(size_t ix, item_value &&iv)
|
||||
{
|
||||
if (ix >= size())
|
||||
resize(ix + 1);
|
||||
|
||||
at(ix) = std::move(iv);
|
||||
}
|
||||
|
||||
void remove(size_t ix)
|
||||
{
|
||||
if (ix < size())
|
||||
at(ix) = item_value{};
|
||||
}
|
||||
|
||||
row *m_next = nullptr;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief row_handle is the way to access data stored in rows
|
||||
|
||||
class row_handle
|
||||
{
|
||||
public:
|
||||
friend class item_handle;
|
||||
friend class category;
|
||||
friend class category_index;
|
||||
friend class row_initializer;
|
||||
|
||||
row_handle() = default;
|
||||
|
||||
row_handle(const row_handle &) = default;
|
||||
row_handle(row_handle &&) = default;
|
||||
|
||||
row_handle &operator=(const row_handle &) = default;
|
||||
row_handle &operator=(row_handle &&) = default;
|
||||
|
||||
row_handle(const category &cat, const row &r)
|
||||
: m_category(const_cast<category *>(&cat))
|
||||
, m_row(const_cast<row *>(&r))
|
||||
{
|
||||
}
|
||||
|
||||
const category &get_category() const
|
||||
{
|
||||
return *m_category;
|
||||
}
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return m_category == nullptr or m_row == nullptr;
|
||||
}
|
||||
|
||||
explicit operator bool() const
|
||||
{
|
||||
return not empty();
|
||||
}
|
||||
|
||||
item_handle operator[](uint32_t column_ix)
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(column_ix, *this);
|
||||
}
|
||||
|
||||
const item_handle operator[](uint32_t column_ix) const
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this));
|
||||
}
|
||||
|
||||
item_handle operator[](std::string_view column_name)
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this);
|
||||
}
|
||||
|
||||
const item_handle operator[](std::string_view column_name) const
|
||||
{
|
||||
return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this));
|
||||
}
|
||||
|
||||
template <typename... C>
|
||||
auto get(C... columns) const
|
||||
{
|
||||
return detail::get_row_result<C...>(*this, { get_column_ix(columns)... });
|
||||
}
|
||||
|
||||
template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C), int> = 0>
|
||||
std::tuple<Ts...> get(C... columns) const
|
||||
{
|
||||
return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... });
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T get(const char *column)
|
||||
{
|
||||
return operator[](get_column_ix(column)).template as<T>();
|
||||
}
|
||||
|
||||
void assign(const std::vector<item> &values)
|
||||
{
|
||||
for (auto &value : values)
|
||||
assign(value, true);
|
||||
}
|
||||
|
||||
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
|
||||
{
|
||||
assign(add_column(name), value, updateLinked, validate);
|
||||
}
|
||||
|
||||
void assign(size_t column, std::string_view value, bool updateLinked, bool validate = true);
|
||||
|
||||
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
|
||||
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
|
||||
|
||||
private:
|
||||
uint16_t get_column_ix(std::string_view name) const;
|
||||
std::string_view get_column_name(uint16_t ix) const;
|
||||
|
||||
uint16_t add_column(std::string_view name);
|
||||
|
||||
row *get_row()
|
||||
{
|
||||
return m_row;
|
||||
}
|
||||
|
||||
const row *get_row() const
|
||||
{
|
||||
return m_row;
|
||||
}
|
||||
|
||||
void assign(const item &i, bool updateLinked)
|
||||
{
|
||||
assign(i.name(), i.value(), updateLinked);
|
||||
}
|
||||
|
||||
void swap(size_t column, row_handle &r);
|
||||
|
||||
category *m_category = nullptr;
|
||||
row *m_row = nullptr;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class row_initializer : public std::vector<item>
|
||||
{
|
||||
public:
|
||||
friend class category;
|
||||
|
||||
row_initializer() = default;
|
||||
row_initializer(const row_initializer &) = default;
|
||||
row_initializer(row_initializer &&) = default;
|
||||
row_initializer &operator=(const row_initializer &) = default;
|
||||
row_initializer &operator=(row_initializer &&) = default;
|
||||
|
||||
row_initializer(std::initializer_list<item> items)
|
||||
: std::vector<item>(items)
|
||||
{
|
||||
}
|
||||
|
||||
template <typename ItemIter, std::enable_if_t<std::is_same_v<typename ItemIter::value_type, item>, int> = 0>
|
||||
row_initializer(ItemIter b, ItemIter e)
|
||||
: std::vector<item>(b, e)
|
||||
{
|
||||
}
|
||||
|
||||
row_initializer(row_handle rh);
|
||||
|
||||
void set_value(std::string_view name, std::string_view value);
|
||||
void set_value(const item &i)
|
||||
{
|
||||
set_value(i.name(), i.value());
|
||||
}
|
||||
|
||||
void set_value_if_empty(std::string_view name, std::string_view value);
|
||||
void set_value_if_empty(const item &i)
|
||||
{
|
||||
set_value_if_empty(i.name(), i.value());
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,17 +1,17 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
@@ -26,57 +26,62 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <string>
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
#include <cstdint>
|
||||
#include <string>
|
||||
|
||||
#include "CifUtils.hpp"
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct Spacegroup
|
||||
enum class space_group_name
|
||||
{
|
||||
const char* name;
|
||||
const char* xHM;
|
||||
const char* Hall;
|
||||
full,
|
||||
xHM,
|
||||
Hall
|
||||
};
|
||||
|
||||
struct space_group
|
||||
{
|
||||
const char *name;
|
||||
const char *xHM;
|
||||
const char *Hall;
|
||||
int nr;
|
||||
};
|
||||
|
||||
CIFPP_EXPORT extern const Spacegroup kSpaceGroups[];
|
||||
CIFPP_EXPORT extern const std::size_t kNrOfSpaceGroups;
|
||||
extern const space_group kSpaceGroups[];
|
||||
extern const std::size_t kNrOfSpaceGroups;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct SymopData
|
||||
struct symop_data
|
||||
{
|
||||
constexpr SymopData(const std::array<int,15>& data)
|
||||
: m_packed((data[ 0] & 0x03ULL) << 34 bitor
|
||||
(data[ 1] & 0x03ULL) << 32 bitor
|
||||
(data[ 2] & 0x03ULL) << 30 bitor
|
||||
(data[ 3] & 0x03ULL) << 28 bitor
|
||||
(data[ 4] & 0x03ULL) << 26 bitor
|
||||
(data[ 5] & 0x03ULL) << 24 bitor
|
||||
(data[ 6] & 0x03ULL) << 22 bitor
|
||||
(data[ 7] & 0x03ULL) << 20 bitor
|
||||
(data[ 8] & 0x03ULL) << 18 bitor
|
||||
(data[ 9] & 0x07ULL) << 15 bitor
|
||||
constexpr symop_data(const std::array<int, 15> &data)
|
||||
: m_packed((data[0] & 0x03ULL) << 34 bitor
|
||||
(data[1] & 0x03ULL) << 32 bitor
|
||||
(data[2] & 0x03ULL) << 30 bitor
|
||||
(data[3] & 0x03ULL) << 28 bitor
|
||||
(data[4] & 0x03ULL) << 26 bitor
|
||||
(data[5] & 0x03ULL) << 24 bitor
|
||||
(data[6] & 0x03ULL) << 22 bitor
|
||||
(data[7] & 0x03ULL) << 20 bitor
|
||||
(data[8] & 0x03ULL) << 18 bitor
|
||||
(data[9] & 0x07ULL) << 15 bitor
|
||||
(data[10] & 0x07ULL) << 12 bitor
|
||||
(data[11] & 0x07ULL) << 9 bitor
|
||||
(data[12] & 0x07ULL) << 6 bitor
|
||||
(data[13] & 0x07ULL) << 3 bitor
|
||||
(data[14] & 0x07ULL) << 0)
|
||||
(data[11] & 0x07ULL) << 9 bitor
|
||||
(data[12] & 0x07ULL) << 6 bitor
|
||||
(data[13] & 0x07ULL) << 3 bitor
|
||||
(data[14] & 0x07ULL) << 0)
|
||||
{
|
||||
}
|
||||
|
||||
bool operator==(const SymopData& rhs) const
|
||||
bool operator==(const symop_data &rhs) const
|
||||
{
|
||||
return m_packed == rhs.m_packed;
|
||||
}
|
||||
|
||||
std::array<int,15> data() const
|
||||
std::array<int, 15> data() const
|
||||
{
|
||||
return {
|
||||
static_cast<int>(m_packed >> 34) bitand 0x03,
|
||||
@@ -90,49 +95,51 @@ struct SymopData
|
||||
static_cast<int>(m_packed >> 18) bitand 0x03,
|
||||
static_cast<int>(m_packed >> 15) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 12) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 9) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 6) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 3) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 0) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 9) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 6) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 3) bitand 0x07,
|
||||
static_cast<int>(m_packed >> 0) bitand 0x07,
|
||||
};
|
||||
}
|
||||
|
||||
private:
|
||||
friend struct symop_datablock;
|
||||
|
||||
friend struct SymopDataBlock;
|
||||
const uint64_t kPackMask = (~0ULL >> (64 - 36));
|
||||
|
||||
const uint64_t kPackMask = (~0ULL >> (64-36));
|
||||
|
||||
SymopData(uint64_t v)
|
||||
: m_packed(v bitand kPackMask) {}
|
||||
symop_data(uint64_t v)
|
||||
: m_packed(v bitand kPackMask)
|
||||
{
|
||||
}
|
||||
|
||||
uint64_t m_packed;
|
||||
};
|
||||
|
||||
struct SymopDataBlock
|
||||
struct symop_datablock
|
||||
{
|
||||
constexpr SymopDataBlock(int spacegroup, int rotational_number, const std::array<int,15>& rt_data)
|
||||
constexpr symop_datablock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
|
||||
: m_v((spacegroup & 0xffffULL) << 48 bitor
|
||||
(rotational_number & 0xffULL) << 40 bitor
|
||||
SymopData(rt_data).m_packed)
|
||||
symop_data(rt_data).m_packed)
|
||||
{
|
||||
}
|
||||
|
||||
uint16_t spacegroup() const { return m_v >> 48; }
|
||||
SymopData symop() const { return SymopData(m_v); }
|
||||
uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
|
||||
uint16_t spacegroup() const { return m_v >> 48; }
|
||||
symop_data symop() const { return symop_data(m_v); }
|
||||
uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
|
||||
|
||||
private:
|
||||
uint64_t m_v;
|
||||
};
|
||||
|
||||
static_assert(sizeof(SymopDataBlock) == sizeof(uint64_t), "Size of SymopData is wrong");
|
||||
static_assert(sizeof(symop_datablock) == sizeof(uint64_t), "Size of symop_data is wrong");
|
||||
|
||||
CIFPP_EXPORT extern const SymopDataBlock kSymopNrTable[];
|
||||
CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
|
||||
extern const symop_datablock kSymopNrTable[];
|
||||
extern const std::size_t kSymopNrTableSize;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code
|
||||
int get_space_group_number(std::string spacegroup); // alternative for clipper's parsing code, using space_group_name::full
|
||||
int get_space_group_number(std::string spacegroup, space_group_name type); // alternative for clipper's parsing code
|
||||
|
||||
}
|
||||
} // namespace cif
|
||||
458
include/cif++/text.hpp
Normal file
458
include/cif++/text.hpp
Normal file
@@ -0,0 +1,458 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <charconv>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <iterator>
#include <limits>
#include <set>
#include <sstream>
#include <string>
#include <string_view>
#include <system_error>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
|
||||
|
||||
#if __has_include(<experimental/type_traits>)
|
||||
#include <experimental/type_traits>
|
||||
#endif
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// some basic utilities: Since we're using ASCII input only, we define for optimisation
|
||||
// our own case conversion routines.
|
||||
|
||||
bool iequals(std::string_view a, std::string_view b);
|
||||
int icompare(std::string_view a, std::string_view b);
|
||||
|
||||
bool iequals(const char *a, const char *b);
|
||||
int icompare(const char *a, const char *b);
|
||||
|
||||
void to_lower(std::string &s);
|
||||
std::string to_lower_copy(std::string_view s);
|
||||
|
||||
void to_upper(std::string &s);
|
||||
// std::string toUpperCopy(const std::string &s);
|
||||
|
||||
/// \brief Concatenate the elements in the range [ \a b, \a e ), writing
/// \a sep between each pair of consecutive elements.
///
/// The elements are written using operator<< to a std::ostringstream. The
/// range is traversed in a single pass, so input iterators are supported
/// as well.
///
/// \return The resulting string, which is empty for an empty range
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
	std::ostringstream s;

	if (b != e)
	{
		s << *b;

		// every next element is preceded by the separator
		while (++b != e)
			s << sep << *b;
	}

	return s.str();
}

/// \brief Concatenate all elements of container \a arr, writing \a sep
/// between each pair of consecutive elements.
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
	return join(arr.begin(), arr.end(), sep);
}
|
||||
|
||||
/// \brief Split \a s at each character that occurs in \a separators
///
/// The pieces are constructed from a pointer and a length, so \a StringType
/// should be constructible from (const char *, size_t). When the default
/// std::string_view is used, the pieces refer to the memory of \a s.
///
/// \param s              The text to split
/// \param separators     The set of characters that each mark a split point
/// \param suppress_empty When true, empty pieces are left out of the result
/// \return The pieces in order of occurrence. Without suppress_empty the
///         result has one element more than there are separators in \a s.
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
	std::vector<StringType> result;

	// Use offsets into the data instead of string_view iterators, an
	// iterator is not a pointer in every standard library implementation.
	std::string_view::size_type start = 0;

	for (;;)
	{
		const auto pos = s.find_first_of(separators, start);
		const auto end = pos == std::string_view::npos ? s.length() : pos;

		if (end > start or not suppress_empty)
			result.emplace_back(s.data() + start, end - start);

		if (pos == std::string_view::npos)
			break;

		start = pos + 1;
	}

	return result;
}
|
||||
|
||||
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
|
||||
|
||||
// starts_with and ends_with, both take string_views so that no copy of the
// text is made and any string-like argument can be passed. The member
// functions of std::string_view are used when the library provides them.

#if defined(__cpp_lib_starts_ends_with)

/// \brief Return true when \a s starts with \a with
inline bool starts_with(std::string_view s, std::string_view with)
{
	return s.starts_with(with);
}

/// \brief Return true when \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
	return s.ends_with(with);
}

#else

/// \brief Return true when \a s starts with \a with
inline bool starts_with(std::string_view s, std::string_view with)
{
	return s.length() >= with.length() and s.compare(0, with.length(), with) == 0;
}

/// \brief Return true when \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
	return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
}

#endif
|
||||
|
||||
// contains, using the member function of std::string_view when the
// library provides it.

#if defined(__cpp_lib_string_contains)

/// \brief Return true when \a q occurs somewhere in \a s
inline bool contains(std::string_view s, std::string_view q)
{
	return s.contains(q);
}

#else

/// \brief Return true when \a q occurs somewhere in \a s
inline bool contains(std::string_view s, std::string_view q)
{
	const auto pos = s.find(q);
	return not(pos == std::string_view::npos);
}

#endif
|
||||
|
||||
bool icontains(std::string_view s, std::string_view q);
|
||||
|
||||
void trim_left(std::string &s);
|
||||
void trim_right(std::string &s);
|
||||
void trim(std::string &s);
|
||||
|
||||
std::string trim_left_copy(std::string_view s);
|
||||
std::string trim_right_copy(std::string_view s);
|
||||
std::string trim_copy(std::string_view s);
|
||||
|
||||
// To make life easier, we also define iless and iset using iequals
|
||||
|
||||
struct iless
|
||||
{
|
||||
bool operator()(const std::string &a, const std::string &b) const
|
||||
{
|
||||
return icompare(a, b) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::set<std::string, iless> iset;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
/// Lookup table with 256 entries, defined elsewhere
extern const uint8_t kCharToLowerMap[256];

/// \brief Return the entry of kCharToLowerMap for \a ch, only the lowest
/// eight bits of \a ch are used as index into the table
inline char tolower(int ch)
{
	const auto index = static_cast<uint8_t>(ch);
	return static_cast<char>(kCharToLowerMap[index]);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::tuple<std::string, std::string> split_tag_name(std::string_view tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// generate a cif name, mainly used to generate asym_id's
|
||||
|
||||
std::string cif_id_for_number(int number);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// custom wordwrapping routine
|
||||
|
||||
std::vector<std::string> word_wrap(const std::string &text, size_t width);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief from_chars for floating point types, modelled after std::from_chars.
///
/// This implementation is optional: the selected_charconv alias below
/// selects the std version whenever the standard library offers one for
/// the requested type and uses this implementation in all other cases.
///
/// The accepted format is:
///
///     [ '+' | '-' ] digits [ '.' digits ] [ ( 'e' | 'E' ) [ '+' | '-' ] digits ]
///
/// with at least one digit in the part before the exponent. Other than
/// std::from_chars, a leading plus sign is accepted. An exponent is only
/// consumed when it contains at least one digit. Digits that do not fit
/// in the 64 bit mantissa are not used for the value, the result is
/// therefore truncated to about 19 significant digits.
///
/// \param first  Pointer to the first character to parse
/// \param last   Pointer one past the last character to parse
/// \param value  Receives the result, not modified when parsing fails
/// \return       On success ec is std::errc() and ptr points to the first
///               character not consumed. When no number was found, ec is
///               std::errc::invalid_argument and ptr equals \a first. When
///               the number is larger than the largest finite value of
///               FloatType, ec is std::errc::result_out_of_range and ptr
///               points past the number.

template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::from_chars_result from_chars(const char *first, const char *last, FloatType &value)
{
	// Largest mantissa that can still take another digit without overflow
	constexpr unsigned long long kMantissaLimit = (std::numeric_limits<unsigned long long>::max() - 9) / 10;

	// Anything beyond this decimal exponent is zero or out of range anyway
	constexpr long long kExponentLimit = 100000;

	// Scale in steps, a power of ten this size fits in every floating point type wider than float
	constexpr long long kScaleStep = 256;

	const auto is_digit = [](char ch)
	{ return ch >= '0' and ch <= '9'; };

	const char *p = first;

	bool negative = false;
	if (p != last and (*p == '-' or *p == '+'))
	{
		negative = *p == '-';
		++p;
	}

	unsigned long long mantissa = 0;
	long long exponent10 = 0;
	bool have_digits = false;

	// integer part, digits that no longer fit only shift the decimal point
	for (; p != last and is_digit(*p); ++p)
	{
		have_digits = true;

		if (mantissa <= kMantissaLimit)
			mantissa = 10 * mantissa + static_cast<unsigned long long>(*p - '0');
		else
			++exponent10;
	}

	// fraction, digits that no longer fit are simply dropped
	if (p != last and *p == '.')
	{
		for (++p; p != last and is_digit(*p); ++p)
		{
			have_digits = true;

			if (mantissa <= kMantissaLimit)
			{
				mantissa = 10 * mantissa + static_cast<unsigned long long>(*p - '0');
				--exponent10;
			}
		}
	}

	if (not have_digits)
		return { first, std::errc::invalid_argument };

	// exponent, only consumed when there is at least one digit
	if (p != last and (*p == 'e' or *p == 'E'))
	{
		const char *q = p + 1;

		bool negative_exponent = false;
		if (q != last and (*q == '-' or *q == '+'))
		{
			negative_exponent = *q == '-';
			++q;
		}

		if (q != last and is_digit(*q))
		{
			long long exponent = 0;
			for (; q != last and is_digit(*q); ++q)
			{
				// saturate instead of overflowing
				if (exponent < kExponentLimit)
					exponent = 10 * exponent + (*q - '0');
			}

			exponent10 += negative_exponent ? -exponent : exponent;
			p = q;
		}
	}

	if (exponent10 > kExponentLimit)
		exponent10 = kExponentLimit;
	else if (exponent10 < -kExponentLimit)
		exponent10 = -kExponentLimit;

	// Apply the decimal exponent. Dividing by a positive power of ten is
	// more accurate than multiplying with an inexact negative power.
	long double v = static_cast<long double>(mantissa);

	while (exponent10 != 0 and v != 0 and not std::isinf(v))
	{
		long long step = exponent10 < 0 ? -exponent10 : exponent10;
		if (step > kScaleStep)
			step = kScaleStep;

		const long double scale = std::pow(10.0L, static_cast<int>(step));

		if (exponent10 < 0)
		{
			v /= scale;
			exponent10 += step;
		}
		else
		{
			v *= scale;
			exponent10 -= step;
		}
	}

	if (v > std::numeric_limits<FloatType>::max())
		return { p, std::errc::result_out_of_range };

	value = static_cast<FloatType>(negative ? -v : v);

	return { p, std::errc() };
}
|
||||
|
||||
/// \brief The formats supported by cif::to_chars, the values are the same
/// as used by printf: %e, %f and %g respectively
enum class chars_format
{
	scientific = 1,
	fixed = 2,
	// hex,
	general = fixed | scientific
};

namespace detail
{

	/// \brief Shared implementation of both to_chars overloads, using snprintf
	///
	/// A negative \a precision is taken by snprintf as if no precision was
	/// specified at all. An unsupported \a fmt is reported as a failure.
	template <typename FloatType>
	std::to_chars_result format_float(char *first, char *last, const FloatType &value, chars_format fmt, int precision)
	{
		// A long double requires the L length modifier, the l modifier has
		// no effect on floating point conversions.
		constexpr bool is_long_double = std::is_same_v<std::remove_cv_t<FloatType>, long double>;

		const std::ptrdiff_t size = last - first;
		int r = -1;

		if (size > 0)
		{
			const auto capacity = static_cast<std::size_t>(size);

			switch (fmt)
			{
				case chars_format::scientific:
					if constexpr (is_long_double)
						r = std::snprintf(first, capacity, "%.*Le", precision, value);
					else
						r = std::snprintf(first, capacity, "%.*e", precision, static_cast<double>(value));
					break;

				case chars_format::fixed:
					if constexpr (is_long_double)
						r = std::snprintf(first, capacity, "%.*Lf", precision, value);
					else
						r = std::snprintf(first, capacity, "%.*f", precision, static_cast<double>(value));
					break;

				case chars_format::general:
					if constexpr (is_long_double)
						r = std::snprintf(first, capacity, "%.*Lg", precision, value);
					else
						r = std::snprintf(first, capacity, "%.*g", precision, static_cast<double>(value));
					break;
			}
		}

		// snprintf also writes a terminating zero, the buffer should have room for that
		if (r < 0 or r >= size)
			return { first, std::errc::value_too_large };

		return { first + r, std::errc() };
	}

} // namespace detail

/// \brief to_chars for floating point types, implemented using snprintf
/// with the default precision.
///
/// Since snprintf is used, the buffer should have room for the text and a
/// terminating zero character, and the output depends on the current C locale.
///
/// \param first  Pointer to the start of the buffer
/// \param last   Pointer one past the end of the buffer
/// \param value  The value to write
/// \param fmt    The format to use
/// \return       On success ec is std::errc() and ptr points one past the
///               last character written. On failure ec is
///               std::errc::value_too_large and ptr equals \a first.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
{
	return detail::format_float(first, last, value, fmt, -1);
}

/// \brief to_chars for floating point types, implemented using snprintf
/// with precision \a precision.
///
/// Since snprintf is used, the buffer should have room for the text and a
/// terminating zero character, and the output depends on the current C locale.
///
/// \param first      Pointer to the start of the buffer
/// \param last       Pointer one past the end of the buffer
/// \param value      The value to write
/// \param fmt        The format to use
/// \param precision  The precision as used by printf for the selected format
/// \return           On success ec is std::errc() and ptr points one past
///                   the last character written. On failure ec is
///                   std::errc::value_too_large and ptr equals \a first.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
{
	return detail::format_float(first, last, value, fmt, precision);
}
|
||||
|
||||
template <typename T>
|
||||
struct my_charconv
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return cif::from_chars(a, b, d);
|
||||
}
|
||||
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return cif::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct std_charconv
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return std::from_chars(a, b, d);
|
||||
}
|
||||
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return std::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
|
||||
|
||||
template <typename T>
|
||||
using selected_charconv = typename std::conditional_t<std::experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
|
||||
|
||||
} // namespace cif
|
||||
@@ -26,13 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <list>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
#ifndef STDOUT_FILENO
|
||||
#define STDOUT_FILENO 1
|
||||
@@ -45,8 +39,6 @@
|
||||
#include <unistd.h>
|
||||
#endif
|
||||
|
||||
#include "cif++/Cif++Export.hpp"
|
||||
|
||||
#if _MSC_VER
|
||||
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
|
||||
#pragma warning(disable : 4068) // unknown pragma
|
||||
@@ -58,60 +50,12 @@
|
||||
namespace cif
|
||||
{
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
// the git 'build' number
|
||||
std::string get_version_nr();
|
||||
// std::string get_version_date();
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// some basic utilities: Since we're using ASCII input only, we define for optimisation
|
||||
// our own case conversion routines.
|
||||
|
||||
bool iequals(const std::string &a, const std::string &b);
|
||||
int icompare(const std::string &a, const std::string &b);
|
||||
|
||||
bool iequals(const char *a, const char *b);
|
||||
int icompare(const char *a, const char *b);
|
||||
|
||||
void toLower(std::string &s);
|
||||
std::string toLowerCopy(const std::string &s);
|
||||
|
||||
// To make life easier, we also define iless and iset using iequals
|
||||
|
||||
struct iless
|
||||
{
|
||||
bool operator()(const std::string &a, const std::string &b) const
|
||||
{
|
||||
return icompare(a, b) < 0;
|
||||
}
|
||||
};
|
||||
|
||||
typedef std::set<std::string, iless> iset;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
extern const uint8_t kCharToLowerMap[256];
|
||||
|
||||
inline char tolower(int ch)
|
||||
{
|
||||
return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::tuple<std::string, std::string> splitTagName(const std::string &tag);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// generate a cif name, mainly used to generate asym_id's
|
||||
|
||||
std::string cifIdForNumber(int number);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// custom wordwrapping routine
|
||||
|
||||
std::vector<std::string> wordWrap(const std::string &text, size_t width);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Code helping with terminal i/o
|
||||
|
||||
@@ -226,14 +170,14 @@ class Progress
|
||||
Progress(const Progress &) = delete;
|
||||
Progress &operator=(const Progress &) = delete;
|
||||
|
||||
struct ProgressImpl *mImpl;
|
||||
struct ProgressImpl *m_impl;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Resources
|
||||
|
||||
std::unique_ptr<std::istream> loadResource(std::filesystem::path name);
|
||||
void addFileResource(const std::string &name, std::filesystem::path dataFile);
|
||||
void addDataDirectory(std::filesystem::path dataDir);
|
||||
std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
|
||||
void add_file_resource(const std::string &name, std::filesystem::path dataFile);
|
||||
void add_data_directory(std::filesystem::path dataDir);
|
||||
|
||||
} // namespace cif
|
||||
242
include/cif++/validate.hpp
Normal file
242
include/cif++/validate.hpp
Normal file
@@ -0,0 +1,242 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <filesystem>
|
||||
#include <list>
|
||||
#include <mutex>
|
||||
#include <utility>
|
||||
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct category_validator;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Exception type used to report validation failures.
/// The text returned by what() is stored in the public member m_msg.
class validation_error : public std::exception
{
  public:
    /// Construct from a message; defined out of line.
    validation_error(const std::string &msg);

    /// Construct from a category name, an item name and a message; defined
    /// out of line (how the three are combined is not visible here).
    validation_error(const std::string &cat, const std::string &item,
        const std::string &msg);

    /// @return the stored message; the pointer stays valid as long as m_msg
    /// is not modified. Marked override so the compiler verifies this really
    /// replaces std::exception::what().
    const char *what() const noexcept override { return m_msg.c_str(); }

    std::string m_msg;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Primitive type classes a dictionary type can map to.
/// NOTE(review): the meanings below are taken from how the comparison code
/// treats each class; confirm against the dictionary specification.
enum class DDL_PrimitiveType
{
    Char,  ///< character data, presumably compared case sensitively
    UChar, ///< character data, presumably compared after lower-casing
    Numb   ///< numeric data
};
|
||||
|
||||
DDL_PrimitiveType map_to_primitive_type(std::string_view s);
|
||||
|
||||
struct regex_impl;
|
||||
|
||||
struct type_validator
|
||||
{
|
||||
std::string m_name;
|
||||
DDL_PrimitiveType m_primitive_type;
|
||||
regex_impl *m_rx;
|
||||
|
||||
type_validator() = delete;
|
||||
type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx);
|
||||
|
||||
type_validator(const type_validator &) = delete;
|
||||
type_validator(type_validator &&rhs)
|
||||
: m_name(std::move(rhs.m_name))
|
||||
, m_primitive_type(rhs.m_primitive_type)
|
||||
{
|
||||
m_rx = std::exchange(rhs.m_rx, nullptr);
|
||||
}
|
||||
|
||||
type_validator &operator=(const type_validator &) = delete;
|
||||
type_validator &operator=(type_validator &&rhs)
|
||||
{
|
||||
m_name = std::move(rhs.m_name);
|
||||
m_primitive_type = rhs.m_primitive_type;
|
||||
m_rx = std::exchange(rhs.m_rx, nullptr);
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
~type_validator();
|
||||
|
||||
bool operator<(const type_validator &rhs) const
|
||||
{
|
||||
return icompare(m_name, rhs.m_name) < 0;
|
||||
}
|
||||
|
||||
int compare(std::string_view a, std::string_view b) const;
|
||||
};
|
||||
|
||||
struct item_validator
|
||||
{
|
||||
std::string m_tag;
|
||||
bool m_mandatory;
|
||||
const type_validator *m_type;
|
||||
cif::iset m_enums;
|
||||
std::string m_default;
|
||||
bool m_default_is_null;
|
||||
category_validator *m_category = nullptr;
|
||||
|
||||
// ItemLinked is used for non-key links
|
||||
struct item_link
|
||||
{
|
||||
item_validator *m_parent;
|
||||
std::string m_parent_item;
|
||||
std::string m_child_item;
|
||||
};
|
||||
|
||||
std::vector<item_link> mLinked;
|
||||
|
||||
bool operator<(const item_validator &rhs) const
|
||||
{
|
||||
return icompare(m_tag, rhs.m_tag) < 0;
|
||||
}
|
||||
|
||||
bool operator==(const item_validator &rhs) const
|
||||
{
|
||||
return iequals(m_tag, rhs.m_tag);
|
||||
}
|
||||
|
||||
void operator()(std::string_view value) const;
|
||||
};
|
||||
|
||||
struct category_validator
|
||||
{
|
||||
std::string m_name;
|
||||
std::vector<std::string> m_keys;
|
||||
cif::iset m_groups;
|
||||
cif::iset m_mandatory_fields;
|
||||
std::set<item_validator> m_item_validators;
|
||||
|
||||
bool operator<(const category_validator &rhs) const
|
||||
{
|
||||
return icompare(m_name, rhs.m_name) < 0;
|
||||
}
|
||||
|
||||
void addItemValidator(item_validator &&v);
|
||||
|
||||
const item_validator *get_validator_for_item(std::string_view tag) const;
|
||||
|
||||
const std::set<item_validator> &item_validators() const
|
||||
{
|
||||
return m_item_validators;
|
||||
}
|
||||
};
|
||||
|
||||
/// Describes one link between a parent and a child category: the group it
/// belongs to and the key items on either side.
struct link_validator
{
    int m_link_group_id = 0; // initialised so a default constructed link is well defined
    std::string m_parent_category;
    std::vector<std::string> m_parent_keys;
    std::string m_child_category;
    std::vector<std::string> m_child_keys;
    std::string m_link_group_label;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class validator
|
||||
{
|
||||
public:
|
||||
validator(std::string_view name)
|
||||
: m_name(name)
|
||||
{
|
||||
}
|
||||
|
||||
~validator() = default;
|
||||
|
||||
validator(const validator &rhs) = delete;
|
||||
validator &operator=(const validator &rhs) = delete;
|
||||
|
||||
validator(validator &&rhs) = default;
|
||||
validator &operator=(validator &&rhs) = default;
|
||||
|
||||
friend class dictionary_parser;
|
||||
|
||||
void add_type_validator(type_validator &&v);
|
||||
const type_validator *get_validator_for_type(std::string_view type_code) const;
|
||||
|
||||
void add_category_validator(category_validator &&v);
|
||||
const category_validator *get_validator_for_category(std::string_view category) const;
|
||||
|
||||
void add_link_validator(link_validator &&v);
|
||||
std::vector<const link_validator *> get_links_for_parent(std::string_view category) const;
|
||||
std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
|
||||
|
||||
void report_error(const std::string &msg, bool fatal) const;
|
||||
|
||||
const std::string &name() const { return m_name; }
|
||||
void set_name(const std::string &name) { m_name = name; }
|
||||
|
||||
const std::string &version() const { return m_version; }
|
||||
void version(const std::string &version) { m_version = version; }
|
||||
|
||||
private:
|
||||
// name is fully qualified here:
|
||||
item_validator *get_validator_for_item(std::string_view name) const;
|
||||
|
||||
std::string m_name;
|
||||
std::string m_version;
|
||||
bool m_strict = false;
|
||||
std::set<type_validator> m_type_validators;
|
||||
std::set<category_validator> m_category_validators;
|
||||
std::vector<link_validator> m_link_validators;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
class validator_factory
|
||||
{
|
||||
public:
|
||||
static validator_factory &instance()
|
||||
{
|
||||
static validator_factory s_instance;
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
const validator &operator[](std::string_view dictionary_name);
|
||||
|
||||
private:
|
||||
void construct_validator(std::string_view name, std::istream &is);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
validator_factory() = default;
|
||||
|
||||
std::mutex m_mutex;
|
||||
std::list<validator> m_validators;
|
||||
};
|
||||
|
||||
} // namespace cif
|
||||
@@ -8,5 +8,6 @@ Name: libcifpp
|
||||
Description: C++ library for the manipulation of mmCIF files.
|
||||
Version: @PACKAGE_VERSION@
|
||||
|
||||
Libs: -L${libdir} -lcifpp @PRIVATE_LIBS@
|
||||
Cflags: -I${includedir} @PRIVATE_INC_DIRS@
|
||||
Requires.private: zlib, liblzma
|
||||
Libs: -L${libdir} -lcifpp
|
||||
Cflags: -I${includedir} -pthread
|
||||
|
||||
1
regex
Submodule
1
regex
Submodule
Submodule regex added at e5979ae1af
150867
rsrc/mmcif_ma.dic
Normal file
150867
rsrc/mmcif_ma.dic
Normal file
File diff suppressed because it is too large
Load Diff
1318
src/AtomType.cpp
1318
src/AtomType.cpp
File diff suppressed because it is too large
Load Diff
627
src/BondMap.cpp
627
src/BondMap.cpp
@@ -1,627 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <fstream>
|
||||
#include <algorithm>
|
||||
#include <mutex>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include "cif++/BondMap.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
union IDType
|
||||
{
|
||||
IDType() : id_n(0){}
|
||||
IDType(const IDType& rhs) : id_n(rhs.id_n) {}
|
||||
IDType(const std::string& s)
|
||||
: IDType()
|
||||
{
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
}
|
||||
|
||||
IDType& operator=(const IDType& rhs)
|
||||
{
|
||||
id_n = rhs.id_n;
|
||||
return *this;
|
||||
}
|
||||
|
||||
IDType& operator=(const std::string& s)
|
||||
{
|
||||
id_n = 0;
|
||||
assert(s.length() <= 4);
|
||||
if (s.length() > 4)
|
||||
throw BondMapException("Atom ID '" + s + "' is too long");
|
||||
std::copy(s.begin(), s.end(), id_s);
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool operator<(const IDType& rhs) const
|
||||
{
|
||||
return id_n < rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator<=(const IDType& rhs) const
|
||||
{
|
||||
return id_n <= rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator==(const IDType& rhs) const
|
||||
{
|
||||
return id_n == rhs.id_n;
|
||||
}
|
||||
|
||||
bool operator!=(const IDType& rhs) const
|
||||
{
|
||||
return id_n != rhs.id_n;
|
||||
}
|
||||
|
||||
char id_s[4];
|
||||
uint32_t id_n;
|
||||
};
|
||||
|
||||
static_assert(sizeof(IDType) == 4, "atom_id_type should be 4 bytes");
|
||||
}
|
||||
|
||||
// // --------------------------------------------------------------------
|
||||
|
||||
// void createBondInfoFile(const fs::path& components, const fs::path& infofile)
|
||||
// {
|
||||
// std::ofstream outfile(infofile.string() + ".tmp", std::ios::binary);
|
||||
// if (not outfile.is_open())
|
||||
// throw BondMapException("Could not create bond info file " + infofile.string() + ".tmp");
|
||||
|
||||
// cif::File infile(components);
|
||||
|
||||
// std::set<atom_id_type> atomIDs;
|
||||
// std::vector<atom_id_type> compoundIDs;
|
||||
|
||||
// for (auto& db: infile)
|
||||
// {
|
||||
// auto chem_comp_bond = db.get("chem_comp_bond");
|
||||
// if (not chem_comp_bond)
|
||||
// {
|
||||
// if (cif::VERBOSE > 1)
|
||||
// std::cerr << "Missing chem_comp_bond category in data block " << db.getName() << std::endl;
|
||||
// continue;
|
||||
// }
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// atomIDs.insert(atom_id_1);
|
||||
// atomIDs.insert(atom_id_2);
|
||||
// }
|
||||
|
||||
// compoundIDs.push_back({ db.getName() });
|
||||
// }
|
||||
|
||||
// if (cif::VERBOSE)
|
||||
// std::cout << "Number of unique atom names is " << atomIDs.size() << std::endl
|
||||
// << "Number of unique residue names is " << compoundIDs.size() << std::endl;
|
||||
|
||||
// CompoundBondInfoFileHeader header = {};
|
||||
// header.indexEntries = compoundIDs.size();
|
||||
// header.atomEntries = atomIDs.size();
|
||||
|
||||
// outfile << header;
|
||||
|
||||
// for (auto atomID: atomIDs)
|
||||
// outfile << atomID;
|
||||
|
||||
// auto dataOffset = outfile.tellp();
|
||||
|
||||
// std::vector<CompoundBondInfo> entries;
|
||||
// entries.reserve(compoundIDs.size());
|
||||
|
||||
// std::map<atom_id_type, uint16_t> atomIDMap;
|
||||
// for (auto& atomID: atomIDs)
|
||||
// atomIDMap[atomID] = atomIDMap.size();
|
||||
|
||||
// for (auto& db: infile)
|
||||
// {
|
||||
// auto chem_comp_bond = db.get("chem_comp_bond");
|
||||
// if (not chem_comp_bond)
|
||||
// continue;
|
||||
|
||||
// std::set<uint16_t> bondedAtoms;
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// bondedAtoms.insert(atomIDMap[atom_id_1]);
|
||||
// bondedAtoms.insert(atomIDMap[atom_id_2]);
|
||||
// }
|
||||
|
||||
// std::map<uint16_t, int32_t> bondedAtomMap;
|
||||
// for (auto id: bondedAtoms)
|
||||
// bondedAtomMap[id] = static_cast<int32_t>(bondedAtomMap.size());
|
||||
|
||||
// CompoundBondInfo info = {
|
||||
// db.getName(),
|
||||
// static_cast<uint32_t>(bondedAtomMap.size()),
|
||||
// outfile.tellp() - dataOffset
|
||||
// };
|
||||
|
||||
// entries.push_back(info);
|
||||
|
||||
// // And now first write the array of atom IDs in this compound
|
||||
// for (uint16_t id: bondedAtoms)
|
||||
// write(outfile, id);
|
||||
|
||||
// // And then the symmetric matrix with bonds
|
||||
// size_t N = bondedAtoms.size();
|
||||
// size_t M = (N * (N - 1)) / 2;
|
||||
|
||||
// size_t K = M / 8;
|
||||
// if (M % 8)
|
||||
// K += 1;
|
||||
|
||||
// std::vector<uint8_t> m(K);
|
||||
|
||||
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
|
||||
// {
|
||||
// auto a = bondedAtomMap[atomIDMap[atom_id_1]];
|
||||
// auto b = bondedAtomMap[atomIDMap[atom_id_2]];
|
||||
|
||||
// assert(a != b);
|
||||
// assert((int)b < (int)N);
|
||||
|
||||
// if (a > b)
|
||||
// std::swap(a, b);
|
||||
|
||||
// size_t ix = ((b - 1) * b) / 2 + a;
|
||||
// assert(ix < M);
|
||||
|
||||
// auto Bix = ix / 8;
|
||||
// auto bix = ix % 8;
|
||||
|
||||
// m[Bix] |= 1 << bix;
|
||||
// }
|
||||
|
||||
// outfile.write(reinterpret_cast<char*>(m.data()), m.size());
|
||||
// }
|
||||
|
||||
// header.dataSize = outfile.tellp() - dataOffset;
|
||||
|
||||
// std::sort(entries.begin(), entries.end(), [](CompoundBondInfo& a, CompoundBondInfo& b)
|
||||
// {
|
||||
// return a.id < b.id;
|
||||
// });
|
||||
|
||||
// for (auto& info: entries)
|
||||
// outfile << info;
|
||||
|
||||
// outfile.seekp(0);
|
||||
// outfile << header;
|
||||
|
||||
// // compress
|
||||
// outfile.close();
|
||||
|
||||
// std::ifstream in(infofile.string() + ".tmp", std::ios::binary);
|
||||
// std::ofstream out(infofile, std::ios::binary);
|
||||
|
||||
// {
|
||||
// io::filtering_stream<io::output> os;
|
||||
// os.push(io::gzip_compressor());
|
||||
// os.push(out);
|
||||
// io::copy(in, os);
|
||||
// }
|
||||
|
||||
// in.close();
|
||||
// out.close();
|
||||
|
||||
// fs::remove(infofile.string() + ".tmp");
|
||||
// }
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct CompoundBondInfo
|
||||
{
|
||||
IDType mID;
|
||||
std::set<std::tuple<uint32_t,uint32_t>> mBonded;
|
||||
|
||||
bool bonded(uint32_t a1, uint32_t a2) const
|
||||
{
|
||||
return mBonded.count({ a1, a2 }) > 0;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CompoundBondMap
|
||||
{
|
||||
public:
|
||||
|
||||
static CompoundBondMap &instance()
|
||||
{
|
||||
static std::unique_ptr<CompoundBondMap> s_instance(new CompoundBondMap);
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
bool bonded(const std::string& compoundID, const std::string& atomID1, const std::string& atomID2);
|
||||
|
||||
private:
|
||||
|
||||
CompoundBondMap() {}
|
||||
|
||||
uint32_t getAtomID(const std::string& atomID)
|
||||
{
|
||||
IDType id(atomID);
|
||||
|
||||
uint32_t result;
|
||||
|
||||
auto i = mAtomIDIndex.find(id);
|
||||
if (i == mAtomIDIndex.end())
|
||||
{
|
||||
result = uint32_t(mAtomIDIndex.size());
|
||||
mAtomIDIndex[id] = result;
|
||||
}
|
||||
else
|
||||
result = i->second;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::map<IDType,uint32_t> mAtomIDIndex;
|
||||
std::vector<CompoundBondInfo> mCompounds;
|
||||
std::mutex mMutex;
|
||||
};
|
||||
|
||||
bool CompoundBondMap::bonded(const std::string &compoundID, const std::string& atomID1, const std::string& atomID2)
|
||||
{
|
||||
std::lock_guard lock(mMutex);
|
||||
|
||||
using namespace std::literals;
|
||||
|
||||
IDType id(compoundID);
|
||||
uint32_t a1 = getAtomID(atomID1);
|
||||
uint32_t a2 = getAtomID(atomID2);
|
||||
if (a1 > a2)
|
||||
std::swap(a1, a2);
|
||||
|
||||
for (auto &bi: mCompounds)
|
||||
{
|
||||
if (bi.mID != id)
|
||||
continue;
|
||||
|
||||
return bi.bonded(a1, a2);
|
||||
}
|
||||
|
||||
bool result = false;
|
||||
|
||||
// not found in our cache, calculate
|
||||
CompoundBondInfo bondInfo{ id };
|
||||
|
||||
auto compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
if (not compound)
|
||||
std::cerr << "Missing compound bond info for " << compoundID << std::endl;
|
||||
else
|
||||
{
|
||||
for (auto &atom: compound->bonds())
|
||||
{
|
||||
uint32_t ca1 = getAtomID(atom.atomID[0]);
|
||||
uint32_t ca2 = getAtomID(atom.atomID[1]);
|
||||
if (ca1 > ca2)
|
||||
std::swap(ca1, ca2);
|
||||
|
||||
bondInfo.mBonded.insert({ca1, ca2});
|
||||
result = result or (a1 == ca1 and a2 == ca2);
|
||||
}
|
||||
}
|
||||
|
||||
mCompounds.push_back(bondInfo);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Build the bond map for structure p.
//
// Steps, in order:
//  1. number all atoms (index) and record the atom count (dim);
//  2. bind C of one residue to N of the next within each asym listed in
//     pdbx_poly_seq_scheme;
//  3. bind and link the atom pairs listed in struct_conn;
//  4. for each compound, bind the atom pairs that CompoundBondMap reports as
//     bonded, for the residues found in pdbx_poly_seq_scheme,
//     pdbx_nonpoly_scheme and pdbx_branch_scheme;
//  5. derive the 1-4 relations (bond_1_4) from the direct bonds.
BondMap::BondMap(const Structure &p)
{
    auto &compoundBondInfo = CompoundBondMap::instance();

    auto atoms = p.atoms();
    dim = uint32_t(atoms.size());

    // Number the atoms in iteration order. The number is taken before the
    // entry is created so the result does not depend on the order in which
    // the two sides of the assignment are evaluated.
    for (auto &atom : atoms)
    {
        const auto ix = uint32_t(index.size());
        index[atom.id()] = ix;
    }

    // record a bond between the atoms with IDs a and b
    auto bindAtoms = [this](const std::string &a, const std::string &b)
    {
        uint32_t ixa = index[a];
        uint32_t ixb = index[b];

        bond.insert(key(ixa, ixb));
    };

    // record a bond and additionally register a and b as linked to each other
    auto linkAtoms = [this, &bindAtoms](const std::string &a, const std::string &b)
    {
        bindAtoms(a, b);

        link[a].insert(b);
        link[b].insert(a);
    };

    cif::Datablock &db = p.getFile().data();

    // collect all compounds first
    std::set<std::string> compounds;
    for (auto c : db["chem_comp"])
        compounds.insert(c["id"].as<std::string>());

    // make sure we also have all residues in the polyseq
    for (auto m : db["entity_poly_seq"])
    {
        std::string c = m["mon_id"].as<std::string>();
        if (compounds.count(c))
            continue;

        if (cif::VERBOSE > 1)
            std::cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << std::endl;
        compounds.insert(c);
    }

    cif::Progress progress(compounds.size(), "Creating bond map");

    // some helper indices to speed things up a bit
    std::map<std::tuple<std::string, int, std::string>, std::string> atomMapByAsymSeqAndAtom;
    for (auto &a : atoms)
    {
        auto mapKey = std::make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
        atomMapByAsymSeqAndAtom[mapKey] = a.id();
    }

    // Look up an atom ID, returning an empty string when there is no such
    // atom. Uses find so that a miss does not add an empty entry to the map.
    auto findAtom = [&atomMapByAsymSeqAndAtom](const std::string &asymID, int seqID, const std::string &atomID) -> std::string
    {
        auto i = atomMapByAsymSeqAndAtom.find(std::make_tuple(asymID, seqID, atomID));
        return i == atomMapByAsymSeqAndAtom.end() ? std::string{} : i->second;
    };

    // first link all residues in a polyseq

    std::string lastAsymID;
    int lastSeqID = 0;
    for (auto r : db["pdbx_poly_seq_scheme"])
    {
        std::string asymID;
        int seqID = 0;

        cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");

        if (asymID != lastAsymID) // first in a new sequence
        {
            lastAsymID = asymID;
            lastSeqID = seqID;
            continue;
        }

        auto c = findAtom(asymID, lastSeqID, "C");
        auto n = findAtom(asymID, seqID, "N");

        if (not(c.empty() or n.empty()))
            bindAtoms(c, n);

        lastSeqID = seqID;
    }

    for (auto l : db["struct_conn"])
    {
        std::string asym1, asym2, atomId1, atomId2;
        int seqId1 = 0, seqId2 = 0;
        cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
            l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
                "ptnr1_label_atom_id", "ptnr2_label_atom_id",
                "ptnr1_label_seq_id", "ptnr2_label_seq_id");

        std::string a = findAtom(asym1, seqId1, atomId1);
        std::string b = findAtom(asym2, seqId2, atomId2);

        if (not(a.empty() or b.empty()))
            linkAtoms(a, b);
    }

    // then link all atoms in the compounds

    for (const auto &c : compounds)
    {
        if (c == "HOH" or c == "H2O" or c == "WAT")
        {
            if (cif::VERBOSE)
                std::cerr << "skipping water in bond map calculation" << std::endl;
            continue;
        }

        // are atoms a and b bonded according to the definition of compound c?
        auto bonded = [&c, &compoundBondInfo](const Atom &a, const Atom &b)
        {
            auto label_a = a.labelAtomID();
            auto label_b = b.labelAtomID();

            return compoundBondInfo.bonded(c, label_a, label_b);
        };

        // bind every pair of atoms in rAtoms that is bonded in compound c
        auto bindBondedPairs = [&bonded, &bindAtoms](const std::vector<Atom> &rAtoms)
        {
            for (size_t i = 0; i + 1 < rAtoms.size(); ++i)
            {
                for (size_t j = i + 1; j < rAtoms.size(); ++j)
                {
                    if (bonded(rAtoms[i], rAtoms[j]))
                        bindAtoms(rAtoms[i].id(), rAtoms[j].id());
                }
            }
        };

        // loop over poly_seq_scheme
        for (auto r : db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == c))
        {
            std::string asymID;
            int seqID = 0;
            cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");

            std::vector<Atom> rAtoms;
            std::copy_if(atoms.begin(), atoms.end(), std::back_inserter(rAtoms),
                [&](auto &a) { return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });

            bindBondedPairs(rAtoms);
        }

        // loop over pdbx_nonpoly_scheme
        // NOTE(review): atoms are selected by asym ID only, confirm that this
        // is sufficient to identify a single residue.
        for (auto r : db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == c))
        {
            std::string asymID;
            cif::tie(asymID) = r.get("asym_id");

            std::vector<Atom> rAtoms;
            std::copy_if(atoms.begin(), atoms.end(), std::back_inserter(rAtoms),
                [&](auto &a) { return a.labelAsymID() == asymID; });

            bindBondedPairs(rAtoms);
        }

        // loop over pdbx_branch_scheme
        // NOTE(review): atoms are selected by asym ID only here as well.
        for (auto r : db["pdbx_branch_scheme"].find(cif::Key("mon_id") == c))
        {
            std::string asymID;
            cif::tie(asymID) = r.get("asym_id");

            std::vector<Atom> rAtoms;
            std::copy_if(atoms.begin(), atoms.end(), std::back_inserter(rAtoms),
                [&](auto &a) { return a.labelAsymID() == asymID; });

            bindBondedPairs(rAtoms);
        }
    }

    // start by creating an index for single bonds

    std::multimap<uint32_t, uint32_t> b1_2;
    for (auto &bk : bond)
    {
        uint32_t a, b;
        std::tie(a, b) = dekey(bk);

        b1_2.insert({ a, b });
        b1_2.insert({ b, a });
    }

    // atoms that share a bonded neighbour but are not bonded themselves (1-3)
    std::multimap<uint32_t, uint32_t> b1_3;
    for (uint32_t i = 0; i < dim; ++i)
    {
        auto a = b1_2.equal_range(i);

        std::vector<uint32_t> s;
        for (auto j = a.first; j != a.second; ++j)
            s.push_back(j->second);

        for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
        {
            for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
            {
                uint32_t x = s[si1];
                uint32_t y = s[si2];

                if (isBonded(x, y))
                    continue;

                b1_3.insert({ x, y });
                b1_3.insert({ y, x });
            }
        }
    }

    // combine the direct neighbours of i with its 1-3 partners to get the
    // 1-4 relations
    for (uint32_t i = 0; i < dim; ++i)
    {
        auto a1 = b1_2.equal_range(i);
        auto a2 = b1_3.equal_range(i);

        for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
        {
            for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
            {
                uint32_t b1 = ai1->second;
                uint32_t b2 = ai2->second;

                if (isBonded(b1, b2))
                    continue;

                bond_1_4.insert(key(b1, b2));
            }
        }
    }
}
|
||||
|
||||
std::vector<std::string> BondMap::linked(const Atom& a) const
|
||||
{
|
||||
auto i = link.find(a.id());
|
||||
|
||||
std::vector<std::string> result;
|
||||
|
||||
if (i != link.end())
|
||||
result = std::vector<std::string>(i->second.begin(), i->second.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> BondMap::atomIDsForCompound(const std::string& compoundID)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
auto* compound = mmcif::CompoundFactory::instance().create(compoundID);
|
||||
|
||||
if (compound == nullptr)
|
||||
throw BondMapException("Missing bond information for compound " + compoundID);
|
||||
|
||||
for (auto& compAtom: compound->atoms())
|
||||
result.push_back(compAtom.id);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
3696
src/Cif++.cpp
3696
src/Cif++.cpp
File diff suppressed because it is too large
Load Diff
3981
src/Cif2PDB.cpp
3981
src/Cif2PDB.cpp
File diff suppressed because it is too large
Load Diff
1326
src/CifParser.cpp
1326
src/CifParser.cpp
File diff suppressed because it is too large
Load Diff
1300
src/CifUtils.cpp
1300
src/CifUtils.cpp
File diff suppressed because it is too large
Load Diff
@@ -1,351 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/CifParser.hpp"
|
||||
#include "cif++/CifValidator.hpp"
|
||||
|
||||
namespace ba = boost::algorithm;
|
||||
|
||||
extern int VERBOSE;
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// Construct an error that carries msg unchanged.
ValidationError::ValidationError(const std::string &msg)
    : mMsg(msg)
{
}

// Construct an error whose text names the offending item as _<cat>.<item>,
// followed by msg.
ValidationError::ValidationError(const std::string &cat, const std::string &item, const std::string &msg)
    : mMsg(std::string("When validating _") + cat + '.' + item + ": " + msg)
{
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Translate the name of a primitive type ("char", "uchar" or "numb", matched
// with iequals) into the corresponding DDL_PrimitiveType.
// Throws ValidationError for any other name.
DDL_PrimitiveType mapToPrimitiveType(const std::string &s)
{
    if (iequals(s, "char"))
        return DDL_PrimitiveType::Char;

    if (iequals(s, "uchar"))
        return DDL_PrimitiveType::UChar;

    if (iequals(s, "numb"))
        return DDL_PrimitiveType::Numb;

    throw ValidationError("Not a known primitive type");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int ValidateType::compare(const char* a, const char* b) const
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
if (*a == 0)
|
||||
result = *b == 0 ? 0 : -1;
|
||||
else if (*b == 0)
|
||||
result = *a == 0 ? 0 : +1;
|
||||
else
|
||||
{
|
||||
try
|
||||
{
|
||||
switch (mPrimitiveType)
|
||||
{
|
||||
case DDL_PrimitiveType::Numb:
|
||||
{
|
||||
double da = strtod(a, nullptr);
|
||||
double db = strtod(b, nullptr);
|
||||
|
||||
auto d = da - db;
|
||||
if (std::abs(d) > std::numeric_limits<double>::epsilon())
|
||||
{
|
||||
if (d > 0)
|
||||
result = 1;
|
||||
else if (d < 0)
|
||||
result = -1;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case DDL_PrimitiveType::UChar:
|
||||
case DDL_PrimitiveType::Char:
|
||||
{
|
||||
// CIF is guaranteed to have ascii only, therefore this primitive code will do
|
||||
// also, we're collapsing spaces
|
||||
|
||||
auto ai = a, bi = b;
|
||||
for (;;)
|
||||
{
|
||||
if (*ai == 0)
|
||||
{
|
||||
if (*bi != 0)
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
else if (*bi == 0)
|
||||
{
|
||||
result = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
char ca = *ai;
|
||||
char cb = *bi;
|
||||
|
||||
if (mPrimitiveType == DDL_PrimitiveType::UChar)
|
||||
{
|
||||
ca = tolower(ca);
|
||||
cb = tolower(cb);
|
||||
}
|
||||
|
||||
result = ca - cb;
|
||||
|
||||
if (result != 0)
|
||||
break;
|
||||
|
||||
if (ca == ' ')
|
||||
{
|
||||
while (ai[1] == ' ')
|
||||
++ai;
|
||||
while (bi[1] == ' ')
|
||||
++bi;
|
||||
}
|
||||
|
||||
++ai;
|
||||
++bi;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const std::invalid_argument& ex)
|
||||
{
|
||||
result = 1;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
//void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
|
||||
//{
|
||||
//// if (mParent != nullptr and VERBOSE)
|
||||
//// cerr << "replacing parent in " << mCategory->mName << " from " << mParent->mCategory->mName << " to " << parent->mCategory->mName << endl;
|
||||
//// mParent = parent;
|
||||
//
|
||||
// if (mType == nullptr and parent != nullptr)
|
||||
// mType = parent->mType;
|
||||
//
|
||||
// if (parent != nullptr)
|
||||
// {
|
||||
// mLinked.push_back({parent, parentItem, childItem});
|
||||
//
|
||||
// parent->mChildren.insert(this);
|
||||
////
|
||||
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
|
||||
//// parent->mForeignKeys.insert(this);
|
||||
// }
|
||||
//}
|
||||
|
||||
void ValidateItem::operator()(std::string value) const
|
||||
{
|
||||
if (not value.empty() and value != "?" and value != ".")
|
||||
{
|
||||
if (mType != nullptr and not regex_match(value, mType->mRx))
|
||||
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);
|
||||
|
||||
if (not mEnums.empty())
|
||||
{
|
||||
if (mEnums.count(value) == 0)
|
||||
throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Register an item validator with this category.
// A mandatory item has its tag recorded in mMandatoryFields, and the
// validator's mCategory back pointer is set to this category before it is
// moved into mItemValidators. When the insert is rejected a diagnostic is
// written to std::cout if VERBOSE >= 4.
void ValidateCategory::addItemValidator(ValidateItem&& v)
{
    if (v.mMandatory)
        mMandatoryFields.insert(v.mTag);

    v.mCategory = this;

    // Keep a copy of the tag: after std::move(v) the validator may be in a
    // moved-from state and must not be read for the diagnostic below.
    const auto tag = v.mTag;

    const bool inserted = mItemValidators.insert(std::move(v)).second;
    if (not inserted and VERBOSE >= 4)
        std::cout << "Could not add validator for item " << tag << " to category " << mName << std::endl;
}
|
||||
|
||||
// Look up the validator registered for the item named tag.
// Returns a pointer to the stored validator, or nullptr when there is none
// (in which case a note is printed to std::cout if VERBOSE > 4).
const ValidateItem* ValidateCategory::getValidatorForItem(std::string tag) const
{
    // probe object used as the search key for the set
    const ValidateItem probe{tag};

    const auto pos = mItemValidators.find(probe);
    if (pos != mItemValidators.end())
        return &*pos;

    if (VERBOSE > 4)
        std::cout << "No validator for tag " << tag << std::endl;

    return nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Nothing to set up explicitly; members take care of themselves.
Validator::Validator() = default;

// Nothing to tear down explicitly; members take care of themselves.
Validator::~Validator() = default;
|
||||
|
||||
// Register a type validator. When the insert into mTypeValidators is
// rejected a diagnostic is written to std::cout if VERBOSE > 4.
void Validator::addTypeValidator(ValidateType&& v)
{
    // Keep a copy of the name: after std::move(v) the validator may be in a
    // moved-from state and must not be read for the diagnostic below.
    const auto name = v.mName;

    const bool inserted = mTypeValidators.insert(std::move(v)).second;
    if (not inserted and VERBOSE > 4)
        std::cout << "Could not add validator for type " << name << std::endl;
}
|
||||
|
||||
// Look up the validator registered for the type named typeCode.
// Returns a pointer to the stored validator, or nullptr when there is none
// (in which case a note is printed to std::cout if VERBOSE > 4).
const ValidateType* Validator::getValidatorForType(std::string typeCode) const
{
    // Search key; the primitive type and regex are placeholders here
    // (presumably the set orders on the name only -- verify against ValidateType).
    const ValidateType probe{ typeCode, DDL_PrimitiveType::Char, boost::regex() };

    const auto pos = mTypeValidators.find(probe);
    if (pos != mTypeValidators.end())
        return &*pos;

    if (VERBOSE > 4)
        std::cout << "No validator for type " << typeCode << std::endl;

    return nullptr;
}
|
||||
|
||||
// Register a category validator. When the insert into mCategoryValidators is
// rejected a diagnostic is written to std::cout if VERBOSE > 4.
void Validator::addCategoryValidator(ValidateCategory&& v)
{
    // Keep a copy of the name: after std::move(v) the validator may be in a
    // moved-from state and must not be read for the diagnostic below.
    const auto name = v.mName;

    const bool inserted = mCategoryValidators.insert(std::move(v)).second;
    if (not inserted and VERBOSE > 4)
        std::cout << "Could not add validator for category " << name << std::endl;
}
|
||||
|
||||
// Look up the validator registered for the named category.
// Returns a pointer to the stored validator, or nullptr when there is none
// (in which case a note is printed to std::cout if VERBOSE > 4).
const ValidateCategory* Validator::getValidatorForCategory(std::string category) const
{
    // probe object used as the search key for the set
    const ValidateCategory probe{category};

    const auto pos = mCategoryValidators.find(probe);
    if (pos != mCategoryValidators.end())
        return &*pos;

    if (VERBOSE > 4)
        std::cout << "No validator for category " << category << std::endl;

    return nullptr;
}
|
||||
|
||||
// Find the item validator for a full tag. The tag is split into a category
// and an item part with splitTagName, the category validator is looked up
// and then asked for the item validator.
// Returns nullptr when either lookup fails; a note is printed to std::cout
// in that case if VERBOSE > 4.
ValidateItem* Validator::getValidatorForItem(std::string tag) const
{
    std::string cat, item;
    std::tie(cat, item) = splitTagName(tag);

    ValidateItem* result = nullptr;

    if (const ValidateCategory* cv = getValidatorForCategory(cat))
        result = const_cast<ValidateItem*>(cv->getValidatorForItem(item));

    if (result == nullptr and VERBOSE > 4)
        std::cout << "No validator for item " << tag << std::endl;

    return result;
}
|
||||
|
||||
// Register a parent/child link between two categories.
//
// The link must list the same number of parent and child keys, both
// categories must have a category validator and every listed key must have an
// item validator in its category; std::runtime_error is thrown otherwise.
// A child item without a type inherits the type of the corresponding parent
// item. On success the link is moved into mLinkValidators.
//
// The key-count check is reported by exception only (no assert), so debug and
// release builds fail the same way.
void Validator::addLinkValidator(ValidateLink&& v)
{
    if (v.mParentKeys.size() != v.mChildKeys.size())
        throw std::runtime_error("unequal number of keys for parent and child in link");

    auto pcv = getValidatorForCategory(v.mParentCategory);
    auto ccv = getValidatorForCategory(v.mChildCategory);

    if (pcv == nullptr)
        throw std::runtime_error("unknown parent category " + v.mParentCategory);

    if (ccv == nullptr)
        throw std::runtime_error("unknown child category " + v.mChildCategory);

    for (size_t i = 0; i < v.mParentKeys.size(); ++i)
    {
        auto piv = pcv->getValidatorForItem(v.mParentKeys[i]);
        if (piv == nullptr)
            throw std::runtime_error("unknown parent tag _" + v.mParentCategory + '.' + v.mParentKeys[i]);

        auto civ = ccv->getValidatorForItem(v.mChildKeys[i]);
        if (civ == nullptr)
            throw std::runtime_error("unknown child tag _" + v.mChildCategory + '.' + v.mChildKeys[i]);

        // the lookup hands out a const pointer; the type is filled in on the stored validator itself
        if (civ->mType == nullptr and piv->mType != nullptr)
            const_cast<ValidateItem*>(civ)->mType = piv->mType;
    }

    mLinkValidators.emplace_back(std::move(v));
}
|
||||
|
||||
std::vector<const ValidateLink*> Validator::getLinksForParent(const std::string& category) const
|
||||
{
|
||||
std::vector<const ValidateLink*> result;
|
||||
|
||||
for (auto& l: mLinkValidators)
|
||||
{
|
||||
if (l.mParentCategory == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<const ValidateLink*> Validator::getLinksForChild(const std::string& category) const
|
||||
{
|
||||
std::vector<const ValidateLink*> result;
|
||||
|
||||
for (auto& l: mLinkValidators)
|
||||
{
|
||||
if (l.mChildCategory == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void Validator::reportError(const std::string& msg, bool fatal)
|
||||
{
|
||||
if (mStrict or fatal)
|
||||
throw ValidationError(msg);
|
||||
else if (VERBOSE)
|
||||
std::cerr << msg << std::endl;
|
||||
}
|
||||
|
||||
}
|
||||
745
src/Compound.cpp
745
src/Compound.cpp
@@ -1,745 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include <boost/algorithm/string.hpp>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include "cif++/Cif++.hpp"
|
||||
#include "cif++/CifParser.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include "cif++/Compound.hpp"
|
||||
#include "cif++/Point.hpp"
|
||||
|
||||
namespace ba = boost::algorithm;
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string to_string(BondType bondType)
|
||||
{
|
||||
switch (bondType)
|
||||
{
|
||||
case BondType::sing: return "sing";
|
||||
case BondType::doub: return "doub";
|
||||
case BondType::trip: return "trip";
|
||||
case BondType::quad: return "quad";
|
||||
case BondType::arom: return "arom";
|
||||
case BondType::poly: return "poly";
|
||||
case BondType::delo: return "delo";
|
||||
case BondType::pi: return "pi";
|
||||
}
|
||||
throw std::invalid_argument("Invalid bondType");
|
||||
}
|
||||
|
||||
// Parse a bond type code (compared using cif::iequals) into a BondType.
// Throws std::invalid_argument, mentioning the offending text, when the code
// is not one of the known ones.
BondType from_string(const std::string &bondType)
{
    static const std::pair<const char *, BondType> kCodes[] = {
        {"sing", BondType::sing},
        {"doub", BondType::doub},
        {"trip", BondType::trip},
        {"quad", BondType::quad},
        {"arom", BondType::arom},
        {"poly", BondType::poly},
        {"delo", BondType::delo},
        {"pi", BondType::pi}};

    for (const auto &entry : kCodes)
    {
        if (cif::iequals(bondType, entry.first))
            return entry.second;
    }

    throw std::invalid_argument("Invalid bondType: " + bondType);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound helper classes
|
||||
|
||||
struct CompoundAtomLess
|
||||
{
|
||||
bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
|
||||
{
|
||||
int d = a.id.compare(b.id);
|
||||
if (d == 0)
|
||||
d = a.typeSymbol - b.typeSymbol;
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct CompoundBondLess
|
||||
{
|
||||
bool operator()(const CompoundBond &a, const CompoundBond &b) const
|
||||
{
|
||||
int d = a.atomID[0].compare(b.atomID[0]);
|
||||
if (d == 0)
|
||||
d = a.atomID[1].compare(b.atomID[1]);
|
||||
if (d == 0)
|
||||
d = static_cast<int>(a.type) - static_cast<int>(b.type);
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound
|
||||
|
||||
// Build a compound from a datablock that uses the chem_comp, chem_comp_atom
// and chem_comp_bond categories with the column names read below.
// Throws std::runtime_error unless chem_comp holds exactly one row.
Compound::Compound(cif::Datablock &db)
{
    auto &compCat = db["chem_comp"];

    if (compCat.size() != 1)
        throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

    cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
        compCat.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

    // atoms
    auto &atomCat = db["chem_comp_atom"];
    for (auto r : atomCat)
    {
        CompoundAtom a;
        std::string symbol;

        cif::tie(a.id, symbol, a.charge, a.aromatic, a.leavingAtom, a.stereoConfig, a.x, a.y, a.z) =
            r.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
                "model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

        a.typeSymbol = AtomTypeTraits(symbol).type();
        mAtoms.push_back(std::move(a));
    }

    // bonds
    auto &bondCat = db["chem_comp_bond"];
    for (auto r : bondCat)
    {
        CompoundBond b;
        std::string order;

        cif::tie(b.atomID[0], b.atomID[1], order, b.aromatic, b.stereoConfig) =
            r.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

        b.type = from_string(order);
        mBonds.push_back(std::move(b));
    }
}
|
||||
|
||||
// Build a compound from a datablock whose chem_comp_atom / chem_comp_bond
// categories use the columns "x", "y", "z", "type" and "aromatic" (the caller
// supplies id, name and type). Formal charge and formula weight are
// accumulated from the atoms.
// NOTE(review): the accumulation assumes mFormalCharge and mFormulaWeight
// start at zero via in-class initialisers -- confirm in the class declaration.
Compound::Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type)
    : mID(id)
    , mName(name)
    , mType(type)
{
    // atoms
    auto &atomCat = db["chem_comp_atom"];
    for (auto r : atomCat)
    {
        CompoundAtom a;
        std::string symbol;

        cif::tie(a.id, symbol, a.charge, a.x, a.y, a.z) =
            r.get("atom_id", "type_symbol", "charge", "x", "y", "z");

        a.typeSymbol = AtomTypeTraits(symbol).type();

        mFormalCharge += a.charge;
        mFormulaWeight += AtomTypeTraits(a.typeSymbol).weight();

        mAtoms.push_back(std::move(a));
    }

    // bonds
    auto &bondCat = db["chem_comp_bond"];
    for (auto r : bondCat)
    {
        using cif::iequals;

        CompoundBond b;
        std::string kind;

        cif::tie(b.atomID[0], b.atomID[1], kind, b.aromatic) =
            r.get("atom_id_1", "atom_id_2", "type", "aromatic");

        const bool delocalised = iequals(kind, "deloc") or iequals(kind, "aromat") or iequals(kind, "aromatic");

        if (iequals(kind, "single"))
            b.type = BondType::sing;
        else if (iequals(kind, "double"))
            b.type = BondType::doub;
        else if (iequals(kind, "triple"))
            b.type = BondType::trip;
        else if (delocalised)
            b.type = BondType::delo;
        else
        {
            // unknown bond kinds are treated as single bonds
            if (cif::VERBOSE)
                std::cerr << "Unimplemented chem_comp_bond.type " << kind << " in " << id << std::endl;
            b.type = BondType::sing;
        }

        mBonds.push_back(std::move(b));
    }
}
|
||||
|
||||
// Return a copy of the atom whose id equals atomID.
// Throws std::out_of_range when this compound has no such atom. This also
// holds for an empty atomID, which is never silently answered with a
// default-constructed atom.
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{
    for (const auto &a : mAtoms)
    {
        if (a.id == atomID)
            return a;
    }

    throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
}
|
||||
|
||||
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
|
||||
{
|
||||
auto i = find_if(mBonds.begin(), mBonds.end(),
|
||||
[&](const CompoundBond &b) {
|
||||
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
|
||||
});
|
||||
|
||||
return i != mBonds.end();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a factory class to generate compounds
|
||||
|
||||
// Three letter residue code -> one letter code for the amino acids listed
// here, including the ambiguous codes GLX (Z) and ASX (B).
CIFPP_EXPORT const std::map<std::string, char> kAAMap{
    {"ALA", 'A'}, {"ARG", 'R'}, {"ASN", 'N'}, {"ASP", 'D'},
    {"CYS", 'C'}, {"GLN", 'Q'}, {"GLU", 'E'}, {"GLY", 'G'},
    {"HIS", 'H'}, {"ILE", 'I'}, {"LEU", 'L'}, {"LYS", 'K'},
    {"MET", 'M'}, {"PHE", 'F'}, {"PRO", 'P'}, {"SER", 'S'},
    {"THR", 'T'}, {"TRP", 'W'}, {"TYR", 'Y'}, {"VAL", 'V'},
    {"GLX", 'Z'}, {"ASX", 'B'}};
|
||||
|
||||
// Nucleotide residue code -> one letter code; the D-prefixed codes map to the
// same letter as their unprefixed counterpart.
CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
    {"A", 'A'}, {"C", 'C'}, {"G", 'G'}, {"T", 'T'}, {"U", 'U'},
    {"DA", 'A'}, {"DC", 'C'}, {"DG", 'G'}, {"DT", 'T'}};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryImpl>
|
||||
{
|
||||
public:
|
||||
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
|
||||
|
||||
CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next);
|
||||
|
||||
virtual ~CompoundFactoryImpl()
|
||||
{
|
||||
for (auto c: mCompounds)
|
||||
delete c;
|
||||
}
|
||||
|
||||
Compound *get(std::string id)
|
||||
{
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
ba::to_upper(id);
|
||||
|
||||
Compound *result = nullptr;
|
||||
|
||||
// walk the list, see if any of us has the compound already
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
{
|
||||
for (auto cmp : impl->mCompounds)
|
||||
{
|
||||
if (cmp->id() == id)
|
||||
{
|
||||
result = cmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr and mMissing.count(id) == 0)
|
||||
{
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
|
||||
{
|
||||
result = impl->create(id);
|
||||
if (result != nullptr)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr)
|
||||
mMissing.insert(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<CompoundFactoryImpl> next() const
|
||||
{
|
||||
return mNext;
|
||||
}
|
||||
|
||||
bool isKnownPeptide(const std::string &resName)
|
||||
{
|
||||
return mKnownPeptides.count(resName) or
|
||||
(mNext and mNext->isKnownPeptide(resName));
|
||||
}
|
||||
|
||||
bool isKnownBase(const std::string &resName)
|
||||
{
|
||||
return mKnownBases.count(resName) or
|
||||
(mNext and mNext->isKnownBase(resName));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
virtual Compound *create(const std::string &id)
|
||||
{
|
||||
// For the base class we assume every compound is preloaded
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
std::vector<Compound *> mCompounds;
|
||||
std::set<std::string> mKnownPeptides;
|
||||
std::set<std::string> mKnownBases;
|
||||
std::set<std::string> mMissing;
|
||||
std::shared_ptr<CompoundFactoryImpl> mNext;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Create a link without preloaded compounds; the known peptides and bases
// are seeded from the keys of kAAMap and kBaseMap.
CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
    : mNext(next)
{
    for (const auto &entry : kAAMap)
        mKnownPeptides.insert(entry.first);

    for (const auto &entry : kBaseMap)
        mKnownBases.insert(entry.first);
}
|
||||
|
||||
// Create a link that preloads every compound found in the given file.
// A file containing a "comp_list" datablock is handled as a CCP4 restraints
// file; anything else is validated against the mmcif_pdbx_v50 dictionary and
// read as a CCD components file (std::runtime_error when it is not valid).
CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next)
    : mNext(next)
{
    // Map a CCP4 group name to the compound type string.
    //
    // known groups are (counted from ccp4 monomer dictionary):
    //   D-pyranose, DNA, L-PEPTIDE LINKING, L-SACCHARIDE, L-peptide,
    //   L-pyranose, M-peptide, NON-POLYMER, P-peptide, RNA, furanose,
    //   non-polymer, non_polymer, peptide, pyranose, saccharide
    const auto typeForGroup = [](const std::string &compId, const std::string &compGroup) -> std::string
    {
        if (cif::iequals(compId, "gly"))
            return "peptide linking";
        if (cif::iequals(compGroup, "l-peptide") or cif::iequals(compGroup, "L-peptide linking") or cif::iequals(compGroup, "peptide"))
            return "L-peptide linking";
        if (cif::iequals(compGroup, "DNA"))
            return "DNA linking";
        if (cif::iequals(compGroup, "RNA"))
            return "RNA linking";
        return "non-polymer";
    };

    cif::File cifFile(file);

    if (auto compList = cifFile.get("comp_list")) // So this is a CCP4 restraints file, special handling
    {
        auto &chemComp = (*compList)["chem_comp"];

        for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
        {
            std::string type = typeForGroup(id, group);

            // each compound lives in its own datablock named comp_<id>
            auto &db = cifFile["comp_" + id];

            mCompounds.push_back(new Compound(db, id, name, type));
        }
    }
    else
    {
        // A CCD components file, validate it first
        cifFile.loadDictionary("mmcif_pdbx_v50");

        if (not cifFile.isValid())
            throw std::runtime_error("Invalid compound file");

        for (auto &db : cifFile)
            mCompounds.push_back(new Compound(db));
    }
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the cached components.cif file from CCD
|
||||
|
||||
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
|
||||
{
|
||||
public:
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next, const fs::path& file)
|
||||
: CompoundFactoryImpl(next)
|
||||
, mCompoundsFile(file)
|
||||
{
|
||||
}
|
||||
|
||||
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
|
||||
: CompoundFactoryImpl(next)
|
||||
{
|
||||
}
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
|
||||
cif::DatablockIndex mIndex;
|
||||
fs::path mCompoundsFile;
|
||||
};
|
||||
|
||||
// Load a single compound from the CCD data.
// On the first call the datablocks are indexed (stored in mIndex); after
// that only the datablock for id is parsed. Returns the new compound, which
// is also appended to mCompounds, or nullptr when the parsed file is empty or
// its first datablock is not named id.
// Throws std::runtime_error when the components.cif resource is needed but
// cannot be loaded.
//
// NOTE(review): mCompounds is appended to while holding only a shared lock
// on mMutex -- confirm the intended locking discipline.
Compound *CCDCompoundFactoryImpl::create(const std::string &id)
{
    // Open the CCD data: the bundled resource when no explicit file was
    // given, the file itself otherwise.
    const auto openComponents = [this]() -> std::unique_ptr<std::istream>
    {
        if (not mCompoundsFile.empty())
            return std::unique_ptr<std::istream>(new std::ifstream(mCompoundsFile));

        std::unique_ptr<std::istream> resource = cif::loadResource("components.cif");
        if (not resource)
            throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-dictionary-script to fetch the data.");
        return resource;
    };

    const bool chatty = cif::VERBOSE > 1;

    std::unique_ptr<std::istream> ccd = openComponents();

    cif::File file;

    if (mIndex.empty())
    {
        if (chatty)
        {
            std::cout << "Creating component index "
                      << "...";
            std::cout.flush();
        }

        cif::Parser indexer(*ccd, file, false);
        mIndex = indexer.indexDatablocks();

        if (chatty)
            std::cout << " done" << std::endl;

        // reload the resource, perhaps this should be improved...
        ccd = openComponents();
    }

    if (chatty)
    {
        std::cout << "Loading component " << id << "...";
        std::cout.flush();
    }

    cif::Parser parser(*ccd, file, false);
    parser.parseSingleDatablock(id, mIndex);

    if (chatty)
        std::cout << " done" << std::endl;

    Compound *result = nullptr;

    if (not file.empty())
    {
        auto &db = file.firstDatablock();
        if (db.getName() == id)
        {
            result = new Compound(db);

            std::shared_lock lock(mMutex);
            mCompounds.push_back(result);
        }
    }

    if (result == nullptr and cif::VERBOSE)
        std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

    return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the data found in CCP4's monomers lib
|
||||
|
||||
class CCP4CompoundFactoryImpl : public CompoundFactoryImpl
|
||||
{
|
||||
public:
|
||||
CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next = nullptr);
|
||||
|
||||
Compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
cif::File mFile;
|
||||
fs::path mCLIBD_MON;
|
||||
};
|
||||
|
||||
// Open <clibd_mon>/list/mon_lib_list.cif and register the three letter codes
// of its chem_comp rows: groups matching "(l-|m-|p-)peptide" (case
// insensitive) become known peptides, groups DNA and RNA become known bases.
CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next)
    : CompoundFactoryImpl(next)
    , mFile((clibd_mon / "list" / "mon_lib_list.cif").string())
    , mCLIBD_MON(clibd_mon)
{
    const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

    auto &chemComps = mFile["comp_list"]["chem_comp"];

    for (const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>("group", "three_letter_code"))
    {
        if (std::regex_match(group, peptideRx))
        {
            mKnownPeptides.insert(threeLetterCode);
            continue;
        }

        const bool nucleic = ba::iequals(group, "DNA") or ba::iequals(group, "RNA");
        if (nucleic)
            mKnownBases.insert(threeLetterCode);
    }
}
|
||||
|
||||
// Load the compound with the given three letter code from the CCP4 monomer
// library. The code must occur exactly once in the chem_comp list and its
// restraints file <root>/<first letter, lower case>/<id>.cif must exist.
// Returns the new compound, which is also appended to mCompounds, or nullptr
// when the code is unknown or its file is missing.
Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
{
    Compound *result = nullptr;

    auto &cat = mFile["comp_list"]["chem_comp"];

    auto rs = cat.find(cif::Key("three_letter_code") == id);

    if (rs.size() == 1)
    {
        auto row = rs.front();

        std::string name, group;
        // NOTE(review): the two atom counts are fetched but not used further on
        uint32_t numberAtomsAll, numberAtomsNh;
        cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
            row.get("name", "group", "number_atoms_all", "number_atoms_nh");

        const fs::path subDir = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1));

        fs::path resFile = subDir / (id + ".cif");

        // Only these three ids get the <id>_<id>.cif fallback name. The
        // condition used to contain a bare "PRN" string literal, which is
        // always true, so the fallback was applied to every id.
        const bool hasAlternativeName = id == "COM" or id == "CON" or id == "PRN"; // seriously...
        if (not fs::exists(resFile) and hasAlternativeName)
            resFile = subDir / (id + '_' + id + ".cif");

        if (fs::exists(resFile))
        {
            cif::File cf(resFile.string());

            // locate the datablock
            auto &db = cf["comp_" + id];

            std::string type;

            // known groups are (counted from ccp4 monomer dictionary)

            //	D-pyranose
            //	DNA
            //	L-PEPTIDE LINKING
            //	L-SACCHARIDE
            //	L-peptide
            //	L-pyranose
            //	M-peptide
            //	NON-POLYMER
            //	P-peptide
            //	RNA
            //	furanose
            //	non-polymer
            //	non_polymer
            //	peptide
            //	pyranose
            //	saccharide

            if (cif::iequals(id, "gly"))
                type = "peptide linking";
            else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide"))
                type = "L-peptide linking";
            else if (cif::iequals(group, "DNA"))
                type = "DNA linking";
            else if (cif::iequals(group, "RNA"))
                type = "RNA linking";
            else
                type = "non-polymer";

            mCompounds.push_back(new Compound(db, id, name, type));
            result = mCompounds.back();
        }
    }

    return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Storage for the static members of CompoundFactory: the process wide
// instance, the per-thread instance, and the flag that selects between them.
std::unique_ptr<CompoundFactory> CompoundFactory::sInstance;
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
bool CompoundFactory::sUseThreadLocalInstance = false;
|
||||
|
||||
void CompoundFactory::init(bool useThreadLocalInstanceOnly)
|
||||
{
|
||||
sUseThreadLocalInstance = useThreadLocalInstanceOnly;
|
||||
}
|
||||
|
||||
// Set up the default chain of compound sources: a CCP4 monomer library when
// the CLIBD_MON environment variable names a directory, with a CCD based
// source stacked on top when the components.cif resource can be loaded.
// A note is written to std::cerr for each source that is unavailable, but
// only when cif::VERBOSE is non-zero.
CompoundFactory::CompoundFactory()
    : mImpl(nullptr)
{
    const char *clibd_mon = getenv("CLIBD_MON");
    const bool haveMonomerLib = clibd_mon != nullptr and fs::is_directory(clibd_mon);

    if (haveMonomerLib)
        mImpl.reset(new CCP4CompoundFactoryImpl(clibd_mon));
    else if (cif::VERBOSE)
        std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;

    // the resource is opened here only to find out whether it exists
    auto ccd = cif::loadResource("components.cif");
    const bool haveCCD = static_cast<bool>(ccd);

    if (haveCCD)
        mImpl.reset(new CCDCompoundFactoryImpl(mImpl));
    else if (cif::VERBOSE)
        std::cerr << "CCD components.cif file was not found" << std::endl;
}
|
||||
|
||||
// Nothing to release explicitly; mImpl cleans up after itself.
CompoundFactory::~CompoundFactory() = default;
|
||||
|
||||
// Return the active factory, creating it on first use. Which instance is
// active (per-thread or process wide) is decided by sUseThreadLocalInstance.
CompoundFactory &CompoundFactory::instance()
{
    std::unique_ptr<CompoundFactory> &slot = sUseThreadLocalInstance ? tlInstance : sInstance;

    if (not slot)
        slot.reset(new CompoundFactory());

    return *slot;
}
|
||||
|
||||
void CompoundFactory::clear()
|
||||
{
|
||||
if (sUseThreadLocalInstance)
|
||||
tlInstance.reset(nullptr);
|
||||
else
|
||||
sInstance.reset();
|
||||
}
|
||||
|
||||
void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFile)
|
||||
{
|
||||
if (not fs::exists(inDictFile))
|
||||
throw std::runtime_error("file not found: " + inDictFile.string());
|
||||
|
||||
try
|
||||
{
|
||||
mImpl.reset(new CCDCompoundFactoryImpl(mImpl, inDictFile));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
|
||||
{
|
||||
if (not fs::exists(inDictFile))
|
||||
throw std::runtime_error("file not found: " + inDictFile.string());
|
||||
|
||||
// ifstream file(inDictFile);
|
||||
// if (not file.is_open())
|
||||
// throw std::runtime_error("Could not open peptide list " + inDictFile);
|
||||
|
||||
try
|
||||
{
|
||||
mImpl.reset(new CompoundFactoryImpl(inDictFile, mImpl));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::cerr << "Error loading dictionary " << inDictFile << std::endl;
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
void CompoundFactory::popDictionary()
|
||||
{
|
||||
if (mImpl)
|
||||
mImpl = mImpl->next();
|
||||
}
|
||||
|
||||
// Return the compound with the given id as provided by the source chain, or
// nullptr when there is no chain or the chain does not have the compound.
const Compound *CompoundFactory::create(std::string id)
{
    if (not mImpl)
        return nullptr;

    return mImpl->get(id);
}
|
||||
|
||||
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownPeptide(resName) : kAAMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
bool CompoundFactory::isKnownBase(const std::string &resName) const
|
||||
{
|
||||
return mImpl ? mImpl->isKnownBase(resName) : kBaseMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
} // namespace mmcif
|
||||
@@ -1,11 +0,0 @@
|
||||
/* Define to the name of this package. */
|
||||
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
|
||||
|
||||
/* Define the complete package string */
|
||||
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
|
||||
|
||||
/* Using resources? */
|
||||
#cmakedefine USE_RSRC @USE_RSRC@
|
||||
@@ -1,113 +0,0 @@
|
||||
/* src/Config.hpp.in. Generated from configure.ac by autoheader. */
|
||||
|
||||
/* define if the Boost library is available */
|
||||
#undef HAVE_BOOST
|
||||
|
||||
/* define if the Boost::Date_Time library is available */
|
||||
#undef HAVE_BOOST_DATE_TIME
|
||||
|
||||
/* define if the Boost::IOStreams library is available */
|
||||
#undef HAVE_BOOST_IOSTREAMS
|
||||
|
||||
/* define if the Boost::Regex library is available */
|
||||
#undef HAVE_BOOST_REGEX
|
||||
|
||||
/* define if the compiler supports basic C++17 syntax */
|
||||
#undef HAVE_CXX17
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
#undef HAVE_DLFCN_H
|
||||
|
||||
/* Define to 1 if you have the `floor' function. */
|
||||
#undef HAVE_FLOOR
|
||||
|
||||
/* Define to 1 if you have the <inttypes.h> header file. */
|
||||
#undef HAVE_INTTYPES_H
|
||||
|
||||
/* Define to 1 if you have the <memory.h> header file. */
|
||||
#undef HAVE_MEMORY_H
|
||||
|
||||
/* Define to 1 if you have the `pow' function. */
|
||||
#undef HAVE_POW
|
||||
|
||||
/* Define if you have POSIX threads libraries and header files. */
|
||||
#undef HAVE_PTHREAD
|
||||
|
||||
/* Have PTHREAD_PRIO_INHERIT. */
|
||||
#undef HAVE_PTHREAD_PRIO_INHERIT
|
||||
|
||||
/* Define to 1 if the system has the type `ptrdiff_t'. */
|
||||
#undef HAVE_PTRDIFF_T
|
||||
|
||||
/* Define to 1 if you have the `rint' function. */
|
||||
#undef HAVE_RINT
|
||||
|
||||
/* Define to 1 if you have the `sqrt' function. */
|
||||
#undef HAVE_SQRT
|
||||
|
||||
/* Define to 1 if you have the <stdint.h> header file. */
|
||||
#undef HAVE_STDINT_H
|
||||
|
||||
/* Define to 1 if you have the <stdlib.h> header file. */
|
||||
#undef HAVE_STDLIB_H
|
||||
|
||||
/* Define to 1 if you have the `strchr' function. */
|
||||
#undef HAVE_STRCHR
|
||||
|
||||
/* Define to 1 if you have the `strerror' function. */
|
||||
#undef HAVE_STRERROR
|
||||
|
||||
/* Define to 1 if you have the <strings.h> header file. */
|
||||
#undef HAVE_STRINGS_H
|
||||
|
||||
/* Define to 1 if you have the <string.h> header file. */
|
||||
#undef HAVE_STRING_H
|
||||
|
||||
/* Define to 1 if you have the <sys/ioctl.h> header file. */
|
||||
#undef HAVE_SYS_IOCTL_H
|
||||
|
||||
/* Define to 1 if you have the <sys/stat.h> header file. */
|
||||
#undef HAVE_SYS_STAT_H
|
||||
|
||||
/* Define to 1 if you have the <sys/types.h> header file. */
|
||||
#undef HAVE_SYS_TYPES_H
|
||||
|
||||
/* Define to 1 if you have the <termios.h> header file. */
|
||||
#undef HAVE_TERMIOS_H
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
#undef HAVE_UNISTD_H
|
||||
|
||||
/* Define to 1 if the system has the type `_Bool'. */
|
||||
#undef HAVE__BOOL
|
||||
|
||||
/* Define to the sub-directory where libtool stores uninstalled libraries. */
|
||||
#undef LT_OBJDIR
|
||||
|
||||
/* Define to the address where bug reports for this package should be sent. */
|
||||
#undef PACKAGE_BUGREPORT
|
||||
|
||||
/* Define to the full name of this package. */
|
||||
#undef PACKAGE_NAME
|
||||
|
||||
/* Define to the full name and version of this package. */
|
||||
#undef PACKAGE_STRING
|
||||
|
||||
/* Define to the one symbol short name of this package. */
|
||||
#undef PACKAGE_TARNAME
|
||||
|
||||
/* Define to the home page for this package. */
|
||||
#undef PACKAGE_URL
|
||||
|
||||
/* Define to the version of this package. */
|
||||
#undef PACKAGE_VERSION
|
||||
|
||||
/* Define to necessary symbol if this constant uses a non-standard name on
|
||||
your system. */
|
||||
#undef PTHREAD_CREATE_JOINABLE
|
||||
|
||||
/* Define to 1 if you have the ANSI C header files. */
|
||||
#undef STDC_HEADERS
|
||||
|
||||
/* Use mrc to store resources */
|
||||
#undef USE_RSRC
|
||||
File diff suppressed because it is too large
Load Diff
306
src/Point.cpp
306
src/Point.cpp
@@ -1,306 +0,0 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <random>
|
||||
#include <valarray>
|
||||
|
||||
#include "cif++/Point.hpp"
|
||||
#include "cif++/Matrix.hpp"
|
||||
|
||||
namespace mmcif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Scale q to unit length. When the length is not larger than 0.001 the
// quaternion (1, 0, 0, 0) is returned instead.
Quaternion Normalize(Quaternion q)
{
	// collect the four components and square them element-wise
	std::valarray<double> squares(4);

	squares[0] = q.R_component_1();
	squares[1] = q.R_component_2();
	squares[2] = q.R_component_3();
	squares[3] = q.R_component_4();

	squares *= squares;

	const double norm = std::sqrt(squares.sum());

	if (norm > 0.001)
	{
		q /= static_cast<Quaternion::value_type>(norm);
		return q;
	}

	return Quaternion(1, 0, 0, 0);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Convert a quaternion into a rotation angle (in degrees) and an axis.
// The quaternion is normalised first when its first component exceeds 1.
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q)
{
	if (q.R_component_1() > 1)
		q = Normalize(q);

	const auto w = q.R_component_1();

	// angle: computed in radians, then converted to degrees
	double degrees = 2 * acos(w);
	degrees = degrees * 180 / kPI;

	// axis: divide the remaining components by sqrt(1 - w^2), unless that
	// divisor is too small, in which case the components are used as is
	float divisor = std::sqrt(1 - w * w);
	if (divisor < 0.001)
		divisor = 1;

	Point direction(q.R_component_2() / divisor, q.R_component_3() / divisor, q.R_component_4() / divisor);

	return std::make_tuple(degrees, direction);
}
|
||||
|
||||
// Translate all points so that their mean position becomes the origin.
//
// Points is modified in place. The return value is the mean position that
// was subtracted from every point. For an empty vector nothing is changed
// and a default constructed Point is returned (previously the division by
// a zero count produced NaN coordinates).
Point CenterPoints(std::vector<Point>& Points)
{
	Point t;

	if (Points.empty())
		return t;

	// accumulate the coordinate sums
	for (Point& pt : Points)
	{
		t.mX += pt.mX;
		t.mY += pt.mY;
		t.mZ += pt.mZ;
	}

	// turn the sums into the mean position
	t.mX /= Points.size();
	t.mY /= Points.size();
	t.mZ /= Points.size();

	// shift every point by the mean
	for (Point& pt : Points)
	{
		pt.mX -= t.mX;
		pt.mY -= t.mY;
		pt.mZ -= t.mZ;
	}

	return t;
}
|
||||
|
||||
// Return the mean position of the given points; Points is not modified.
//
// For an empty vector a default constructed Point is returned (previously
// the division by a zero count produced NaN coordinates).
Point Centroid(std::vector<Point>& Points)
{
	Point result;

	if (Points.empty())
		return result;

	for (Point& pt : Points)
		result += pt;

	result /= static_cast<float>(Points.size());

	return result;
}
|
||||
|
||||
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b)
|
||||
{
|
||||
double sum = 0;
|
||||
for (uint32_t i = 0; i < a.size(); ++i)
|
||||
{
|
||||
std::valarray<double> d(3);
|
||||
|
||||
d[0] = b[i].mX - a[i].mX;
|
||||
d[1] = b[i].mY - a[i].mY;
|
||||
d[2] = b[i].mZ - a[i].mZ;
|
||||
|
||||
d *= d;
|
||||
|
||||
sum += d.sum();
|
||||
}
|
||||
|
||||
return std::sqrt(sum / a.size());
|
||||
}
|
||||
|
||||
// Return the largest real part among the solutions of a depressed quartic,
// based on Ferrari's algorithm. A depressed quartic is of the form:
//
//   x^4 + ax^2 + bx + c = 0
//
// The routine works with complex values throughout to avoid NaNs that
// would otherwise result from taking the square root of a negative number.
//
// Degenerate inputs are handled explicitly:
//  - the cube root is not taken of a value that is exactly zero;
//  - when W is exactly zero (which happens e.g. for a = b = c = 0) the
//    term 2b/W would be 0/0, so it is replaced by zero instead of
//    letting NaN propagate into the result.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	using cplx = std::complex<double>;
	const cplx zero(0.0, 0.0);

	cplx P = - (a * a) / 12 - c;
	cplx Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	cplx R = - Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	cplx U = (R == zero) ? zero : std::pow(R, 1 / 3.0);

	cplx y;
	if (U == zero)
		y = -5.0 * a / 6.0 - ((Q == zero) ? zero : std::pow(Q, 1.0 / 3.0));
	else
		y = -5.0 * a / 6.0 + U - P / (3.0 * U);

	cplx W = std::sqrt(a + 2.0 * y);

	// The four candidate solutions are
	//   (±W + sqrt(-(3a + 2y ± 2b / W))) / 2
	// and the largest real part is wanted.
	const cplx base = -(3.0 * a + 2.0 * y);
	const cplx shift = (W == zero) ? zero : 2.0 * b / W;

	const cplx rootPlus = std::sqrt(base - shift);  // sqrt(-(3a + 2y + 2b/W))
	const cplx rootMinus = std::sqrt(base + shift); // sqrt(-(3a + 2y - 2b/W))

	std::valarray<double> t(4);

	t[0] = (( W + rootPlus) / 2.0).real();
	t[1] = (( W + rootMinus) / 2.0).real();
	t[2] = ((-W + rootPlus) / 2.0).real();
	t[3] = ((-W + rootMinus) / 2.0).real();

	return t.max();
}
|
||||
|
||||
// Compute a (normalised) quaternion from two lists of points that are
// paired by index. The steps are: build the 3x3 matrix M of summed
// coordinate products, derive the symmetric 4x4 matrix N from it, find the
// largest root of det(N - λI) = 0 and read the quaternion from a row of
// 3x3 determinants of (N - λI).
Quaternion AlignPoints(const std::vector<Point>& pa, const std::vector<Point>& pb)
{
	// M(i, j) is the sum over all pairs of a_i * b_j
	Matrix<double> M(3, 3, 0);

	for (uint32_t ix = 0; ix < pa.size(); ++ix)
	{
		const Point& a = pa[ix];
		const Point& b = pb[ix];

		M(0, 0) += a.mX * b.mX;
		M(0, 1) += a.mX * b.mY;
		M(0, 2) += a.mX * b.mZ;

		M(1, 0) += a.mY * b.mX;
		M(1, 1) += a.mY * b.mY;
		M(1, 2) += a.mY * b.mZ;

		M(2, 0) += a.mZ * b.mX;
		M(2, 1) += a.mZ * b.mY;
		M(2, 2) += a.mZ * b.mZ;
	}

	// N, a symmetric 4x4 matrix built from the elements of M
	SymmetricMatrix<double> N(4);

	N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
	N(0, 1) = M(1, 2) - M(2, 1);
	N(0, 2) = M(2, 0) - M(0, 2);
	N(0, 3) = M(0, 1) - M(1, 0);

	N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
	N(1, 2) = M(0, 1) + M(1, 0);
	N(1, 3) = M(0, 2) + M(2, 0);

	N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
	N(2, 3) = M(1, 2) + M(2, 1);

	N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);

	// det(N - λI) = 0, we need the largest λ (λm)
	//
	//   Aλ^4 + Bλ^3 + Cλ^2 + Dλ + E = 0
	//   A = 1
	//   B = 0
	//
	// which makes this a so-called depressed quartic that can be solved
	// using Ferrari's algorithm.

	const double C = -2 * (
		M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
		M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
		M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));

	const double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
						  M(1, 1) * M(2, 0) * M(0, 2) +
						  M(2, 2) * M(0, 1) * M(1, 0)) -
					 8 * (M(0, 0) * M(1, 1) * M(2, 2) +
						  M(1, 2) * M(2, 0) * M(0, 1) +
						  M(2, 1) * M(1, 0) * M(0, 2));

	const double E =
		(N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
		(N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
		(N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
		(N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
		(N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
		(N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));

	// largest root of the quartic
	const double lambdaMax = LargestDepressedQuarticSolution(C, D, E);

	// shifted = (N - λI)
	Matrix<double> scaledIdentity = IdentityMatrix<double>(4) * lambdaMax;
	Matrix<double> shifted = N - scaledIdentity;

	// For every (r, c) store the determinant of the 3x3 matrix that remains
	// after dropping row r and column c of 'shifted'. No sign factor is
	// applied to these determinants here.
	Matrix<double> minors(4, 4);

	// kept[k] lists the three indices that remain when index k is dropped
	const uint32_t kept[4][3] =
	{
		{ 1, 2, 3 },
		{ 0, 2, 3 },
		{ 0, 1, 3 },
		{ 0, 1, 2 }
	};

	uint32_t bestRow = 0;
	for (uint32_t r = 0; r < 4; ++r)
	{
		const uint32_t* rows = kept[r];

		for (uint32_t c = 0; c < 4; ++c)
		{
			const uint32_t* cols = kept[c];

			minors(r, c) =
				shifted(rows[0], cols[0]) * shifted(rows[1], cols[1]) * shifted(rows[2], cols[2]) +
				shifted(rows[0], cols[1]) * shifted(rows[1], cols[2]) * shifted(rows[2], cols[0]) +
				shifted(rows[0], cols[2]) * shifted(rows[1], cols[0]) * shifted(rows[2], cols[1]) -
				shifted(rows[0], cols[2]) * shifted(rows[1], cols[1]) * shifted(rows[2], cols[0]) -
				shifted(rows[0], cols[1]) * shifted(rows[1], cols[0]) * shifted(rows[2], cols[2]) -
				shifted(rows[0], cols[0]) * shifted(rows[1], cols[2]) * shifted(rows[2], cols[1]);
		}

		// remember the row whose first entry is the largest seen so far
		if (r > bestRow and minors(r, 0) > minors(bestRow, 0))
			bestRow = r;
	}

	// NOTE the negation of the y here, why? Maybe r/c were swapped above?
	Quaternion q(minors(bestRow, 0), minors(bestRow, 1), -minors(bestRow, 2), minors(bestRow, 3));

	return Normalize(q);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return p displaced in a random direction. The displacement length is
// drawn from a normal distribution with mean 0 and standard deviation
// 'offset'; the direction is the vector (0, 0, 1) rotated by a quaternion
// built from three random angles.
Point Nudge(Point p, float offset)
{
	// one generator for all calls, seeded once from the random device
	static std::random_device seedSource;
	static std::mt19937_64 engine(seedSource());

	std::uniform_real_distribution<> angleDist(0, 2 * kPI);
	std::normal_distribution<> lengthDist(0, offset);

	// the draws happen in this order: theta, phi1, phi2, then the length
	const float theta = static_cast<float>(angleDist(engine));
	const float phi1 = static_cast<float>(angleDist(engine) - kPI);
	const float phi2 = static_cast<float>(angleDist(engine) - kPI);

	Quaternion rotation = boost::math::spherical(1.0f, theta, phi1, phi2);

	Point displacement{ 0, 0, 1 };
	displacement.rotate(rotation);
	displacement *= static_cast<float>(lengthDist(engine));

	return p + displacement;
}
|
||||
|
||||
}
|
||||
1528
src/Secondary.cpp
1528
src/Secondary.cpp
File diff suppressed because it is too large
Load Diff
2580
src/Structure.cpp
2580
src/Structure.cpp
File diff suppressed because it is too large
Load Diff
1892
src/TlsParser.cpp
1892
src/TlsParser.cpp
File diff suppressed because it is too large
Load Diff
1154
src/atom_type.cpp
Normal file
1154
src/atom_type.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2064
src/category.cpp
Normal file
2064
src/category.cpp
Normal file
File diff suppressed because it is too large
Load Diff
748
src/compound.cpp
Normal file
748
src/compound.cpp
Normal file
@@ -0,0 +1,748 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <numeric>
|
||||
#include <shared_mutex>
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
|
||||
#include <cif++/compound.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string to_string(bond_type bondType)
|
||||
{
|
||||
switch (bondType)
|
||||
{
|
||||
case bond_type::sing: return "sing";
|
||||
case bond_type::doub: return "doub";
|
||||
case bond_type::trip: return "trip";
|
||||
case bond_type::quad: return "quad";
|
||||
case bond_type::arom: return "arom";
|
||||
case bond_type::poly: return "poly";
|
||||
case bond_type::delo: return "delo";
|
||||
case bond_type::pi: return "pi";
|
||||
}
|
||||
throw std::invalid_argument("Invalid bondType");
|
||||
}
|
||||
|
||||
// Parse a bond type code, compared with cif::iequals.
// Throws std::invalid_argument when the text matches none of the codes.
bond_type from_string(const std::string &bondType)
{
	static const struct
	{
		const char *code;
		bond_type value;
	} kCodes[] = {
		{ "sing", bond_type::sing },
		{ "doub", bond_type::doub },
		{ "trip", bond_type::trip },
		{ "quad", bond_type::quad },
		{ "arom", bond_type::arom },
		{ "poly", bond_type::poly },
		{ "delo", bond_type::delo },
		{ "pi", bond_type::pi }
	};

	for (const auto &entry : kCodes)
	{
		if (cif::iequals(bondType, entry.code))
			return entry.value;
	}

	throw std::invalid_argument("Invalid bondType: " + bondType);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// compound helper classes
|
||||
|
||||
struct compound_atom_less
|
||||
{
|
||||
bool operator()(const compound_atom &a, const compound_atom &b) const
|
||||
{
|
||||
int d = a.id.compare(b.id);
|
||||
if (d == 0)
|
||||
d = a.type_symbol - b.type_symbol;
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
struct compound_bond_less
|
||||
{
|
||||
bool operator()(const compound_bond &a, const compound_bond &b) const
|
||||
{
|
||||
int d = a.atom_id[0].compare(b.atom_id[0]);
|
||||
if (d == 0)
|
||||
d = a.atom_id[1].compare(b.atom_id[1]);
|
||||
if (d == 0)
|
||||
d = static_cast<int>(a.type) - static_cast<int>(b.type);
|
||||
return d < 0;
|
||||
}
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// compound
|
||||
|
||||
// Construct a compound from a datablock holding the categories chem_comp,
// chem_comp_atom and chem_comp_bond. The group is always set to
// "non-polymer". Throws std::runtime_error unless chem_comp has exactly
// one row.
compound::compound(cif::datablock &db)
{
	auto &info = db["chem_comp"];

	if (info.size() != 1)
		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

	cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
		info.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

	// Newline characters in the name trigger validation errors later on,
	// so strip them here
	cif::replace_all(m_name, "\n", "");

	m_group = "non-polymer";

	// atoms
	for (auto row : db["chem_comp_atom"])
	{
		compound_atom atom;
		std::string symbol;

		cif::tie(atom.id, symbol, atom.charge, atom.aromatic, atom.leaving_atom, atom.stereo_config, atom.x, atom.y, atom.z) =
			row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

		atom.type_symbol = atom_type_traits(symbol).type();
		m_atoms.emplace_back(std::move(atom));
	}

	// bonds
	for (auto row : db["chem_comp_bond"])
	{
		compound_bond bond;
		std::string order;

		cif::tie(bond.atom_id[0], bond.atom_id[1], order, bond.aromatic, bond.stereo_config) =
			row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

		bond.type = from_string(order);
		m_bonds.emplace_back(std::move(bond));
	}
}
|
||||
|
||||
// Construct a compound from a datablock that uses the column names
// x / y / z for atoms and type / aromatic for bonds; id, name, type and
// group are supplied by the caller. Formal charge and formula weight are
// accumulated from the atoms. Unrecognised bond types are stored as single
// bonds, with a message on std::cerr when cif::VERBOSE > 0.
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
	: m_id(id)
	, m_name(name)
	, m_type(type)
	, m_group(group)
{
	for (auto row : db["chem_comp_atom"])
	{
		compound_atom atom;
		std::string symbol;

		cif::tie(atom.id, symbol, atom.charge, atom.x, atom.y, atom.z) =
			row.get("atom_id", "type_symbol", "charge", "x", "y", "z");
		atom.type_symbol = atom_type_traits(symbol).type();

		m_formal_charge += atom.charge;
		m_formula_weight += atom_type_traits(atom.type_symbol).weight();

		m_atoms.emplace_back(std::move(atom));
	}

	using cif::iequals;

	for (auto row : db["chem_comp_bond"])
	{
		compound_bond bond;
		std::string kind;

		cif::tie(bond.atom_id[0], bond.atom_id[1], kind, bond.aromatic) =
			row.get("atom_id_1", "atom_id_2", "type", "aromatic");

		if (iequals(kind, "single"))
		{
			bond.type = bond_type::sing;
		}
		else if (iequals(kind, "double"))
		{
			bond.type = bond_type::doub;
		}
		else if (iequals(kind, "triple"))
		{
			bond.type = bond_type::trip;
		}
		else if (iequals(kind, "deloc") or iequals(kind, "aromat") or iequals(kind, "aromatic"))
		{
			bond.type = bond_type::delo;
		}
		else
		{
			// unknown kind: report it when verbose and fall back to single
			if (cif::VERBOSE > 0)
				std::cerr << "Unimplemented chem_comp_bond.type " << kind << " in " << id << std::endl;
			bond.type = bond_type::sing;
		}

		m_bonds.emplace_back(std::move(bond));
	}
}
|
||||
|
||||
// Return a copy of the atom whose id equals atom_id.
//
// Throws std::out_of_range when this compound has no such atom. This
// includes the case of an empty atom_id without a matching atom, which
// previously returned a default constructed atom because the "not found"
// check compared the id of that blank atom with the (equally empty)
// requested id.
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
{
	for (const auto &a : m_atoms)
	{
		if (a.id == atom_id)
			return a;
	}

	throw std::out_of_range("No atom " + atom_id + " in compound " + m_id);
}
|
||||
|
||||
bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const
|
||||
{
|
||||
auto i = find_if(m_bonds.begin(), m_bonds.end(),
|
||||
[&](const compound_bond &b) {
|
||||
return (b.atom_id[0] == atomId_1 and b.atom_id[1] == atomId_2) or (b.atom_id[0] == atomId_2 and b.atom_id[1] == atomId_1);
|
||||
});
|
||||
|
||||
return i != m_bonds.end();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// known amino acids and bases
|
||||
|
||||
// Three letter residue names of the built-in amino acids, mapped to their
// one letter codes.
const std::map<std::string, char> compound_factory::kAAMap{
	{"ALA", 'A'}, {"ARG", 'R'}, {"ASN", 'N'}, {"ASP", 'D'},
	{"CYS", 'C'}, {"GLN", 'Q'}, {"GLU", 'E'}, {"GLY", 'G'},
	{"HIS", 'H'}, {"ILE", 'I'}, {"LEU", 'L'}, {"LYS", 'K'},
	{"MET", 'M'}, {"PHE", 'F'}, {"PRO", 'P'}, {"SER", 'S'},
	{"THR", 'T'}, {"TRP", 'W'}, {"TYR", 'Y'}, {"VAL", 'V'},
	{"GLX", 'Z'}, {"ASX", 'B'}
};
|
||||
|
||||
// Residue names of the built-in nucleotide bases, mapped to their one
// letter codes.
const std::map<std::string, char> compound_factory::kBaseMap{
	{"A", 'A'}, {"C", 'C'}, {"G", 'G'}, {"T", 'T'}, {"U", 'U'},
	{"DA", 'A'}, {"DC", 'C'}, {"DG", 'G'}, {"DT", 'T'}
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// a factory class to generate compounds
|
||||
|
||||
class compound_factory_impl : public std::enable_shared_from_this<compound_factory_impl>
|
||||
{
|
||||
public:
|
||||
compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
|
||||
|
||||
virtual ~compound_factory_impl()
|
||||
{
|
||||
for (auto c: m_compounds)
|
||||
delete c;
|
||||
}
|
||||
|
||||
compound *get(std::string id)
|
||||
{
|
||||
std::shared_lock lock(mMutex);
|
||||
|
||||
cif::to_upper(id);
|
||||
|
||||
compound *result = nullptr;
|
||||
|
||||
// walk the list, see if any of us has the compound already
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
|
||||
{
|
||||
for (auto cmp : impl->m_compounds)
|
||||
{
|
||||
if (cmp->id() == id)
|
||||
{
|
||||
result = cmp;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (result)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr and m_missing.count(id) == 0)
|
||||
{
|
||||
for (auto impl = shared_from_this(); impl; impl = impl->m_next)
|
||||
{
|
||||
result = impl->create(id);
|
||||
if (result != nullptr)
|
||||
break;
|
||||
}
|
||||
|
||||
if (result == nullptr)
|
||||
m_missing.insert(id);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::shared_ptr<compound_factory_impl> next() const
|
||||
{
|
||||
return m_next;
|
||||
}
|
||||
|
||||
bool is_known_peptide(const std::string &resName)
|
||||
{
|
||||
return m_known_peptides.count(resName) or
|
||||
(m_next and m_next->is_known_peptide(resName));
|
||||
}
|
||||
|
||||
bool is_known_base(const std::string &resName)
|
||||
{
|
||||
return m_known_bases.count(resName) or
|
||||
(m_next and m_next->is_known_base(resName));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
||||
virtual compound *create(const std::string &id)
|
||||
{
|
||||
// For the base class we assume every compound is preloaded
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
std::vector<compound *> m_compounds;
|
||||
std::set<std::string> m_known_peptides;
|
||||
std::set<std::string> m_known_bases;
|
||||
std::set<std::string> m_missing;
|
||||
std::shared_ptr<compound_factory_impl> m_next;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Create a link without compounds of its own; the known peptide and base
// names are seeded with the keys of compound_factory::kAAMap and kBaseMap.
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
	: m_next(next)
{
	for (const auto &entry : compound_factory::kAAMap)
		m_known_peptides.insert(entry.first);

	for (const auto &entry : compound_factory::kBaseMap)
		m_known_bases.insert(entry.first);
}
|
||||
|
||||
// Create a link that preloads all compounds found in 'file'.
//
// A file containing a datablock named comp_list is treated as a CCP4
// restraints file: every row of its chem_comp category is turned into a
// compound using the datablock comp_<id>. Any other file is treated as a
// CCD components file; it is validated against the mmcif_pdbx dictionary
// (std::runtime_error when invalid) and every datablock becomes a compound.
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
	: m_next(next)
{
	cif::file cifFile(file);

	if (not cifFile.contains("comp_list"))
	{
		// A CCD components file, validate it first
		cifFile.load_dictionary("mmcif_pdbx");

		if (not cifFile.is_valid())
			throw std::runtime_error("Invalid compound file");

		for (auto &db : cifFile)
			m_compounds.push_back(new compound(db));

		return;
	}

	// So this is a CCP4 restraints file, special handling.
	//
	// Known groups are (counted from ccp4 monomer dictionary):
	//
	//   D-pyranose, DNA, L-PEPTIDE LINKING, L-SACCHARIDE, L-peptide,
	//   L-pyranose, M-peptide, NON-POLYMER, P-peptide, RNA, furanose,
	//   non-polymer, non_polymer, peptide, pyranose, saccharide
	const auto typeFor = [](const std::string &compId, const std::string &compGroup) -> const char *
	{
		if (cif::iequals(compId, "gly"))
			return "peptide linking";

		if (cif::iequals(compGroup, "l-peptide") or cif::iequals(compGroup, "L-peptide linking") or cif::iequals(compGroup, "peptide") or cif::iequals(compGroup, "p-peptide"))
			return "L-peptide linking";

		if (cif::iequals(compGroup, "DNA"))
			return "DNA linking";

		if (cif::iequals(compGroup, "RNA"))
			return "RNA linking";

		return "non-polymer";
	};

	auto &chemComp = cifFile["comp_list"]["chem_comp"];

	for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
	{
		const std::string type = typeFor(id, group);

		auto &db = cifFile["comp_" + id];

		m_compounds.push_back(new compound(db, id, name, type, group));
	}
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the cached components.cif file from CCD
|
||||
|
||||
class CCD_compound_factory_impl : public compound_factory_impl
|
||||
{
|
||||
public:
|
||||
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path& file)
|
||||
: compound_factory_impl(next)
|
||||
, mCompoundsFile(file)
|
||||
{
|
||||
}
|
||||
|
||||
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
|
||||
: compound_factory_impl(next)
|
||||
{
|
||||
}
|
||||
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
cif::parser::datablock_index mIndex;
|
||||
fs::path mCompoundsFile;
|
||||
};
|
||||
|
||||
// Load the compound with the given id from the CCD components data.
//
// The data is read from mCompoundsFile, or from the resource
// components.cif when that path is empty (std::runtime_error when the
// resource cannot be loaded). On the first call an index of all datablocks
// is built, after which the stream is opened again to parse the single
// requested datablock. A compound that was found is appended to
// m_compounds and returned; otherwise nullptr is returned.
//
// NOTE(review): m_compounds is modified while holding only a shared lock
// on mMutex - confirm this is intended.
compound *CCD_compound_factory_impl::create(const std::string &id)
{
	// opens a fresh stream on the components data
	const auto openComponents = [this]() -> std::unique_ptr<std::istream>
	{
		std::unique_ptr<std::istream> stream;

		if (mCompoundsFile.empty())
		{
			stream = cif::load_resource("components.cif");
			if (not stream)
				throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
		}
		else
			stream.reset(new std::ifstream(mCompoundsFile));

		return stream;
	};

	const bool chatty = cif::VERBOSE > 1;

	std::unique_ptr<std::istream> ccd = openComponents();

	cif::file file;

	if (mIndex.empty())
	{
		if (chatty)
		{
			std::cout << "Creating component index "
					  << "...";
			std::cout.flush();
		}

		cif::parser indexer(*ccd, file);
		mIndex = indexer.index_datablocks();

		if (chatty)
			std::cout << " done" << std::endl;

		// reload the resource, perhaps this should be improved...
		ccd = openComponents();
	}

	if (chatty)
	{
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::parser parser(*ccd, file);
	parser.parse_single_datablock(id, mIndex);

	if (chatty)
		std::cout << " done" << std::endl;

	compound *result = nullptr;

	if (not file.empty())
	{
		auto &db = file.front();
		if (db.name() == id)
		{
			result = new compound(db);

			std::shared_lock lock(mMutex);
			m_compounds.push_back(result);
		}
	}

	if (result == nullptr and cif::VERBOSE > 0)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the data found in CCP4's monomers lib
|
||||
|
||||
class CCP4_compound_factory_impl : public compound_factory_impl
|
||||
{
|
||||
public:
|
||||
CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next = nullptr);
|
||||
|
||||
compound *create(const std::string &id) override;
|
||||
|
||||
private:
|
||||
cif::file m_file;
|
||||
fs::path m_CLIBD_MON;
|
||||
};
|
||||
|
||||
// Open <clibd_mon>/list/mon_lib_list.cif and register the three letter
// codes of its chem_comp rows: rows whose group matches the peptide
// pattern are stored as known peptides, rows with group DNA or RNA as
// known bases.
CCP4_compound_factory_impl::CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next)
	: compound_factory_impl(next)
	, m_file((clibd_mon / "list" / "mon_lib_list.cif").string())
	, m_CLIBD_MON(clibd_mon)
{
	// matches "peptide", optionally prefixed with l-, m- or p-, ignoring case
	const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

	auto &chemComps = m_file["comp_list"]["chem_comp"];

	for (const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>("group", "three_letter_code"))
	{
		if (std::regex_match(group, peptideRx))
		{
			m_known_peptides.insert(threeLetterCode);
			continue;
		}

		if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
			m_known_bases.insert(threeLetterCode);
	}
}
|
||||
|
||||
// Create a compound for id using the CCP4 monomer library.
//
// The id is looked up in the chem_comp category of the comp_list datablock
// using the three_letter_code item. Only when exactly one row matches and the
// corresponding restraint file exists, a new compound is constructed, stored
// in m_compounds and returned. In all other cases nullptr is returned.
compound *CCP4_compound_factory_impl::create(const std::string &id)
{
	compound *result = nullptr;

	auto &cat = m_file["comp_list"]["chem_comp"];

	auto rs = cat.find(cif::key("three_letter_code") == id);

	if (rs.size() == 1)
	{
		auto row = rs.front();

		std::string name, group;
		uint32_t numberAtomsAll, numberAtomsNh;
		cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
			row.get("name", "group", "number_atoms_all", "number_atoms_nh");

		// Files are stored in a sub directory named after the lower case first letter of the id
		fs::path resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + ".cif");

		// COM, CON and PRN are stored as e.g. COM_COM.cif, seriously...
		// (presumably because these are reserved file names on some platforms).
		// NOTE: the last test used to be a bare "PRN" literal, which is always
		// true and made this fallback apply to every id.
		if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN"))
			resFile = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");

		if (fs::exists(resFile))
		{
			cif::file cf(resFile.string());

			// locate the datablock
			auto &db = cf["comp_" + id];

			std::string type;

			// known groups are (counted from ccp4 monomer dictionary)
			//
			//	D-pyranose
			//	DNA
			//	L-PEPTIDE LINKING
			//	L-SACCHARIDE
			//	L-peptide
			//	L-pyranose
			//	M-peptide
			//	NON-POLYMER
			//	P-peptide
			//	RNA
			//	furanose
			//	non-polymer
			//	non_polymer
			//	peptide
			//	pyranose
			//	saccharide

			if (cif::iequals(id, "gly"))
				type = "peptide linking";
			else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
				type = "L-peptide linking";
			else if (cif::iequals(group, "DNA"))
				type = "DNA linking";
			else if (cif::iequals(group, "RNA"))
				type = "RNA linking";
			else
				type = "non-polymer";

			m_compounds.push_back(new compound(db, id, name, type, group));
			result = m_compounds.back();
		}
	}

	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::unique_ptr<compound_factory> compound_factory::s_instance;
|
||||
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
|
||||
bool compound_factory::s_use_thread_local_instance;
|
||||
|
||||
// Select which instance is used by instance() and clear(): the thread local
// one when useThreadLocalInstanceOnly is true, the shared one otherwise.
void compound_factory::init(bool useThreadLocalInstanceOnly)
{
	compound_factory::s_use_thread_local_instance = useThreadLocalInstanceOnly;
}
|
||||
|
||||
// Construct the default chain of factory implementations.
//
// A CCD based implementation is added when the components.cif resource can be
// loaded. When the CLIBD_MON environment variable points to a directory, a
// CCP4 monomer library based implementation is put in front of it.
compound_factory::compound_factory()
	: m_impl(nullptr)
{
	auto ccd = cif::load_resource("components.cif");
	if (ccd)
		m_impl.reset(new CCD_compound_factory_impl(m_impl));
	else if (cif::VERBOSE > 0)
		std::cerr << "CCD components.cif file was not found" << std::endl;

	const char *clibd_mon = getenv("CLIBD_MON");
	if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
	{
		// Pass the current implementation as 'next', otherwise the CCD
		// implementation created above would be discarded here.
		m_impl.reset(new CCP4_compound_factory_impl(clibd_mon, m_impl));
	}
	else if (cif::VERBOSE > 0)
		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
}
|
||||
|
||||
// Nothing to clean up explicitly, the members take care of themselves.
compound_factory::~compound_factory() = default;
|
||||
|
||||
// Return the factory instance, creating it on first use. Depending on the
// flag set by init() this is either the thread local or the shared instance.
compound_factory &compound_factory::instance()
{
	if (not s_use_thread_local_instance)
	{
		if (not s_instance)
			s_instance.reset(new compound_factory());

		return *s_instance;
	}

	if (not tl_instance)
		tl_instance.reset(new compound_factory());

	return *tl_instance;
}
|
||||
|
||||
void compound_factory::clear()
|
||||
{
|
||||
if (s_use_thread_local_instance)
|
||||
tl_instance.reset(nullptr);
|
||||
else
|
||||
s_instance.reset();
|
||||
}
|
||||
|
||||
// Put a CCD based implementation reading from inDictFile in front of the
// current implementation. Throws std::runtime_error if the file does not
// exist; exceptions thrown while loading are reported and rethrown.
void compound_factory::set_default_dictionary(const fs::path &inDictFile)
{
	const bool file_exists = fs::exists(inDictFile);
	if (not file_exists)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		auto *impl = new CCD_compound_factory_impl(m_impl, inDictFile);
		m_impl.reset(impl);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;

		throw;
	}
}
|
||||
|
||||
// Put an implementation reading compounds from inDictFile in front of the
// current implementation. Throws std::runtime_error if the file does not
// exist; exceptions thrown while loading are reported and rethrown.
void compound_factory::push_dictionary(const fs::path &inDictFile)
{
	const bool file_exists = fs::exists(inDictFile);
	if (not file_exists)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		auto *impl = new compound_factory_impl(inDictFile, m_impl);
		m_impl.reset(impl);
	}
	catch (const std::exception &)
	{
		if (cif::VERBOSE >= 0)
			std::cerr << "Error loading dictionary " << inDictFile << std::endl;

		throw;
	}
}
|
||||
|
||||
void compound_factory::pop_dictionary()
|
||||
{
|
||||
if (m_impl)
|
||||
m_impl = m_impl->next();
|
||||
}
|
||||
|
||||
// Return the compound for id as delivered by the current implementation,
// or nullptr when there is no implementation at all.
const compound *compound_factory::create(std::string id)
{
	if (not m_impl)
		return nullptr;

	return m_impl->get(id);
}
|
||||
|
||||
bool compound_factory::is_known_peptide(const std::string &resName) const
|
||||
{
|
||||
return m_impl ? m_impl->is_known_peptide(resName) : kAAMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
bool compound_factory::is_known_base(const std::string &resName) const
|
||||
{
|
||||
return m_impl ? m_impl->is_known_base(resName) : kBaseMap.count(resName) > 0;
|
||||
}
|
||||
|
||||
} // namespace pdbx
|
||||
138
src/condition.cpp
Normal file
138
src/condition.cpp
Normal file
@@ -0,0 +1,138 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
#include <cif++/condition.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// Free function wrapper around category::fields()
iset get_category_fields(const category &cat)
{
	iset fields = cat.fields();
	return fields;
}
|
||||
|
||||
uint16_t get_column_ix(const category &cat, std::string_view col)
|
||||
{
|
||||
return cat.get_column_ix(col);
|
||||
}
|
||||
|
||||
bool is_column_type_uchar(const category &cat, std::string_view col)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
auto cv = cat.get_cat_validator();
|
||||
if (cv)
|
||||
{
|
||||
auto iv = cv->get_validator_for_item(col);
|
||||
if (iv != nullptr and iv->m_type != nullptr)
|
||||
{
|
||||
auto type = iv->m_type;
|
||||
result = type->m_primitive_type == DDL_PrimitiveType::UChar;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
namespace detail
|
||||
{
|
||||
|
||||
// Resolve the item index and case sensitivity for this condition. When the
// category has a validator and the item is the one and only key field, the
// matching row is looked up right away and stored in m_single_hit.
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
	m_item_ix = get_column_ix(c, m_item_tag);
	m_icase = is_column_type_uchar(c, m_item_tag);

	// the key fields are only consulted when there is a validator
	if (c.get_cat_validator() != nullptr)
	{
		const auto &key_ixs = c.key_field_indices();

		if (key_ixs.contains(m_item_ix) and key_ixs.size() == 1)
			m_single_hit = c[{ { m_item_tag, m_value } }];
	}

	return this;
}
|
||||
|
||||
// Prepare all sub conditions and then flatten nested and-conditions: the sub
// conditions of a nested and-condition are moved into this one and the now
// empty nested condition is deleted.
condition_impl *and_condition_impl::prepare(const category &c)
{
	for (auto &child : mSub)
		child = child->prepare(c);

	auto is_nested_and = [](condition_impl *sub)
	{ return dynamic_cast<and_condition_impl *>(sub) != nullptr; };

	// Appended conditions may be and-conditions themselves, so search from the start each time
	for (auto pos = find_if(mSub.begin(), mSub.end(), is_nested_and); pos != mSub.end();
		 pos = find_if(mSub.begin(), mSub.end(), is_nested_and))
	{
		auto *nested = static_cast<and_condition_impl *>(*pos);

		mSub.erase(pos);
		mSub.insert(mSub.end(), nested->mSub.begin(), nested->mSub.end());

		// the sub conditions have a new owner now
		nested->mSub.clear();
		delete nested;
	}

	return this;
}
|
||||
|
||||
// Prepare both operands. If one of them is a key_equals condition and the
// other a key_is_empty condition, this or-condition is replaced by a single
// key_equals_or_empty condition; in that case this object deletes itself and
// the replacement is returned.
condition_impl *or_condition_impl::prepare(const category &c)
{
	mA = mA->prepare(c);
	mB = mB->prepare(c);

	auto *eq = dynamic_cast<key_equals_condition_impl *>(mA);
	auto *nil = dynamic_cast<key_is_empty_condition_impl *>(mB);

	// nothing found, try the operands the other way around
	if (eq == nullptr and nil == nullptr)
	{
		eq = dynamic_cast<key_equals_condition_impl *>(mB);
		nil = dynamic_cast<key_is_empty_condition_impl *>(mA);
	}

	if (eq == nullptr or nil == nullptr)
		return this;

	condition_impl *merged = new detail::key_equals_or_empty_condition_impl(eq, nil);
	merged = merged->prepare(c);

	// no member may be touched after this line
	delete this;

	return merged;
}
|
||||
|
||||
} // namespace detail
|
||||
|
||||
void condition::prepare(const category &c)
|
||||
{
|
||||
if (m_impl)
|
||||
m_impl = m_impl->prepare(c);
|
||||
|
||||
m_prepared = true;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
319
src/datablock.cpp
Normal file
319
src/datablock.cpp
Normal file
@@ -0,0 +1,319 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/datablock.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
void datablock::set_validator(const validator *v)
|
||||
{
|
||||
m_validator = v;
|
||||
|
||||
for (auto &cat : *this)
|
||||
cat.set_validator(v, *this);
|
||||
}
|
||||
|
||||
// Return the validator stored in this datablock, may be nullptr.
const validator *datablock::get_validator() const
{
	const validator *v = m_validator;
	return v;
}
|
||||
|
||||
bool datablock::is_valid() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
throw std::runtime_error("Validator not specified");
|
||||
|
||||
bool result = true;
|
||||
for (auto &cat : *this)
|
||||
result = cat.is_valid() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool datablock::validate_links() const
|
||||
{
|
||||
bool result = true;
|
||||
|
||||
for (auto &cat : *this)
|
||||
result = cat.validate_links() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return the category called name, compared ignoring case. If there is no
// such category a new one is appended, which receives the validator if set.
category &datablock::operator[](std::string_view name)
{
	for (auto &c : *this)
	{
		if (iequals(c.name(), name))
			return c;
	}

	auto &added = emplace_back(name);

	if (m_validator)
		added.set_validator(m_validator, *this);

	return added;
}
|
||||
|
||||
// Return the category called name, compared ignoring case. If there is no
// such category a reference to a shared, empty category is returned.
const category &datablock::operator[](std::string_view name) const
{
	static const category s_empty;

	for (auto &c : *this)
	{
		if (iequals(c.name(), name))
			return c;
	}

	return s_empty;
}
|
||||
|
||||
// Return a pointer to the category called name, compared ignoring case,
// or nullptr if there is no such category.
category *datablock::get(std::string_view name)
{
	for (auto &c : *this)
	{
		if (iequals(c.name(), name))
			return &c;
	}

	return nullptr;
}
|
||||
|
||||
// Const version of get, implemented in terms of the non-const version.
const category *datablock::get(std::string_view name) const
{
	auto *self = const_cast<datablock *>(this);
	return self->get(name);
}
|
||||
|
||||
std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
|
||||
{
|
||||
bool is_new = true;
|
||||
|
||||
auto i = begin();
|
||||
while (i != end())
|
||||
{
|
||||
if (iequals(name, i->name()))
|
||||
{
|
||||
is_new = false;
|
||||
|
||||
if (i != begin())
|
||||
{
|
||||
auto n = std::next(i);
|
||||
splice(begin(), *this, i, n);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &c = emplace_front(name);
|
||||
c.set_validator(m_validator, *this);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
}
|
||||
|
||||
std::vector<std::string> datablock::get_tag_order() const
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
|
||||
// for entry and audit_conform on top
|
||||
|
||||
auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
|
||||
if (ci != end())
|
||||
{
|
||||
auto cto = ci->get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() == "entry" or cat.name() == "audit_conform")
|
||||
continue;
|
||||
auto cto = cat.get_tag_order();
|
||||
result.insert(result.end(), cto.begin(), cto.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void datablock::write(std::ostream &os) const
|
||||
{
|
||||
os << "data_" << m_name << std::endl
|
||||
<< "# " << std::endl;
|
||||
|
||||
// mmcif support, sort of. First write the 'entry' Category
|
||||
// and if it exists, _AND_ we have a Validator, write out the
|
||||
// audit_conform record.
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry")
|
||||
continue;
|
||||
|
||||
cat.write(os);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// If the dictionary declares an audit_conform category, put it in,
|
||||
// but only if it does not exist already!
|
||||
if (get("audit_conform"))
|
||||
get("audit_conform")->write(os);
|
||||
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
|
||||
{
|
||||
category auditConform("audit_conform");
|
||||
auditConform.emplace({
|
||||
{"dict_name", m_validator->name()},
|
||||
{"dict_version", m_validator->version()}});
|
||||
auditConform.write(os);
|
||||
}
|
||||
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (cat.name() != "entry" and cat.name() != "audit_conform")
|
||||
cat.write(os);
|
||||
}
|
||||
}
|
||||
|
||||
void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order)
|
||||
{
|
||||
os << "data_" << m_name << std::endl
|
||||
<< "# " << std::endl;
|
||||
|
||||
std::vector<std::string> cat_order;
|
||||
for (auto &o : tag_order)
|
||||
{
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(o);
|
||||
if (find_if(cat_order.rbegin(), cat_order.rend(), [cat_name](const std::string &s) -> bool
|
||||
{ return iequals(cat_name, s); }) == cat_order.rend())
|
||||
cat_order.push_back(cat_name);
|
||||
}
|
||||
|
||||
for (auto &c : cat_order)
|
||||
{
|
||||
auto cat = get(c);
|
||||
if (cat == nullptr)
|
||||
continue;
|
||||
|
||||
std::vector<std::string> items;
|
||||
for (auto &o : tag_order)
|
||||
{
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(o);
|
||||
|
||||
if (cat_name == c)
|
||||
items.push_back(item_name);
|
||||
}
|
||||
|
||||
cat->write(os, items);
|
||||
}
|
||||
|
||||
// for any Category we missed in the catOrder
|
||||
for (auto &cat : *this)
|
||||
{
|
||||
if (find_if(cat_order.begin(), cat_order.end(), [&](const std::string &s) -> bool
|
||||
{ return iequals(cat.name(), s); }) != cat_order.end())
|
||||
continue;
|
||||
|
||||
cat.write(os);
|
||||
}
|
||||
}
|
||||
|
||||
bool datablock::operator==(const datablock &rhs) const
|
||||
{
|
||||
auto &dbA = *this;
|
||||
auto &dbB = rhs;
|
||||
|
||||
std::vector<std::string> catA, catB;
|
||||
|
||||
for (auto &cat : dbA)
|
||||
{
|
||||
if (not cat.empty())
|
||||
catA.push_back(cat.name());
|
||||
}
|
||||
std::sort(catA.begin(), catA.end());
|
||||
|
||||
for (auto &cat : dbB)
|
||||
{
|
||||
if (not cat.empty())
|
||||
catB.push_back(cat.name());
|
||||
}
|
||||
std::sort(catB.begin(), catB.end());
|
||||
|
||||
// loop over categories twice, to group output
|
||||
// First iteration is to list missing categories.
|
||||
|
||||
std::vector<std::string> missingA, missingB;
|
||||
|
||||
auto catA_i = catA.begin(), catB_i = catB.begin();
|
||||
|
||||
while (catA_i != catA.end() and catB_i != catB.end())
|
||||
{
|
||||
if (not iequals(*catA_i, *catB_i))
|
||||
return false;
|
||||
|
||||
++catA_i, ++catB_i;
|
||||
}
|
||||
|
||||
if (catA_i != catA.end() or catB_i != catB.end())
|
||||
return false;
|
||||
|
||||
// Second loop, now compare category values
|
||||
catA_i = catA.begin(), catB_i = catB.begin();
|
||||
|
||||
while (catA_i != catA.end() and catB_i != catB.end())
|
||||
{
|
||||
std::string nA = *catA_i;
|
||||
to_lower(nA);
|
||||
|
||||
std::string nB = *catB_i;
|
||||
to_lower(nB);
|
||||
|
||||
int d = nA.compare(nB);
|
||||
if (d > 0)
|
||||
++catB_i;
|
||||
else if (d < 0)
|
||||
++catA_i;
|
||||
else
|
||||
{
|
||||
if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
|
||||
return false;
|
||||
++catA_i;
|
||||
++catB_i;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
490
src/dictionary_parser.cpp
Normal file
490
src/dictionary_parser.cpp
Normal file
@@ -0,0 +1,490 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/condition.hpp>
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
using namespace literals;
|
||||
|
||||
// Replace all occurrences of pat in s with rep.
//
// The search continues after the text that was just inserted, so a
// replacement containing the pattern is not replaced again. An empty
// pattern leaves s unchanged.
inline void replace_all(std::string &s, std::string_view pat, std::string_view rep)
{
	if (pat.empty())
		return;

	for (std::string::size_type i = s.find(pat); i != std::string::npos; i = s.find(pat, i + rep.size()))
		s.replace(i, pat.size(), rep.data(), rep.size());
}
|
||||
|
||||
class dictionary_parser : public parser
|
||||
{
|
||||
public:
|
||||
// Construct a parser reading from is into f, the validators that are
// constructed by load_dictionary are stored in validator.
dictionary_parser(validator &validator, std::istream &is, file &f)
	: parser(is, f), m_validator(validator)
{
}
|
||||
|
||||
void load_dictionary()
|
||||
{
|
||||
std::unique_ptr<datablock> dict;
|
||||
auto savedDatablock = m_datablock;
|
||||
|
||||
try
|
||||
{
|
||||
while (m_lookahead != CIFToken::Eof)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::GLOBAL:
|
||||
parse_global();
|
||||
break;
|
||||
|
||||
default:
|
||||
{
|
||||
dict.reset(new datablock(m_token_value)); // dummy datablock, for constructing the validator only
|
||||
m_datablock = dict.get();
|
||||
|
||||
match(CIFToken::DATA);
|
||||
parse_datablock();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
error(ex.what());
|
||||
}
|
||||
|
||||
// store all validators
|
||||
for (auto &ic : mCategoryValidators)
|
||||
m_validator.add_category_validator(std::move(ic));
|
||||
mCategoryValidators.clear();
|
||||
|
||||
for (auto &iv : mItemValidators)
|
||||
{
|
||||
auto cv = m_validator.get_validator_for_category(iv.first);
|
||||
if (cv == nullptr)
|
||||
error("Undefined category '" + iv.first);
|
||||
|
||||
for (auto &v : iv.second)
|
||||
const_cast<category_validator *>(cv)->addItemValidator(std::move(v));
|
||||
}
|
||||
|
||||
// check all item validators for having a typeValidator
|
||||
|
||||
if (dict)
|
||||
link_items();
|
||||
|
||||
// store meta information
|
||||
datablock::iterator info;
|
||||
bool is_new;
|
||||
std::tie(info, is_new) = m_datablock->emplace("dictionary");
|
||||
if (not is_new and not info->empty())
|
||||
{
|
||||
auto r = info->front();
|
||||
m_validator.set_name(r["title"].as<std::string>());
|
||||
m_validator.version(r["version"].as<std::string>());
|
||||
}
|
||||
|
||||
m_datablock = savedDatablock;
|
||||
|
||||
mItemValidators.clear();
|
||||
}
|
||||
|
||||
private:
|
||||
void parse_save_frame() override
|
||||
{
|
||||
if (not m_collected_item_types)
|
||||
m_collected_item_types = collect_item_types();
|
||||
|
||||
std::string saveFrameName = m_token_value;
|
||||
|
||||
if (saveFrameName.empty())
|
||||
error("Invalid save frame, should contain more than just 'save_' here");
|
||||
|
||||
bool isCategorySaveFrame = m_token_value[0] != '_';
|
||||
|
||||
datablock dict(m_token_value);
|
||||
datablock::iterator cat = dict.end();
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
if (m_lookahead == CIFToken::LOOP)
|
||||
{
|
||||
cat = dict.end(); // should start a new category
|
||||
|
||||
match(CIFToken::LOOP);
|
||||
|
||||
std::vector<std::string> tags;
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
std::string catName, item_name;
|
||||
std::tie(catName, item_name) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat == dict.end())
|
||||
std::tie(cat, std::ignore) = dict.emplace(catName);
|
||||
else if (not iequals(cat->name(), catName))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
tags.push_back(item_name);
|
||||
match(CIFToken::Tag);
|
||||
}
|
||||
|
||||
while (m_lookahead == CIFToken::Value)
|
||||
{
|
||||
cat->emplace({});
|
||||
auto row = cat->back();
|
||||
|
||||
for (auto tag : tags)
|
||||
{
|
||||
row[tag] = m_token_value;
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
cat = dict.end();
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string catName, item_name;
|
||||
std::tie(catName, item_name) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat == dict.end() or not iequals(cat->name(), catName))
|
||||
std::tie(cat, std::ignore) = dict.emplace(catName);
|
||||
|
||||
match(CIFToken::Tag);
|
||||
|
||||
if (cat->empty())
|
||||
cat->emplace({});
|
||||
cat->back()[item_name] = m_token_value;
|
||||
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
|
||||
if (isCategorySaveFrame)
|
||||
{
|
||||
std::string category = dict["category"].front().get<std::string>("id");
|
||||
|
||||
std::vector<std::string> keys;
|
||||
for (auto k : dict["category_key"])
|
||||
keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>())));
|
||||
|
||||
iset groups;
|
||||
for (auto g : dict["category_group"])
|
||||
groups.insert(g["id"].as<std::string>());
|
||||
|
||||
mCategoryValidators.push_back(category_validator{ category, keys, groups });
|
||||
}
|
||||
else
|
||||
{
|
||||
// if the type code is missing, this must be a pointer, just skip it
|
||||
std::string typeCode = dict["item_type"].front().get<std::string>("code");
|
||||
|
||||
const type_validator *tv = nullptr;
|
||||
if (not(typeCode.empty() or typeCode == "?"))
|
||||
tv = m_validator.get_validator_for_type(typeCode);
|
||||
|
||||
iset ess;
|
||||
for (auto e : dict["item_enumeration"])
|
||||
ess.insert(e["value"].as<std::string>());
|
||||
|
||||
std::string defaultValue = dict["item_default"].front().get<std::string>("value");
|
||||
bool defaultIsNull = false;
|
||||
if (defaultValue.empty())
|
||||
{
|
||||
// TODO: Is this correct???
|
||||
for (auto r : dict["_item_default"])
|
||||
{
|
||||
defaultIsNull = r["value"].is_null();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// collect the dict from our dataBlock and construct validators
|
||||
for (auto i : dict["item"])
|
||||
{
|
||||
std::string tagName, category, mandatory;
|
||||
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
|
||||
|
||||
std::string cat_name, item_name;
|
||||
std::tie(cat_name, item_name) = split_tag_name(tagName);
|
||||
|
||||
if (cat_name.empty() or item_name.empty())
|
||||
error("Invalid tag name in _item.name " + tagName);
|
||||
|
||||
if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
|
||||
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
|
||||
else
|
||||
category = cat_name;
|
||||
|
||||
auto &ivs = mItemValidators[category];
|
||||
|
||||
auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
|
||||
if (vi == ivs.end())
|
||||
ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull });
|
||||
else
|
||||
{
|
||||
// need to update the itemValidator?
|
||||
if (vi->m_mandatory != (iequals(mandatory, "yes")))
|
||||
{
|
||||
if (VERBOSE > 2)
|
||||
{
|
||||
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
|
||||
|
||||
if (iequals(tagName, saveFrameName))
|
||||
std::cerr << "choosing " << mandatory << std::endl;
|
||||
else
|
||||
std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << std::endl;
|
||||
}
|
||||
|
||||
if (iequals(tagName, saveFrameName))
|
||||
vi->m_mandatory = (iequals(mandatory, "yes"));
|
||||
}
|
||||
|
||||
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
|
||||
{
|
||||
if (VERBOSE > 1)
|
||||
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
|
||||
}
|
||||
|
||||
// vi->mMandatory = (iequals(mandatory, "yes"));
|
||||
if (vi->m_type == nullptr)
|
||||
vi->m_type = tv;
|
||||
|
||||
vi->m_enums.insert(ess.begin(), ess.end());
|
||||
|
||||
// anything else yet?
|
||||
// ...
|
||||
}
|
||||
}
|
||||
|
||||
// collect the dict from our dataBlock and construct validators
|
||||
for (auto i : dict["item_linked"])
|
||||
{
|
||||
mLinkedItems.emplace(i.get<std::string,std::string>("child_name", "parent_name"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void link_items()
|
||||
{
|
||||
if (not m_datablock)
|
||||
error("no datablock");
|
||||
|
||||
auto &dict = *m_datablock;
|
||||
|
||||
// links are identified by a parent category, a child category and a group ID
|
||||
|
||||
using key_type = std::tuple<std::string, std::string, int>;
|
||||
|
||||
std::map<key_type, size_t> linkIndex;
|
||||
|
||||
// Each link group consists of a set of keys
|
||||
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
|
||||
|
||||
auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
|
||||
{
|
||||
auto &&[pkeys, ckeys] = linkKeys.at(ix);
|
||||
|
||||
bool found = false;
|
||||
for (size_t i = 0; i < pkeys.size(); ++i)
|
||||
{
|
||||
if (pkeys[i] == pk and ckeys[i] == ck)
|
||||
{
|
||||
found = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (not found)
|
||||
{
|
||||
pkeys.push_back(pk);
|
||||
ckeys.push_back(ck);
|
||||
}
|
||||
};
|
||||
|
||||
auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
|
||||
|
||||
for (auto gl : linkedGroupList)
|
||||
{
|
||||
std::string child, parent;
|
||||
int link_group_id;
|
||||
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
|
||||
|
||||
auto civ = m_validator.get_validator_for_item(child);
|
||||
if (civ == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
|
||||
|
||||
auto piv = m_validator.get_validator_for_item(parent);
|
||||
if (piv == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
|
||||
|
||||
key_type key{ piv->m_category->m_name, civ->m_category->m_name, link_group_id };
|
||||
if (not linkIndex.count(key))
|
||||
{
|
||||
linkIndex[key] = linkKeys.size();
|
||||
linkKeys.push_back({});
|
||||
}
|
||||
|
||||
size_t ix = linkIndex.at(key);
|
||||
addLink(ix, piv->m_tag, civ->m_tag);
|
||||
}
|
||||
|
||||
// Only process inline linked items if the linked group list is absent
|
||||
if (linkedGroupList.empty())
|
||||
{
|
||||
// for links recorded in categories but not in pdbx_item_linked_group_list
|
||||
for (auto li : mLinkedItems)
|
||||
{
|
||||
std::string child, parent;
|
||||
std::tie(child, parent) = li;
|
||||
|
||||
auto civ = m_validator.get_validator_for_item(child);
|
||||
if (civ == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
|
||||
|
||||
auto piv = m_validator.get_validator_for_item(parent);
|
||||
if (piv == nullptr)
|
||||
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
|
||||
|
||||
key_type key{ piv->m_category->m_name, civ->m_category->m_name, 0 };
|
||||
if (not linkIndex.count(key))
|
||||
{
|
||||
linkIndex[key] = linkKeys.size();
|
||||
linkKeys.push_back({});
|
||||
}
|
||||
|
||||
size_t ix = linkIndex.at(key);
|
||||
addLink(ix, piv->m_tag, civ->m_tag);
|
||||
}
|
||||
}
|
||||
|
||||
auto &linkedGroup = dict["pdbx_item_linked_group"];
|
||||
|
||||
// now store the links in the validator
|
||||
for (auto &kv : linkIndex)
|
||||
{
|
||||
link_validator link = {};
|
||||
std::tie(link.m_parent_category, link.m_child_category, link.m_link_group_id) = kv.first;
|
||||
|
||||
std::tie(link.m_parent_keys, link.m_child_keys) = linkKeys[kv.second];
|
||||
|
||||
// look up the label
|
||||
for (auto r : linkedGroup.find("category_id"_key == link.m_child_category and "link_group_id"_key == link.m_link_group_id))
|
||||
{
|
||||
link.m_link_group_label = r["label"].as<std::string>();
|
||||
break;
|
||||
}
|
||||
|
||||
m_validator.add_link_validator(std::move(link));
|
||||
}
|
||||
|
||||
// now make sure the itemType is specified for all itemValidators
|
||||
|
||||
for (auto &cv : m_validator.m_category_validators)
|
||||
{
|
||||
for (auto &iv : cv.m_item_validators)
|
||||
{
|
||||
if (iv.m_type == nullptr and cif::VERBOSE >= 0)
|
||||
std::cerr << "Missing item_type for " << iv.m_tag << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool collect_item_types()
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
if (not m_datablock)
|
||||
error("no datablock");
|
||||
|
||||
auto &dict = *m_datablock;
|
||||
|
||||
for (auto t : dict["item_type_list"])
|
||||
{
|
||||
std::string code, primitiveCode, construct;
|
||||
cif::tie(code, primitiveCode, construct) = t.get("code", "primitive_code", "construct");
|
||||
|
||||
replace_all(construct, "\\n", "\n");
|
||||
replace_all(construct, "\\t", "\t");
|
||||
replace_all(construct, "\\\n", "");
|
||||
|
||||
try
|
||||
{
|
||||
type_validator v = {
|
||||
code, map_to_primitive_type(primitiveCode), construct
|
||||
};
|
||||
|
||||
m_validator.add_type_validator(std::move(v));
|
||||
}
|
||||
catch (const std::exception &)
|
||||
{
|
||||
std::throw_with_nested(parse_error(/*t.lineNr()*/ 0, "error in regular expression"));
|
||||
}
|
||||
|
||||
// Do not replace an already defined type validator, this won't work with pdbx_v40
|
||||
// as it has a name that is too strict for its own names :-)
|
||||
// if (mFileImpl.mTypeValidators.count(v))
|
||||
// mFileImpl.mTypeValidators.erase(v);
|
||||
|
||||
if (VERBOSE >= 5)
|
||||
std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
|
||||
|
||||
result = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
validator &m_validator;
|
||||
bool m_collected_item_types = false;
|
||||
|
||||
std::vector<category_validator> mCategoryValidators;
|
||||
std::map<std::string, std::vector<item_validator>> mItemValidators;
|
||||
std::set<std::tuple<std::string, std::string>> mLinkedItems;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Create a validator called `name` and fill it by running a dictionary_parser
// over the stream `is`. The cif::file handed to the parser is only a local
// scratch object.
validator parse_dictionary(std::string_view name, std::istream &is)
{
	validator dictValidator(name);

	file scratch;
	dictionary_parser dictParser(dictValidator, is, scratch);
	dictParser.load_dictionary();

	return dictValidator;
}
|
||||
|
||||
} // namespace cif
|
||||
216
src/file.cpp
Normal file
216
src/file.cpp
Normal file
@@ -0,0 +1,216 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
void file::set_validator(const validator *v)
|
||||
{
|
||||
m_validator = v;
|
||||
for (auto &db : *this)
|
||||
db.set_validator(v);
|
||||
}
|
||||
|
||||
bool file::is_valid() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
std::runtime_error("No validator loaded explicitly, cannot continue");
|
||||
|
||||
bool result = true;
|
||||
for (auto &d : *this)
|
||||
result = d.is_valid() and result;
|
||||
|
||||
if (result)
|
||||
result = validate_links();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool file::is_valid()
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
{
|
||||
if (VERBOSE > 0)
|
||||
std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;
|
||||
|
||||
load_dictionary();
|
||||
}
|
||||
|
||||
bool result = not empty();
|
||||
|
||||
for (auto &d : *this)
|
||||
result = d.is_valid() and result;
|
||||
|
||||
if (result)
|
||||
result = validate_links();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
bool file::validate_links() const
|
||||
{
|
||||
if (m_validator == nullptr)
|
||||
std::runtime_error("No validator loaded explicitly, cannot continue");
|
||||
|
||||
bool result = true;
|
||||
|
||||
for (auto &db : *this)
|
||||
result = db.validate_links() and result;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void file::load_dictionary()
|
||||
{
|
||||
if (not empty())
|
||||
{
|
||||
auto *audit_conform = front().get("audit_conform");
|
||||
if (audit_conform and not audit_conform->empty())
|
||||
{
|
||||
std::string name = audit_conform->front().get<std::string>("dict_name");
|
||||
|
||||
if (not name.empty())
|
||||
{
|
||||
try
|
||||
{
|
||||
load_dictionary(name);
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
if (VERBOSE)
|
||||
std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (not m_validator)
|
||||
load_dictionary("mmcif_ddl");
|
||||
}
|
||||
|
||||
void file::load_dictionary(std::string_view name)
|
||||
{
|
||||
set_validator(&validator_factory::instance()[name]);
|
||||
}
|
||||
|
||||
bool file::contains(std::string_view name) const
|
||||
{
|
||||
return std::find_if(begin(), end(), [name](const datablock &db) { return db.name() == name; }) != end();
|
||||
}
|
||||
|
||||
// Return the datablock whose name equals `name` (case-insensitive).
// When there is none, a new datablock is appended and returned.
datablock &file::operator[](std::string_view name)
{
	for (auto &block : *this)
	{
		if (iequals(block.name(), name))
			return block;
	}

	emplace_back(name);
	return back();
}
|
||||
|
||||
// Return the datablock whose name equals `name` (case-insensitive), or a
// reference to a shared empty datablock when there is none.
const datablock &file::operator[](std::string_view name) const
{
	static const datablock s_empty;

	for (const auto &block : *this)
	{
		if (iequals(block.name(), name))
			return block;
	}

	return s_empty;
}
|
||||
|
||||
std::tuple<file::iterator, bool> file::emplace(std::string_view name)
|
||||
{
|
||||
bool is_new = true;
|
||||
|
||||
auto i = begin();
|
||||
while (i != end())
|
||||
{
|
||||
if (iequals(name, i->name()))
|
||||
{
|
||||
is_new = false;
|
||||
|
||||
if (i != begin())
|
||||
{
|
||||
auto n = std::next(i);
|
||||
splice(begin(), *this, i, n);
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
|
||||
if (is_new)
|
||||
{
|
||||
auto &db = emplace_front(name);
|
||||
db.set_validator(m_validator);
|
||||
}
|
||||
|
||||
return std::make_tuple(begin(), is_new);
|
||||
}
|
||||
|
||||
void file::load(const std::filesystem::path &p)
|
||||
{
|
||||
gxrio::ifstream in(p);
|
||||
load(in);
|
||||
}
|
||||
|
||||
void file::load(std::istream &is)
|
||||
{
|
||||
auto saved = m_validator;
|
||||
set_validator(nullptr);
|
||||
|
||||
parser p(is, *this);
|
||||
p.parse_file();
|
||||
|
||||
if (saved != nullptr)
|
||||
set_validator(saved);
|
||||
else
|
||||
load_dictionary();
|
||||
}
|
||||
|
||||
void file::save(const std::filesystem::path &p) const
|
||||
{
|
||||
gxrio::ofstream outFile(p);
|
||||
save(outFile);
|
||||
}
|
||||
|
||||
void file::save(std::ostream &os) const
|
||||
{
|
||||
// if (not is_valid())
|
||||
// std::cout << "File is not valid!" << std::endl;
|
||||
|
||||
for (auto &db : *this)
|
||||
db.write(os);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
68
src/item.cpp
Normal file
68
src/item.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
|
||||
#include <cif++/row.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
const item_handle item_handle::s_null_item;
|
||||
row_handle s_null_row_handle;
|
||||
|
||||
// Default constructor: column is set to the largest uint16_t value and the
// handle refers to the shared s_null_row_handle.
item_handle::item_handle()
	: m_column(std::numeric_limits<uint16_t>::max())
	, m_row_handle(s_null_row_handle)
{}
|
||||
|
||||
std::string_view item_handle::text() const
|
||||
{
|
||||
if (not m_row_handle.empty())
|
||||
{
|
||||
auto iv = m_row_handle.m_row->get(m_column);
|
||||
if (iv != nullptr)
|
||||
return iv->text();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
void item_handle::assign_value(const item &v)
|
||||
{
|
||||
assert(not m_row_handle.empty());
|
||||
m_row_handle.assign(m_column, v.value(), true);
|
||||
}
|
||||
|
||||
// Swap the value in this handle's column with the one in the row of `b`.
// Both handles must refer to the same column (checked by assert only).
void item_handle::swap(item_handle &b)
{
	assert(b.m_column == m_column);

	m_row_handle.swap(m_column, b.m_row_handle);
}
|
||||
|
||||
}
|
||||
2609
src/model.cpp
Normal file
2609
src/model.cpp
Normal file
File diff suppressed because it is too large
Load Diff
820
src/parser.cpp
Normal file
820
src/parser.cpp
Normal file
@@ -0,0 +1,820 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <stack>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include <cif++/forward_decl.hpp>
|
||||
#include <cif++/parser.hpp>
|
||||
#include <cif++/file.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
extern int VERBOSE;
|
||||
}
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Set up a parser reading from `is`. Line counting starts at 1 and the
// parser starts at the beginning of a line. When `init` is true the first
// token is read right away into m_lookahead.
sac_parser::sac_parser(std::istream &is, bool init)
	: m_source(is)
{
	m_bol = true;
	m_line_nr = 1;
	m_validate = true;

	if (init)
		m_lookahead = get_next_token();
}
|
||||
|
||||
// get_next_char takes a char from the buffer, or if it is empty
|
||||
// from the istream. This function also does carriage/linefeed
|
||||
// translation.
|
||||
int sac_parser::get_next_char()
|
||||
{
|
||||
int result;
|
||||
|
||||
if (m_buffer.empty())
|
||||
result = m_source.get();
|
||||
else
|
||||
{
|
||||
result = m_buffer.top();
|
||||
m_buffer.pop();
|
||||
}
|
||||
|
||||
// very simple CR/LF translation into LF
|
||||
if (result == '\r')
|
||||
{
|
||||
int lookahead = m_source.get();
|
||||
if (lookahead != '\n')
|
||||
m_buffer.push(lookahead);
|
||||
result = '\n';
|
||||
}
|
||||
|
||||
m_token_value += static_cast<char>(result);
|
||||
|
||||
if (result == '\n')
|
||||
++m_line_nr;
|
||||
|
||||
if (VERBOSE >= 6)
|
||||
{
|
||||
std::cerr << "get_next_char => ";
|
||||
if (iscntrl(result) or not isprint(result))
|
||||
std::cerr << int(result) << std::endl;
|
||||
else
|
||||
std::cerr << char(result) << std::endl;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sac_parser::retract()
|
||||
{
|
||||
assert(not m_token_value.empty());
|
||||
|
||||
char ch = m_token_value.back();
|
||||
if (ch == '\n')
|
||||
--m_line_nr;
|
||||
|
||||
m_buffer.push(ch);
|
||||
m_token_value.pop_back();
|
||||
}
|
||||
|
||||
int sac_parser::restart(int start)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
while (not m_token_value.empty())
|
||||
retract();
|
||||
|
||||
switch (start)
|
||||
{
|
||||
case State::Start:
|
||||
result = State::Float;
|
||||
break;
|
||||
|
||||
case State::Float:
|
||||
result = State::Int;
|
||||
break;
|
||||
|
||||
case State::Int:
|
||||
result = State::Value;
|
||||
break;
|
||||
|
||||
default:
|
||||
error("Invalid state in SacParser");
|
||||
}
|
||||
|
||||
m_bol = false;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// Scan the next token from the input.
//
// A state machine driven by get_next_char(). The text of the token ends up in
// m_token_value and, for values, its kind in mTokenType. Unquoted input is
// tried as Float, then Int, then plain Value; each failed attempt goes
// through restart(), which pushes the characters back.
sac_parser::CIFToken sac_parser::get_next_token()
{
	const auto kEOF = std::char_traits<char>::eof();

	CIFToken token = CIFToken::Unknown;
	int quote = 0;
	int state = State::Start, start = State::Start;

	m_bol = false;
	m_token_value.clear();
	mTokenType = CIFValue::Unknown;

	// abandon the current interpretation and try the next one
	auto retry = [&]()
	{
		state = start = restart(start);
	};

	// the character just read ends a value of the given kind
	auto accept_value = [&](auto kind)
	{
		retract();
		token = CIFToken::Value;
		mTokenType = kind;
	};

	while (token == CIFToken::Unknown)
	{
		const auto c = get_next_char();

		switch (state)
		{
			case State::Start:
				if (c == kEOF)
					token = CIFToken::Eof;
				else if (c == '\n' or c == ' ' or c == '\t')
				{
					if (c == '\n')
						m_bol = true;
					state = State::White;
				}
				else if (c == '#')
					state = State::Comment;
				else if (c == '_')
					state = State::Tag;
				else if (c == ';' and m_bol)
					state = State::TextField;
				else if (c == '\'' or c == '"')
				{
					quote = c;
					state = State::QuotedString;
				}
				else
					retry();
				break;

			case State::White:
				if (c == kEOF)
					token = CIFToken::Eof;
				else if (isspace(c))
					m_bol = (c == '\n');
				else
				{
					state = State::Start;
					retract();
					m_token_value.clear();
				}
				break;

			case State::Comment:
				if (c == '\n')
				{
					state = State::Start;
					m_bol = true;
					m_token_value.clear();
				}
				else if (c == kEOF)
					token = CIFToken::Eof;
				else if (not is_any_print(c))
					error("invalid character in comment");
				break;

			case State::TextField:
				if (c == '\n')
					state = State::TextField + 1;
				else if (c == kEOF)
					error("unterminated textfield");
				else if (not is_any_print(c))
					warning("invalid character in text field '" + std::string({static_cast<char>(c)}) + "' (" + std::to_string((int)c) + ")");
				break;

			// first character of a new line inside a text field
			case State::TextField + 1:
				if (is_text_lead(c) or c == ' ' or c == '\t')
					state = State::TextField;
				else if (c == ';')
				{
					// strip the leading ';' and the trailing newline plus ';'
					assert(m_token_value.length() >= 2);
					m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
					mTokenType = CIFValue::TextField;
					token = CIFToken::Value;
				}
				else if (c == kEOF)
					error("unterminated textfield");
				else if (c != '\n')
					error("invalid character in text field");
				break;

			case State::QuotedString:
				if (c == kEOF)
					error("unterminated quoted string");
				else if (c == quote)
					state = State::QuotedStringQuote;
				else if (not is_any_print(c))
					warning("invalid character in quoted string: '" + std::string({static_cast<char>(c)}) + '\'');
				break;

			// just saw the quote character, it only closes the string when
			// followed by white space
			case State::QuotedStringQuote:
				if (is_white(c))
				{
					accept_value(CIFValue::String);

					if (m_token_value.length() < 2)
						error("Invalid quoted string token");

					m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
				}
				else if (c == quote)
				{
					// another quote character, stay in this state
				}
				else if (is_any_print(c))
					state = State::QuotedString;
				else if (c == kEOF)
					error("unterminated quoted string");
				else
					error("invalid character in quoted string");
				break;

			case State::Tag:
				if (not is_non_blank(c))
				{
					retract();
					token = CIFToken::Tag;
				}
				break;

			case State::Float:
				if (c == '+' or c == '-' or isdigit(c))
					state = State::Float + 1;
				else
					retry();
				break;

			case State::Float + 1:
				if (c == '.')
					state = State::Float + 2;
				else if (tolower(c) == 'e')
					state = State::Float + 3;
				else if (is_white(c) or c == kEOF)
					accept_value(CIFValue::Int);
				else
					retry();
				break;

			// parsed '.'
			case State::Float + 2:
				if (tolower(c) == 'e')
					state = State::Float + 3;
				else if (is_white(c) or c == kEOF)
					accept_value(CIFValue::Float);
				else
					retry();
				break;

			// parsed 'e'
			case State::Float + 3:
				if (c == '-' or c == '+')
					state = State::Float + 4;
				else if (isdigit(c))
					state = State::Float + 5;
				else
					retry();
				break;

			case State::Float + 4:
				if (isdigit(c))
					state = State::Float + 5;
				else
					retry();
				break;

			case State::Float + 5:
				if (is_white(c) or c == kEOF)
					accept_value(CIFValue::Float);
				else
					retry();
				break;

			case State::Int:
				if (isdigit(c) or c == '+' or c == '-')
					state = State::Int + 1;
				else
					retry();
				break;

			case State::Int + 1:
				if (is_white(c) or c == kEOF)
					accept_value(CIFValue::Int);
				else
					retry();
				break;

			case State::Value:
				if (c == '_')
				{
					// reserved words all end in an underscore
					std::string word = to_lower_copy(m_token_value);

					if (word == "global_")
						token = CIFToken::GLOBAL;
					else if (word == "stop_")
						token = CIFToken::STOP;
					else if (word == "loop_")
						token = CIFToken::LOOP;
					else if (word == "data_")
					{
						state = State::DATA;
						continue;
					}
					else if (word == "save_")
					{
						state = State::SAVE;
						continue;
					}
				}

				if (token == CIFToken::Unknown and not is_non_blank(c))
				{
					retract();
					token = CIFToken::Value;

					if (m_token_value == ".")
						mTokenType = CIFValue::Inapplicable;
					else if (m_token_value == "?")
					{
						mTokenType = CIFValue::Unknown;
						m_token_value.clear();
					}
				}
				break;

			case State::DATA:
			case State::SAVE:
				if (not is_non_blank(c))
				{
					retract();

					token = (state == State::DATA) ? CIFToken::DATA : CIFToken::SAVE;

					// drop the "data_" / "save_" prefix, keep the name
					m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
				}
				break;

			default:
				assert(false);
				error("Invalid state in get_next_token");
				break;
		}
	}

	if (VERBOSE >= 5)
	{
		std::cerr << get_token_name(token);
		if (mTokenType != CIFValue::Unknown)
			std::cerr << ' ' << get_value_name(mTokenType);
		if (token != CIFToken::Eof)
			std::cerr << " " << std::quoted(m_token_value);
		std::cerr << std::endl;
	}

	return token;
}
|
||||
|
||||
// Require the lookahead token to be `token` and read the next token.
// A mismatch is reported through error().
void sac_parser::match(CIFToken token)
{
	if (m_lookahead != token)
	{
		std::string message("Unexpected token, expected ");
		message += get_token_name(token);
		message += " but found ";
		message += get_token_name(m_lookahead);

		error(message);
	}

	m_lookahead = get_next_token();
}
|
||||
|
||||
bool sac_parser::parse_single_datablock(const std::string &datablock)
|
||||
{
|
||||
// first locate the start, as fast as we can
|
||||
auto &sb = *m_source.rdbuf();
|
||||
|
||||
enum
|
||||
{
|
||||
start,
|
||||
comment,
|
||||
string,
|
||||
string_quote,
|
||||
qstring,
|
||||
data
|
||||
} state = start;
|
||||
|
||||
int quote = 0;
|
||||
bool bol = true;
|
||||
std::string dblk = "data_" + datablock;
|
||||
std::string::size_type si = 0;
|
||||
bool found = false;
|
||||
|
||||
for (auto ch = sb.sbumpc(); not found and ch != std::streambuf::traits_type::eof(); ch = sb.sbumpc())
|
||||
{
|
||||
switch (state)
|
||||
{
|
||||
case start:
|
||||
switch (ch)
|
||||
{
|
||||
case '#': state = comment; break;
|
||||
case 'd':
|
||||
case 'D':
|
||||
state = data;
|
||||
si = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
state = string;
|
||||
quote = ch;
|
||||
break;
|
||||
case ';':
|
||||
if (bol)
|
||||
state = qstring;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case comment:
|
||||
if (ch == '\n')
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case string:
|
||||
if (ch == quote)
|
||||
state = string_quote;
|
||||
break;
|
||||
|
||||
case string_quote:
|
||||
if (std::isspace(ch))
|
||||
state = start;
|
||||
else
|
||||
state = string;
|
||||
break;
|
||||
|
||||
case qstring:
|
||||
if (ch == ';' and bol)
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case data:
|
||||
if (isspace(ch) and dblk[si] == 0)
|
||||
found = true;
|
||||
else if (dblk[si++] != ch)
|
||||
state = start;
|
||||
break;
|
||||
}
|
||||
|
||||
bol = (ch == '\n');
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
produce_datablock(datablock);
|
||||
m_lookahead = get_next_token();
|
||||
parse_datablock();
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
// Scan the raw stream buffer and return an index that maps each datablock
// name to the stream position (m_source.tellg()) just after the white space
// character that ends its `data_<name>` header. Comments, quoted strings and
// text fields are skipped.
sac_parser::datablock_index sac_parser::index_datablocks()
{
	datablock_index index;

	auto &sb = *m_source.rdbuf();

	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data,
		data_name
	} state = start;

	int quote = 0;
	bool bol = true;
	const char dblk[] = "data_";
	std::string::size_type si = 0;
	std::string blockName;

	const auto eof = std::streambuf::traits_type::eof();

	auto ch = sb.sbumpc();
	while (ch != eof)
	{
		switch (state)
		{
			case start:
				if (ch == '#')
					state = comment;
				else if (ch == 'd' or ch == 'D')
				{
					state = data;
					si = 1;
				}
				else if (ch == '\'' or ch == '"')
				{
					state = string;
					quote = ch;
				}
				else if (ch == ';' and bol)
					state = qstring;
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				// a quote only ends the string when white space follows
				state = std::isspace(ch) ? start : string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				// the keyword is complete, this is the first character of the name
				if (dblk[si] == 0 and is_non_blank(ch))
				{
					blockName.assign(1, static_cast<char>(ch));
					state = data_name;
				}
				else if (dblk[si++] != ch)
					state = start;
				break;

			case data_name:
				if (is_non_blank(ch))
					blockName.push_back(char(ch));
				else
				{
					// only a name ended by white space is recorded
					if (isspace(ch) and not blockName.empty())
						index[blockName] = m_source.tellg();

					state = start;
				}
				break;
		}

		bol = (ch == '\n');
		ch = sb.sbumpc();
	}

	return index;
}
|
||||
|
||||
bool sac_parser::parse_single_datablock(const std::string &datablock, const datablock_index &index)
|
||||
{
|
||||
bool result = false;
|
||||
|
||||
auto i = index.find(datablock);
|
||||
if (i != index.end())
|
||||
{
|
||||
m_source.seekg(i->second);
|
||||
|
||||
produce_datablock(datablock);
|
||||
m_lookahead = get_next_token();
|
||||
parse_datablock();
|
||||
|
||||
result = true;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void sac_parser::parse_file()
|
||||
{
|
||||
while (m_lookahead != CIFToken::Eof)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::GLOBAL:
|
||||
parse_global();
|
||||
break;
|
||||
|
||||
case CIFToken::DATA:
|
||||
produce_datablock(m_token_value);
|
||||
|
||||
match(CIFToken::DATA);
|
||||
parse_datablock();
|
||||
break;
|
||||
|
||||
default:
|
||||
error("This file does not seem to be an mmCIF file");
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_global()
|
||||
{
|
||||
match(CIFToken::GLOBAL);
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
match(CIFToken::Tag);
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_datablock()
|
||||
{
|
||||
std::string cat;
|
||||
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
case CIFToken::LOOP:
|
||||
{
|
||||
cat.clear(); // should start a new category
|
||||
|
||||
match(CIFToken::LOOP);
|
||||
|
||||
std::vector<std::string> tags;
|
||||
|
||||
while (m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
std::string catName, itemName;
|
||||
std::tie(catName, itemName) = split_tag_name(m_token_value);
|
||||
|
||||
if (cat.empty())
|
||||
{
|
||||
produce_category(catName);
|
||||
cat = catName;
|
||||
}
|
||||
else if (not iequals(cat, catName))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
tags.push_back(itemName);
|
||||
|
||||
match(CIFToken::Tag);
|
||||
}
|
||||
|
||||
while (m_lookahead == CIFToken::Value)
|
||||
{
|
||||
produce_row();
|
||||
|
||||
for (auto tag : tags)
|
||||
{
|
||||
produce_item(cat, tag, m_token_value);
|
||||
match(CIFToken::Value);
|
||||
}
|
||||
}
|
||||
|
||||
cat.clear();
|
||||
break;
|
||||
}
|
||||
|
||||
case CIFToken::Tag:
|
||||
{
|
||||
std::string catName, itemName;
|
||||
std::tie(catName, itemName) = split_tag_name(m_token_value);
|
||||
|
||||
if (not iequals(cat, catName))
|
||||
{
|
||||
produce_category(catName);
|
||||
cat = catName;
|
||||
produce_row();
|
||||
}
|
||||
|
||||
match(CIFToken::Tag);
|
||||
|
||||
produce_item(cat, itemName, m_token_value);
|
||||
|
||||
match(CIFToken::Value);
|
||||
break;
|
||||
}
|
||||
|
||||
case CIFToken::SAVE:
|
||||
parse_save_frame();
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void sac_parser::parse_save_frame()
|
||||
{
|
||||
error("A regular CIF file should not contain a save frame");
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void parser::produce_datablock(const std::string &name)
|
||||
{
|
||||
const auto &[iter, ignore] = m_file.emplace(name);
|
||||
m_datablock = &(*iter);
|
||||
}
|
||||
|
||||
void parser::produce_category(const std::string &name)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing category " << name << std::endl;
|
||||
|
||||
const auto &[cat, ignore] = m_datablock->emplace(name);
|
||||
m_category = &*cat;
|
||||
}
|
||||
|
||||
void parser::produce_row()
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing row for category " << m_category->name() << std::endl;
|
||||
|
||||
m_category->emplace({});
|
||||
m_row = m_category->back();
|
||||
// m_row.lineNr(m_line_nr);
|
||||
}
|
||||
|
||||
void parser::produce_item(const std::string &category, const std::string &item, const std::string &value)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
|
||||
|
||||
if (not iequals(category, m_category->name()))
|
||||
error("inconsistent categories in loop_");
|
||||
|
||||
m_row[item] = m_token_value;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
3726
src/pdb/cif2pdb.cpp
Normal file
3726
src/pdb/cif2pdb.cpp
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
1500
src/pdb/pdb2cif_remark_3.cpp
Normal file
1500
src/pdb/pdb2cif_remark_3.cpp
Normal file
File diff suppressed because it is too large
Load Diff
2016
src/pdb/tls.cpp
Normal file
2016
src/pdb/tls.cpp
Normal file
File diff suppressed because it is too large
Load Diff
544
src/point.cpp
Normal file
544
src/point.cpp
Normal file
@@ -0,0 +1,544 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <random>
|
||||
|
||||
#include <cif++/point.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// We're using expression templates here
|
||||
|
||||
// CRTP base for all matrix-like types: every call is forwarded to the
// derived type M, which must provide dim_m(), dim_n() and operator()(i, j).
template <typename M>
class MatrixExpression
{
  public:
	uint32_t dim_m() const { return derived().dim_m(); }
	uint32_t dim_n() const { return derived().dim_n(); }

	// writable element access, only usable when M offers a non-const operator()
	double &operator()(uint32_t i, uint32_t j)
	{
		return derived()(i, j);
	}

	// read-only element access
	double operator()(uint32_t i, uint32_t j) const
	{
		return derived()(i, j);
	}

  private:
	M &derived() { return static_cast<M &>(*this); }
	const M &derived() const { return static_cast<const M &>(*this); }
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
|
||||
// element m i,j is mapped to [i * n + j] and thus storage is row major
|
||||
|
||||
// Dense m x n matrix of doubles, stored row major: element (i, j) lives at
// index i * n + j of m_data.
class Matrix : public MatrixExpression<Matrix>
{
  public:
	// Evaluate an arbitrary matrix expression into concrete storage
	template <typename M2>
	Matrix(const MatrixExpression<M2> &m)
		: m_m(m.dim_m())
		, m_n(m.dim_n())
		, m_data(m_m * m_n)
	{
		for (uint32_t row = 0; row < m_m; ++row)
		{
			for (uint32_t col = 0; col < m_n; ++col)
				m_data[row * m_n + col] = m(row, col);
		}
	}

	// An m x n matrix with every element set to v
	Matrix(size_t m, size_t n, double v = 0)
		: m_m(m)
		, m_n(n)
		, m_data(m_m * m_n, v)
	{
	}

	Matrix() = default;
	Matrix(Matrix &&m) = default;
	Matrix(const Matrix &m) = default;
	Matrix &operator=(Matrix &&m) = default;
	Matrix &operator=(const Matrix &m) = default;

	uint32_t dim_m() const { return m_m; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

  private:
	uint32_t m_m = 0, m_n = 0; // number of rows, number of columns
	std::vector<double> m_data;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Symmetric n x n matrix of doubles. Only one triangle is stored, packed in
// m_data: for i <= j element (i, j) lives at index j * (j + 1) / 2 + i, and
// (j, i) refers to the very same element.
class SymmetricMatrix : public MatrixExpression<SymmetricMatrix>
{
  public:
	// An n x n matrix with every element set to v
	SymmetricMatrix(uint32_t n, double v = 0)
		: m_n(n)
		, m_data((m_n * (m_n + 1)) / 2, v)
	{
	}

	SymmetricMatrix() = default;
	SymmetricMatrix(SymmetricMatrix &&m) = default;
	SymmetricMatrix(const SymmetricMatrix &m) = default;
	SymmetricMatrix &operator=(SymmetricMatrix &&m) = default;
	SymmetricMatrix &operator=(const SymmetricMatrix &m) = default;

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		assert(i < m_n);
		assert(j < m_n);
		return i < j
		           ? m_data[(j * (j + 1)) / 2 + i]
		           : m_data[(i * (i + 1)) / 2 + j];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		// make sure i <= j, the order used by the packed storage
		if (i > j)
			std::swap(i, j);
		assert(j < m_n);
		return m_data[(j * (j + 1)) / 2 + i];
	}

  private:
	// zero by default so that a default constructed matrix is a valid 0 x 0 matrix
	uint32_t m_n = 0;
	std::vector<double> m_data;
};
|
||||
|
||||
// The n x n identity matrix as a matrix expression; no elements are stored,
// operator() simply yields 1 on the diagonal and 0 elsewhere.
class IdentityMatrix : public MatrixExpression<IdentityMatrix>
{
  public:
	IdentityMatrix(uint32_t n)
		: m_n(n)
	{
	}

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		if (i == j)
			return 1;
		return 0;
	}

  private:
	uint32_t m_n;
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// matrix functions, implemented as expression templates
|
||||
|
||||
template <typename M1, typename M2>
|
||||
class MatrixSubtraction : public MatrixExpression<MatrixSubtraction<M1, M2>>
|
||||
{
|
||||
public:
|
||||
MatrixSubtraction(const M1 &m1, const M2 &m2)
|
||||
: m_m1(m1)
|
||||
, m_m2(m2)
|
||||
{
|
||||
assert(m_m1.dim_m() == m_m2.dim_m());
|
||||
assert(m_m1.dim_n() == m_m2.dim_n());
|
||||
}
|
||||
|
||||
uint32_t dim_m() const { return m_m1.dim_m(); }
|
||||
uint32_t dim_n() const { return m_m1.dim_n(); }
|
||||
|
||||
double operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return m_m1(i, j) - m_m2(i, j);
|
||||
}
|
||||
|
||||
private:
|
||||
const M1 &m_m1;
|
||||
const M2 &m_m2;
|
||||
};
|
||||
|
||||
template <typename M1, typename M2>
|
||||
MatrixSubtraction<M1, M2> operator-(const MatrixExpression<M1> &m1, const MatrixExpression<M2> &m2)
|
||||
{
|
||||
return MatrixSubtraction(*static_cast<const M1 *>(&m1), *static_cast<const M2 *>(&m2));
|
||||
}
|
||||
|
||||
template <typename M>
|
||||
class MatrixMultiplication : public MatrixExpression<MatrixMultiplication<M>>
|
||||
{
|
||||
public:
|
||||
MatrixMultiplication(const M &m, double v)
|
||||
: m_m(m)
|
||||
, m_v(v)
|
||||
{
|
||||
}
|
||||
|
||||
uint32_t dim_m() const { return m_m.dim_m(); }
|
||||
uint32_t dim_n() const { return m_m.dim_n(); }
|
||||
|
||||
double operator()(uint32_t i, uint32_t j) const
|
||||
{
|
||||
return m_m(i, j) * m_v;
|
||||
}
|
||||
|
||||
private:
|
||||
const M &m_m;
|
||||
double m_v;
|
||||
};
|
||||
|
||||
template <typename M>
|
||||
MatrixMultiplication<M> operator*(const MatrixExpression<M> &m, double v)
|
||||
{
|
||||
return MatrixMultiplication(*static_cast<const M *>(&m), v);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <class M1>
|
||||
Matrix Cofactors(const M1 &m)
|
||||
{
|
||||
Matrix cf(m.dim_m(), m.dim_m());
|
||||
|
||||
const size_t ixs[4][3] = {
|
||||
{ 1, 2, 3 },
|
||||
{ 0, 2, 3 },
|
||||
{ 0, 1, 3 },
|
||||
{ 0, 1, 2 }
|
||||
};
|
||||
|
||||
for (size_t x = 0; x < 4; ++x)
|
||||
{
|
||||
const size_t *ix = ixs[x];
|
||||
|
||||
for (size_t y = 0; y < 4; ++y)
|
||||
{
|
||||
const size_t *iy = ixs[y];
|
||||
|
||||
cf(x, y) =
|
||||
m(ix[0], iy[0]) * m(ix[1], iy[1]) * m(ix[2], iy[2]) +
|
||||
m(ix[0], iy[1]) * m(ix[1], iy[2]) * m(ix[2], iy[0]) +
|
||||
m(ix[0], iy[2]) * m(ix[1], iy[0]) * m(ix[2], iy[1]) -
|
||||
m(ix[0], iy[2]) * m(ix[1], iy[1]) * m(ix[2], iy[0]) -
|
||||
m(ix[0], iy[1]) * m(ix[1], iy[0]) * m(ix[2], iy[2]) -
|
||||
m(ix[0], iy[0]) * m(ix[1], iy[2]) * m(ix[2], iy[1]);
|
||||
|
||||
if ((x + y) % 2 == 1)
|
||||
cf(x, y) *= -1;
|
||||
}
|
||||
}
|
||||
|
||||
return cf;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template<typename T>
|
||||
quaternion_type<T> normalize(quaternion_type<T> q)
|
||||
{
|
||||
std::valarray<double> t(4);
|
||||
|
||||
t[0] = q.get_a();
|
||||
t[1] = q.get_b();
|
||||
t[2] = q.get_c();
|
||||
t[3] = q.get_d();
|
||||
|
||||
t *= t;
|
||||
|
||||
double length = std::sqrt(t.sum());
|
||||
|
||||
if (length > 0.001)
|
||||
q /= static_cast<quaternion::value_type>(length);
|
||||
else
|
||||
q = quaternion(1, 0, 0, 0);
|
||||
|
||||
return q;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Construct the unit quaternion describing a rotation of angle degrees
// around axis. The axis does not need to be normalized.
quaternion construct_from_angle_axis(float angle, point axis)
{
	// a rotation quaternion is built from half the angle, in radians
	const auto half = (angle * kPI / 180) / 2;

	// Take the sine directly: sqrt(1 - cos^2) is never negative and would
	// therefore make a negative angle rotate in the wrong direction.
	const auto c = std::cos(half);
	const auto s = std::sin(half);

	axis.normalize();

	return normalize(quaternion{
		static_cast<float>(c),
		static_cast<float>(s * axis.m_x),
		static_cast<float>(s * axis.m_y),
		static_cast<float>(s * axis.m_z) });
}
|
||||
|
||||
// Convert quaternion q into a rotation angle in degrees and a rotation axis.
// When the rotation is (nearly) zero the axis is not well defined; in that
// case the vector part of q is returned as is.
std::tuple<double, point> quaternion_to_angle_axis(quaternion q)
{
	// acos is only defined for [-1, 1]; normalize when the scalar part is
	// outside that range on either side
	if (std::abs(q.get_a()) > 1)
		q = normalize(q);

	// angle:
	double angle = 2 * std::acos(q.get_a());
	angle = angle * 180 / kPI;

	// axis:
	float s = std::sqrt(1 - q.get_a() * q.get_a());
	if (s < 0.001)
		s = 1; // avoid dividing by (nearly) zero

	point axis(q.get_b() / s, q.get_c() / s, q.get_d() / s);

	return { angle, axis };
}
|
||||
|
||||
// Translate Points in place so that their mean position becomes the origin.
// Returns the mean position the points had before the translation.
point center_points(std::vector<point> &Points)
{
	point mean;

	// sum all coordinates ...
	for (const point &p : Points)
	{
		mean.m_x += p.m_x;
		mean.m_y += p.m_y;
		mean.m_z += p.m_z;
	}

	// ... and divide by the number of points
	mean.m_x /= Points.size();
	mean.m_y /= Points.size();
	mean.m_z /= Points.size();

	for (point &p : Points)
	{
		p.m_x -= mean.m_x;
		p.m_y -= mean.m_y;
		p.m_z -= mean.m_z;
	}

	return mean;
}
|
||||
|
||||
// Return the mean position of the points in pts.
point centroid(const std::vector<point> &pts)
{
	point sum;

	for (const auto &p : pts)
		sum += p;

	// turn the sum into the mean
	sum /= static_cast<float>(pts.size());

	return sum;
}
|
||||
|
||||
double RMSd(const std::vector<point> &a, const std::vector<point> &b)
|
||||
{
|
||||
double sum = 0;
|
||||
for (uint32_t i = 0; i < a.size(); ++i)
|
||||
{
|
||||
std::valarray<double> d(3);
|
||||
|
||||
d[0] = b[i].m_x - a[i].m_x;
|
||||
d[1] = b[i].m_y - a[i].m_y;
|
||||
d[2] = b[i].m_z - a[i].m_z;
|
||||
|
||||
d *= d;
|
||||
|
||||
sum += d.sum();
|
||||
}
|
||||
|
||||
return std::sqrt(sum / a.size());
|
||||
}
|
||||
|
||||
// The next function returns the largest solution for a quartic equation
// based on Ferrari's algorithm.
// A depressed quartic is of the form:
//
//   x^4 + ax^2 + bx + c = 0
//
// The calculation is done using complex values to avoid nan's as a result
// of taking the sqrt of a negative number; the real part of the largest
// solution is returned.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	std::complex<double> P = -(a * a) / 12 - c;
	std::complex<double> Q = -(a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	std::complex<double> R = -Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	std::complex<double> U = std::pow(R, 1 / 3.0);

	std::complex<double> y;
	if (U == 0.0)
		y = -5.0 * a / 6.0 + U - std::pow(Q, 1.0 / 3.0);
	else
		y = -5.0 * a / 6.0 + U - P / (3.0 * U);

	std::complex<double> W = std::sqrt(a + 2.0 * y);

	// The four solutions are:
	//
	//   x = (+W ± sqrt(-(3 * a + 2 * y + 2 * b / W))) / 2
	//   x = (-W ± sqrt(-(3 * a + 2 * y - 2 * b / W))) / 2
	//
	// Note that the sign of W and the sign of the 2 * b / W term go together,
	// mixing them gives values that are not solutions of the equation.
	// The principal square root has a non-negative real part, so the largest
	// solution of each pair is the one that adds the square root.

	const double viaPlusW = ((W + std::sqrt(-(3.0 * a + 2.0 * y + 2.0 * b / W))) / 2.0).real();
	const double viaMinusW = ((-W + std::sqrt(-(3.0 * a + 2.0 * y - 2.0 * b / W))) / 2.0).real();

	return viaMinusW > viaPlusW ? viaMinusW : viaPlusW;
}
|
||||
|
||||
// Calculate a rotation, as a unit quaternion, from two lists of
// corresponding points pa[i] and pb[i].
//
// The matrix M of summed coordinate products is turned into a symmetric 4x4
// matrix N; a row of the cofactor matrix of (N - λI), with λ the largest
// solution of the characteristic equation of N, is used as the quaternion.
// Both lists are expected to have the same length. No translation is
// applied to the points here.
quaternion align_points(const std::vector<point> &pa, const std::vector<point> &pb)
{
	// pb is indexed with the indices of pa
	assert(pa.size() == pb.size());

	// First calculate M, a 3x3 Matrix containing the sums of products of the coordinates of A and B
	Matrix M(3, 3, 0);

	for (size_t i = 0; i < pa.size(); ++i)
	{
		const point &a = pa[i];
		const point &b = pb[i];

		M(0, 0) += a.m_x * b.m_x;
		M(0, 1) += a.m_x * b.m_y;
		M(0, 2) += a.m_x * b.m_z;
		M(1, 0) += a.m_y * b.m_x;
		M(1, 1) += a.m_y * b.m_y;
		M(1, 2) += a.m_y * b.m_z;
		M(2, 0) += a.m_z * b.m_x;
		M(2, 1) += a.m_z * b.m_y;
		M(2, 2) += a.m_z * b.m_z;
	}

	// Now calculate N, a symmetric 4x4 Matrix
	SymmetricMatrix N(4);

	N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
	N(0, 1) = M(1, 2) - M(2, 1);
	N(0, 2) = M(2, 0) - M(0, 2);
	N(0, 3) = M(0, 1) - M(1, 0);

	N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
	N(1, 2) = M(0, 1) + M(1, 0);
	N(1, 3) = M(0, 2) + M(2, 0);

	N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
	N(2, 3) = M(1, 2) + M(2, 1);

	N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);

	// det(N - λI) = 0
	// find the largest λ (λm)
	//
	// Aλ^4 + Bλ^3 + Cλ^2 + Dλ + E = 0
	// A = 1
	// B = 0
	// and so this is a so-called depressed quartic
	// solve it using Ferrari's algorithm

	double C = -2 * (M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
						M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
						M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));

	double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
					   M(1, 1) * M(2, 0) * M(0, 2) +
					   M(2, 2) * M(0, 1) * M(1, 0)) -
	           8 * (M(0, 0) * M(1, 1) * M(2, 2) +
					   M(1, 2) * M(2, 0) * M(0, 1) +
					   M(2, 1) * M(1, 0) * M(0, 2));

	// E is the determinant of N:
	double E =
		(N(0, 0) * N(1, 1) - N(0, 1) * N(0, 1)) * (N(2, 2) * N(3, 3) - N(2, 3) * N(2, 3)) +
		(N(0, 1) * N(0, 2) - N(0, 0) * N(2, 1)) * (N(2, 1) * N(3, 3) - N(2, 3) * N(1, 3)) +
		(N(0, 0) * N(1, 3) - N(0, 1) * N(0, 3)) * (N(2, 1) * N(2, 3) - N(2, 2) * N(1, 3)) +
		(N(0, 1) * N(2, 1) - N(1, 1) * N(0, 2)) * (N(0, 2) * N(3, 3) - N(2, 3) * N(0, 3)) +
		(N(1, 1) * N(0, 3) - N(0, 1) * N(1, 3)) * (N(0, 2) * N(2, 3) - N(2, 2) * N(0, 3)) +
		(N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3)) * (N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3));

	// solve quartic
	double lambda = LargestDepressedQuarticSolution(C, D, E);

	// calculate t = (N - λI)
	Matrix t = N - IdentityMatrix(4) * lambda;

	// calculate a Matrix of cofactors for t
	Matrix cf = Cofactors(t);

	// use the row whose first element has the largest magnitude
	int maxR = 0;
	for (int r = 1; r < 4; ++r)
	{
		if (std::abs(cf(r, 0)) > std::abs(cf(maxR, 0)))
			maxR = r;
	}

	quaternion q(
		static_cast<float>(cf(maxR, 0)),
		static_cast<float>(cf(maxR, 1)),
		static_cast<float>(cf(maxR, 2)),
		static_cast<float>(cf(maxR, 3)));
	q = normalize(q);

	return q;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Return p moved in a random direction. The direction is the vector
// (0, 0, 1) rotated by a quaternion built from three random angles, the
// distance is drawn from a normal distribution with mean 0 and standard
// deviation offset.
point nudge(point p, float offset)
{
	// a single generator, seeded once, is shared by all calls
	static std::random_device rd;
	static std::mt19937_64 rng(rd());

	std::uniform_real_distribution<float> randomAngle(0, 2 * kPI);
	std::normal_distribution<> randomOffset(0, offset);

	// the angles are drawn in this fixed order
	float theta = randomAngle(rng);
	float phi1 = randomAngle(rng) - kPI;
	float phi2 = randomAngle(rng) - kPI;

	quaternion rotation = spherical(1.0f, theta, phi1, phi2);

	point shift{ 0, 0, 1 };
	shift.rotate(rotation);
	shift *= randomOffset(rng);

	return p + shift;
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -1,5 +0,0 @@
|
||||
const char kRevision[] = R"(
|
||||
lib@PROJECT_NAME@-version: @PROJECT_VERSION@
|
||||
@BUILD_VERSION_STRING@
|
||||
Date: @BUILD_DATE_TIME@
|
||||
)";
|
||||
100
src/row.cpp
Normal file
100
src/row.cpp
Normal file
@@ -0,0 +1,100 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cif++/category.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
void row_handle::assign(size_t column, std::string_view value, bool updateLinked, bool validate)
|
||||
{
|
||||
assert(m_category);
|
||||
m_category->update_value(m_row, column, value, updateLinked, validate);
|
||||
}
|
||||
|
||||
uint16_t row_handle::get_column_ix(std::string_view name) const
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->get_column_ix(name);
|
||||
}
|
||||
|
||||
std::string_view row_handle::get_column_name(uint16_t ix) const
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->get_column_name(ix);
|
||||
}
|
||||
|
||||
uint16_t row_handle::add_column(std::string_view name)
|
||||
{
|
||||
assert(m_category);
|
||||
return m_category->add_column(name);
|
||||
}
|
||||
|
||||
void row_handle::swap(size_t column, row_handle &b)
|
||||
{
|
||||
m_category->swap_item(column, *this, b);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Construct an initializer holding a name/text pair for every item in the
// row referenced by rh that tests as true; other items are skipped.
row_initializer::row_initializer(row_handle rh)
{
	assert(rh.m_category);
	assert(rh.m_row);

	auto &cat = *rh.m_category;
	row *r = rh.get_row();

	const size_t count = r->size();
	for (size_t ix = 0; ix < count; ++ix)
	{
		auto &field = (*r)[ix];
		if (not field)
			continue;

		emplace_back(cat.get_column_name(ix), field.text());
	}
}
|
||||
|
||||
void row_initializer::set_value(std::string_view name, std::string_view value)
|
||||
{
|
||||
for (auto &i : *this)
|
||||
{
|
||||
if (i.name() == name)
|
||||
{
|
||||
i.value(value);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
emplace_back(name, value);
|
||||
}
|
||||
|
||||
void row_initializer::set_value_if_empty(std::string_view name, std::string_view value)
|
||||
{
|
||||
if (find_if(begin(), end(), [name](auto &i) { return i.name() == name; }) == end())
|
||||
emplace_back(name, value);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -27,12 +27,11 @@
|
||||
#include <atomic>
|
||||
#include <mutex>
|
||||
|
||||
#include "cif++/Symmetry.hpp"
|
||||
#include "cif++/CifUtils.hpp"
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
#include "SymOpTable_data.hpp"
|
||||
#include "./symop_table_data.hpp"
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -42,7 +41,7 @@ namespace mmcif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int GetSpacegroupNumber(std::string spacegroup)
|
||||
int get_space_group_number(std::string spacegroup)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
@@ -90,4 +89,66 @@ int GetSpacegroupNumber(std::string spacegroup)
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
int get_space_group_number(std::string spacegroup, space_group_name type)
|
||||
{
|
||||
if (spacegroup == "P 21 21 2 A")
|
||||
spacegroup = "P 21 21 2 (a)";
|
||||
else if (spacegroup.empty())
|
||||
throw std::runtime_error("No spacegroup, cannot continue");
|
||||
|
||||
int result = 0;
|
||||
|
||||
if (type == space_group_name::full)
|
||||
{
|
||||
const size_t N = kNrOfSpaceGroups;
|
||||
int32_t L = 0, R = static_cast<int32_t>(N - 1);
|
||||
while (L <= R)
|
||||
{
|
||||
int32_t i = (L + R) / 2;
|
||||
|
||||
int d = spacegroup.compare(kSpaceGroups[i].name);
|
||||
|
||||
if (d > 0)
|
||||
L = i + 1;
|
||||
else if (d < 0)
|
||||
R = i - 1;
|
||||
else
|
||||
{
|
||||
result = kSpaceGroups[i].nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (type == space_group_name::xHM)
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.xHM == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (auto &sg : kSpaceGroups)
|
||||
{
|
||||
if (sg.Hall == spacegroup)
|
||||
{
|
||||
result = sg.nr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// not found, see if we can find a match based on xHM name
|
||||
if (result == 0)
|
||||
throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -2,12 +2,12 @@
|
||||
// and $CLIBD/syminfo.lib using symop-map-generator,
|
||||
// part of the PDB-REDO suite of programs.
|
||||
|
||||
#include "cif++/Symmetry.hpp"
|
||||
#include <cif++/symmetry.hpp>
|
||||
|
||||
namespace mmcif
|
||||
namespace cif
|
||||
{
|
||||
|
||||
const Spacegroup kSpaceGroups[] =
|
||||
const space_group kSpaceGroups[] =
|
||||
{
|
||||
{ "" , "P 2 1 1" , " P 2y (y,z,x)" , 10005 },
|
||||
{ "" , "P 21 1 1" , " P 2yb (y,z,x)" , 10008 },
|
||||
@@ -632,9 +632,9 @@ const Spacegroup kSpaceGroups[] =
|
||||
|
||||
};
|
||||
|
||||
const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(Spacegroup);
|
||||
const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(space_group);
|
||||
|
||||
const SymopDataBlock kSymopNrTable[] = {
|
||||
const symop_datablock kSymopNrTable[] = {
|
||||
|
||||
// P 1
|
||||
{ 1, 1, { 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, } },
|
||||
@@ -8654,7 +8654,7 @@ const SymopDataBlock kSymopNrTable[] = {
|
||||
{ 10528, 192, { 0, 0, 1, 0,-1, 0,-1, 0, 0, 1, 2, 3, 4, 3, 4, } },
|
||||
};
|
||||
|
||||
const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(SymopDataBlock);
|
||||
const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(symop_datablock);
|
||||
|
||||
} // namespace mmcif
|
||||
|
||||
505
src/text.cpp
Normal file
505
src/text.cpp
Normal file
@@ -0,0 +1,505 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#include <cif++/text.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// This really makes a difference, having our own tolower routines
|
||||
|
||||
const uint8_t kCharToLowerMap[256] =
|
||||
{
|
||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
|
||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
|
||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
|
||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
|
||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
|
||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
|
||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
|
||||
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
|
||||
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
|
||||
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
|
||||
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
|
||||
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
|
||||
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
|
||||
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
|
||||
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool iequals(std::string_view a, std::string_view b)
|
||||
{
|
||||
bool result = a.length() == b.length();
|
||||
for (auto ai = a.begin(), bi = b.begin(); result and ai != a.end(); ++ai, ++bi)
|
||||
result = kCharToLowerMap[uint8_t(*ai)] == kCharToLowerMap[uint8_t(*bi)];
|
||||
// result = tolower(*ai) == tolower(*bi);
|
||||
return result;
|
||||
}
|
||||
|
||||
// Case insensitive test for equality of the zero terminated strings a and b.
bool iequals(const char *a, const char *b)
{
	for (; *a and *b; ++a, ++b)
	{
		// tolower is only defined for values that fit in an unsigned char,
		// a plain char might be negative
		if (tolower(static_cast<unsigned char>(*a)) != tolower(static_cast<unsigned char>(*b)))
			return false;
	}

	// equal only when both strings ended at the same position
	return *a == *b;
}
|
||||
|
||||
// Case insensitive comparison of a and b.
// Returns zero when both are equal, a negative value when a sorts before b
// and a positive value when a sorts after b. When one string is a prefix of
// the other, the shorter one sorts first.
int icompare(std::string_view a, std::string_view b)
{
	int d = 0;
	auto ai = a.begin(), bi = b.begin();

	// tolower is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	for (; d == 0 and ai != a.end() and bi != b.end(); ++ai, ++bi)
		d = tolower(static_cast<unsigned char>(*ai)) - tolower(static_cast<unsigned char>(*bi));

	if (d == 0)
	{
		if (ai != a.end())
			d = 1;
		else if (bi != b.end())
			d = -1;
	}

	return d;
}
|
||||
|
||||
// Case insensitive comparison of the zero terminated strings a and b.
// Returns zero when both are equal, a negative value when a sorts before b
// and a positive value when a sorts after b. When one string is a prefix of
// the other, the shorter one sorts first.
int icompare(const char *a, const char *b)
{
	int d = 0;

	// tolower is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	for (; d == 0 and *a != 0 and *b != 0; ++a, ++b)
		d = tolower(static_cast<unsigned char>(*a)) - tolower(static_cast<unsigned char>(*b));

	if (d == 0)
	{
		if (*a != 0)
			d = 1;
		else if (*b != 0)
			d = -1;
	}

	return d;
}
|
||||
|
||||
// Convert all characters in s to lower case, in place.
void to_lower(std::string &s)
{
	// tolower is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	for (auto &c : s)
		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
}
|
||||
|
||||
// Return a copy of s with all characters converted to lower case.
std::string to_lower_copy(std::string_view s)
{
	std::string result(s);

	// tolower is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	for (auto &c : result)
		c = static_cast<char>(tolower(static_cast<unsigned char>(c)));

	return result;
}
|
||||
|
||||
// Convert all characters in s to upper case, in place.
void to_upper(std::string &s)
{
	// toupper is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	for (auto &c : s)
		c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
}
|
||||
|
||||
// Replace every occurrence of what in s with with, in place.
// Text that has been inserted is not searched again. When what is empty,
// s is left unchanged.
void replace_all(std::string &s, std::string_view what, std::string_view with)
{
	// an empty string is found at every position, searching for it would
	// keep inserting with forever
	if (what.empty())
		return;

	// continue searching right after the text that was just inserted
	for (std::string::size_type p = s.find(what); p != std::string::npos; p = s.find(what, p + with.length()))
		s.replace(p, what.length(), with);
}
|
||||
|
||||
bool icontains(std::string_view s, std::string_view q)
|
||||
{
|
||||
return contains(to_lower_copy(s), to_lower_copy(q));
|
||||
}
|
||||
|
||||
// Remove all trailing white space characters from s, in place.
void trim_right(std::string &s)
{
	auto n = s.size();

	// isspace is only defined for values that fit in an unsigned char,
	// a plain char might be negative
	while (n > 0 and std::isspace(static_cast<unsigned char>(s[n - 1])))
		--n;

	if (n != s.size())
		s.erase(n);
}
|
||||
|
||||
/// \brief Return a copy of \a s without its trailing white space.
///
/// \param s The text to trim; it is not modified.
/// \return The leading part of \a s up to and including its last
///         non-space character (empty when \a s is all white space).
std::string trim_right_copy(std::string_view s)
{
	auto n = s.length();

	// std::isspace is undefined for negative values, hence the unsigned char cast.
	while (n > 0 and std::isspace(static_cast<unsigned char>(s[n - 1])))
		--n;

	return std::string(s.substr(0, n));
}
|
||||
|
||||
/// \brief Return a copy of \a s without its leading white space.
///
/// \param s The text to trim; it is not modified.
/// \return The part of \a s starting at its first non-space character
///         (empty when \a s is all white space).
std::string trim_left_copy(std::string_view s)
{
	std::string_view::size_type i = 0;

	// std::isspace is undefined for negative values, hence the unsigned char cast.
	while (i < s.length() and std::isspace(static_cast<unsigned char>(s[i])))
		++i;

	return std::string(s.substr(i));
}
|
||||
|
||||
/// \brief Remove leading white space from \a s, in place.
///
/// \param s The string to modify; white space is whatever std::isspace reports.
void trim_left(std::string &s)
{
	std::string::size_type i = 0;

	// std::isspace is undefined for negative values, hence the unsigned char cast.
	while (i < s.length() and std::isspace(static_cast<unsigned char>(s[i])))
		++i;

	s.erase(0, i);
}
|
||||
|
||||
void trim(std::string &s)
|
||||
{
|
||||
trim_right(s);
|
||||
trim_left(s);
|
||||
}
|
||||
|
||||
std::string trim_copy(std::string_view s)
|
||||
{
|
||||
return trim_left_copy(trim_right_copy(s));
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Split a tag of the form `_<first>.<second>` into its two parts.
///
/// \param tag The full tag; must start with an underscore and contain a dot.
/// \return A tuple holding the text between the leading underscore and the
///         first dot, and the text after that dot.
/// \throws std::runtime_error when \a tag is empty, does not start with an
///         underscore, or contains no dot.
std::tuple<std::string, std::string> split_tag_name(std::string_view tag)
{
	if (tag.empty())
		throw std::runtime_error("empty tag");

	if (tag.front() != '_')
		throw std::runtime_error("tag does not start with underscore");

	const auto dot = tag.find('.');
	if (dot == std::string_view::npos)
		throw std::runtime_error("tag does not contain dot");

	// Position 0 is the underscore, so the first part spans [1, dot).
	const std::string_view first = tag.substr(1, dot - 1);
	const std::string_view second = tag.substr(dot + 1);

	return std::tuple<std::string, std::string>{ first, second };
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Generate a unique letter-based identifier for a zero-based number.
///
/// The sequence is A, B, ..., Z, AA, AB, ..., ZZ, AAA, ... (bijective
/// base 26). Numbers of 26 * 26 * 26 and above are written as the letter
/// 'L' followed by the decimal number.
///
/// The previous implementation dropped the middle letter whenever the
/// remainder after the leading letter was below 26, so different numbers
/// produced the same identifier (26 and 676 both gave "AA"). Identifiers
/// the old code produced without a collision are unchanged.
///
/// \param number Zero-based sequence number; must not be negative.
/// \return The identifier, never empty.
std::string cif_id_for_number(int number)
{
	assert(number >= 0);

	if (number >= 26 * 26 * 26)
		return 'L' + std::to_string(number);

	std::string result;

	// Emit letters from least to most significant, prepending each one.
	// The "- 1" is what makes the numbering bijective: there is no zero digit,
	// so "AA" directly follows "Z".
	do
	{
		const int r = number % 26;
		result.insert(result.begin(), static_cast<char>('A' + r));
		number = (number - r) / 26 - 1;
	} while (number >= 0);

	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Simplified line breaking code taken from a decent text editor.
|
||||
// In this case, simplified means it only supports ASCII.
|
||||
|
||||
/// \brief Line break classes used by the line breaking code below.
///
/// The first 27 enumerators (up to and including kLBC_HangulTJamo) are used
/// as row and column indices of the 27 x 27 pair table in nextLineBreak, in
/// exactly this order, so the order must not be changed. The two-letter
/// codes in the comments are the column labels of that table.
enum LineBreakClass
{
	kLBC_OpenPunctuation,                // OP
	kLBC_ClosePunctuation,               // CL
	kLBC_CloseParenthesis,               // CP
	kLBC_Quotation,                      // QU
	kLBC_NonBreaking,                    // GL
	kLBC_Nonstarter,                     // NS
	kLBC_Exlamation,                     // EX
	kLBC_SymbolAllowingBreakAfter,       // SY
	kLBC_InfixNumericSeparator,          // IS
	kLBC_PrefixNumeric,                  // PR
	kLBC_PostfixNumeric,                 // PO
	kLBC_Numeric,                        // NU
	kLBC_Alphabetic,                     // AL
	kLBC_Ideographic,                    // ID
	kLBC_Inseperable,                    // IN
	kLBC_Hyphen,                         // HY
	kLBC_BreakAfter,                     // BA
	kLBC_BreakBefor,                     // BB
	kLBC_BreakOpportunityBeforeAndAfter, // B2
	kLBC_ZeroWidthSpace,                 // ZW
	kLBC_CombiningMark,                  // CM
	kLBC_WordJoiner,                     // WJ
	kLBC_HangulLVSyllable,               // H2
	kLBC_HangulLVTSyllable,              // H3
	kLBC_HangulLJamo,                    // JL
	kLBC_HangulVJamo,                    // JV
	kLBC_HangulTJamo,                    // JT

	// Classes from here on have no row or column in the pair table.
	kLBC_MandatoryBreak,
	kLBC_CarriageReturn,
	kLBC_LineFeed,
	kLBC_NextLine,
	kLBC_Surrogate,
	kLBC_Space,
	kLBC_ContigentBreakOpportunity,
	kLBC_Ambiguous,
	kLBC_ComplexContext,
	kLBC_Unknown
};
|
||||
|
||||
/// \brief Line break class for each of the 128 ASCII code points.
///
/// Indexed by character code; each row below covers eight consecutive codes.
const LineBreakClass kASCII_LBTable[128] =
	{
		// 0x00 - 0x07
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		// 0x08 - 0x0F: BS, TAB, LF, VT, FF, CR, SO, SI
		kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak,
		kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,
		// 0x10 - 0x17
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		// 0x18 - 0x1F
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
		// space ! " # $ % & '
		kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic,
		kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,
		// ( ) * +
		kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric,
		// comma treated differently here, it is not a numeric separator in PDB
		kLBC_SymbolAllowingBreakAfter /* kLBC_InfixNumericSeparator */,
		// - . /
		kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,
		// 0 - 7
		kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
		kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
		// 8 9 : ; < = > ?
		kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,
		// @ A - G
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// H - O
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// P - W
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// X Y Z [ \ ] ^ _
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation,
		kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,
		// ` a - g
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// h - o
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// p - w
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
		// x y z { | } ~ DEL
		kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation,
		kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark};
|
||||
|
||||
std::string::const_iterator nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
|
||||
{
|
||||
if (text == end)
|
||||
return text;
|
||||
|
||||
enum breakAction
|
||||
{
|
||||
DBK = 0, // direct break (blank in table)
|
||||
IBK, // indirect break (% in table)
|
||||
PBK, // prohibited break (^ in table)
|
||||
CIB, // combining indirect break
|
||||
CPB // combining prohibited break
|
||||
};
|
||||
|
||||
const breakAction brkTable[27][27] = {
|
||||
// OP CL CP QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT
|
||||
/* OP */ {PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK},
|
||||
/* CL */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* CP */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* QU */ {PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* GL */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* NS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* EX */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* SY */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* IS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* PR */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* PO */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* NU */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* AL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* ID */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* IN */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* HY */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* BA */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* BB */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* B2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* ZW */ {DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* CM */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
|
||||
/* WJ */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
|
||||
/* H2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
|
||||
/* H3 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
|
||||
/* JL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK},
|
||||
/* JV */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
|
||||
/* JT */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
|
||||
};
|
||||
|
||||
uint8_t ch = static_cast<uint8_t>(*text);
|
||||
|
||||
LineBreakClass cls;
|
||||
|
||||
if (ch == '\n')
|
||||
cls = kLBC_MandatoryBreak;
|
||||
else if (ch < 128)
|
||||
{
|
||||
cls = kASCII_LBTable[ch];
|
||||
if (cls > kLBC_MandatoryBreak and cls != kLBC_Space) // duh...
|
||||
cls = kLBC_Alphabetic;
|
||||
}
|
||||
else
|
||||
cls = kLBC_Unknown;
|
||||
|
||||
if (cls == kLBC_Space)
|
||||
cls = kLBC_WordJoiner;
|
||||
|
||||
LineBreakClass ncls = cls;
|
||||
|
||||
while (++text != end and cls != kLBC_MandatoryBreak)
|
||||
{
|
||||
ch = *text;
|
||||
|
||||
LineBreakClass lcls = ncls;
|
||||
|
||||
if (ch == '\n')
|
||||
{
|
||||
++text;
|
||||
break;
|
||||
}
|
||||
|
||||
ncls = kASCII_LBTable[ch];
|
||||
|
||||
if (ncls == kLBC_Space)
|
||||
continue;
|
||||
|
||||
breakAction brk = brkTable[cls][ncls];
|
||||
|
||||
if (brk == DBK or (brk == IBK and lcls == kLBC_Space))
|
||||
break;
|
||||
|
||||
cls = ncls;
|
||||
}
|
||||
|
||||
return text;
|
||||
}
|
||||
|
||||
std::vector<std::string> wrapLine(const std::string &text, size_t width)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
std::vector<size_t> offsets = {0};
|
||||
|
||||
auto b = text.begin();
|
||||
while (b != text.end())
|
||||
{
|
||||
auto e = nextLineBreak(b, text.end());
|
||||
|
||||
offsets.push_back(e - text.begin());
|
||||
|
||||
b = e;
|
||||
}
|
||||
|
||||
size_t count = offsets.size() - 1;
|
||||
|
||||
std::vector<size_t> minima(count + 1, 1000000);
|
||||
minima[0] = 0;
|
||||
std::vector<size_t> breaks(count + 1, 0);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
size_t j = i + 1;
|
||||
while (j <= count)
|
||||
{
|
||||
size_t w = offsets[j] - offsets[i];
|
||||
|
||||
if (w > width)
|
||||
break;
|
||||
|
||||
while (w > 0 and isspace(text[offsets[i] + w - 1]))
|
||||
--w;
|
||||
|
||||
size_t cost = minima[i];
|
||||
if (j < count) // last line may be shorter
|
||||
cost += (width - w) * (width - w);
|
||||
|
||||
if (cost < minima[j])
|
||||
{
|
||||
minima[j] = cost;
|
||||
breaks[j] = i;
|
||||
}
|
||||
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
||||
size_t j = count;
|
||||
while (j > 0)
|
||||
{
|
||||
size_t i = breaks[j];
|
||||
result.push_back(text.substr(offsets[i], offsets[j] - offsets[i]));
|
||||
j = i;
|
||||
}
|
||||
|
||||
reverse(result.begin(), result.end());
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<std::string> word_wrap(const std::string &text, size_t width)
|
||||
{
|
||||
std::vector<std::string> result;
|
||||
for (auto p : cif::split<std::string>(text, "\n"))
|
||||
{
|
||||
if (p.empty())
|
||||
{
|
||||
result.push_back("");
|
||||
continue;
|
||||
}
|
||||
|
||||
auto lines = wrapLine(p, width);
|
||||
result.insert(result.end(), lines.begin(), lines.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
944
src/utilities.cpp
Normal file
944
src/utilities.cpp
Normal file
@@ -0,0 +1,944 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <atomic>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
#if not defined(_MSC_VER)
|
||||
#include <sys/ioctl.h>
|
||||
#include <termios.h>
|
||||
#endif
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
#include "revision.hpp"
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
int VERBOSE = 0;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::string get_version_nr()
|
||||
{
|
||||
std::ostringstream s;
|
||||
write_version_string(s, false);
|
||||
return s.str();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
#ifdef _MSC_VER
|
||||
}
|
||||
#include <Windows.h>
|
||||
#include <libloaderapi.h>
|
||||
#include <wincon.h>
|
||||
|
||||
#include <codecvt>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
uint32_t get_terminal_width()
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
|
||||
return csbi.srWindow.Right - csbi.srWindow.Left + 1;
|
||||
}
|
||||
|
||||
std::string GetExecutablePath()
|
||||
{
|
||||
WCHAR buffer[4096];
|
||||
|
||||
DWORD n = ::GetModuleFileNameW(nullptr, buffer, sizeof(buffer) / sizeof(WCHAR));
|
||||
if (n == 0)
|
||||
throw std::runtime_error("could not get exe path");
|
||||
|
||||
std::wstring ws(buffer);
|
||||
|
||||
// convert from utf16 to utf8
|
||||
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
|
||||
std::string u8str = conv1.to_bytes(ws);
|
||||
|
||||
return u8str;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <limits.h>
|
||||
|
||||
/// \brief Return the width of the terminal attached to standard output.
///
/// \return The number of columns reported for standard output, or 80 when
///         standard output is not a terminal or its size cannot be queried.
uint32_t get_terminal_width()
{
	uint32_t result = 80;

	if (isatty(STDOUT_FILENO))
	{
		// Query the same descriptor that was tested above, and only trust
		// the structure when the call succeeded.
		struct winsize w = {};
		if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0 and w.ws_col > 0)
			result = w.ws_col;
	}

	return result;
}
|
||||
|
||||
/// \brief Return the path of the running executable.
///
/// The path is read from the symbolic link /proc/self/exe.
///
/// \throws std::runtime_error when the link cannot be read.
std::string get_executable_path()
{
	using namespace std::literals;

	char path[PATH_MAX];

	// readlink does not append a terminating NUL, so the returned length
	// must be used to build the string.
	const ssize_t n = readlink("/proc/self/exe", path, sizeof(path));
	if (n == -1)
		throw std::runtime_error("could not get exe path "s + strerror(errno));

	return std::string(path, static_cast<size_t>(n));
}
|
||||
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Implementation of the progress bar: a counter plus a background
///        thread that periodically prints the bar.
struct ProgressImpl
{
	/// \brief Set up the counters and start the printing thread.
	/// \param inMax    The value of the counter at which the work is complete.
	/// \param inAction Text used as the initial message and in the final line.
	ProgressImpl(int64_t inMax, const std::string &inAction)
		: mMax(inMax)
		, mConsumed(0)
		, mAction(inAction)
		, mMessage(inAction)
		, mThread([this]
			  { Run(); })
	{
	}

	/// \brief Body of the printing thread.
	void Run();

	/// \brief Ask the printing thread to stop and wait until it has finished.
	void Stop()
	{
		mStop = true;
		if (mThread.joinable())
			mThread.join();
	}

	void PrintProgress();
	void PrintDone();

	int64_t mMax;
	std::atomic<int64_t> mConsumed;
	int64_t mLastConsumed = 0;
	int mSpinnerIndex = 0;
	std::string mAction, mMessage;
	std::mutex mMutex;
	std::chrono::time_point<std::chrono::system_clock>
		mStart = std::chrono::system_clock::now();

	// Written by Stop() and read by Run() on another thread, hence atomic.
	std::atomic<bool> mStop{false};

	// Must stay the last data member: members are initialised in declaration
	// order and the thread starts using the others as soon as it is created.
	std::thread mThread;
};
|
||||
|
||||
void ProgressImpl::Run()
|
||||
{
|
||||
bool printedAny = false;
|
||||
|
||||
try
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(100));
|
||||
|
||||
std::unique_lock lock(mMutex);
|
||||
|
||||
if (mStop or mConsumed == mMax)
|
||||
break;
|
||||
|
||||
auto elapsed = std::chrono::system_clock::now() - mStart;
|
||||
|
||||
if (elapsed < std::chrono::seconds(5))
|
||||
continue;
|
||||
|
||||
PrintProgress();
|
||||
printedAny = true;
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
if (printedAny)
|
||||
PrintDone();
|
||||
}
|
||||
|
||||
void ProgressImpl::PrintProgress()
|
||||
{
|
||||
// const char* kBlocks[] = {
|
||||
// " ", // 0
|
||||
// u8"\u258F", // 1
|
||||
// u8"\u258E", // 2
|
||||
// u8"\u258D", // 3
|
||||
// u8"\u258C", // 4
|
||||
// u8"\u258B", // 5
|
||||
// u8"\u258A", // 6
|
||||
// u8"\u2589", // 7
|
||||
// u8"\u2588", // 8
|
||||
// };
|
||||
|
||||
const char *kBlocks[] = {
|
||||
" ", // 0
|
||||
" ", // 1
|
||||
" ", // 2
|
||||
"-", // 3
|
||||
"-", // 4
|
||||
"-", // 5
|
||||
"=", // 6
|
||||
"=", // 7
|
||||
"=", // 8
|
||||
};
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
std::string msg;
|
||||
msg.reserve(width + 1);
|
||||
if (mMessage.length() <= 20)
|
||||
{
|
||||
msg = mMessage;
|
||||
if (msg.length() < 20)
|
||||
msg.append(20 - msg.length(), ' ');
|
||||
}
|
||||
else
|
||||
msg = mMessage.substr(0, 17) + "...";
|
||||
|
||||
msg += " |";
|
||||
|
||||
int64_t consumed = mConsumed;
|
||||
float progress = static_cast<float>(consumed) / mMax;
|
||||
int pi = static_cast<int>(std::ceil(progress * 33 * 8));
|
||||
// int tw = width - 28;
|
||||
// int twd = static_cast<int>(tw * progress + 0.5f);
|
||||
// msg.append(twd, '=');
|
||||
// msg.append(tw - twd, ' ');
|
||||
|
||||
for (int i = 0; i < 33; ++i)
|
||||
{
|
||||
if (pi <= 0)
|
||||
msg += kBlocks[0];
|
||||
else if (pi >= 8)
|
||||
msg += kBlocks[8];
|
||||
else
|
||||
msg += kBlocks[pi];
|
||||
pi -= 8;
|
||||
}
|
||||
|
||||
msg.append("| ");
|
||||
|
||||
const char kSpinner[] = {' ', '.', 'o', 'O', '0', 'O', 'o', '.'};
|
||||
const size_t kSpinnerCount = sizeof(kSpinner);
|
||||
|
||||
if (mLastConsumed < consumed)
|
||||
{
|
||||
mLastConsumed = consumed;
|
||||
mSpinnerIndex = (mSpinnerIndex + 1) % kSpinnerCount;
|
||||
}
|
||||
|
||||
const char spinner[2] = {kSpinner[mSpinnerIndex], 0};
|
||||
msg.append(spinner);
|
||||
|
||||
// int perc = static_cast<int>(100 * progress);
|
||||
// if (perc < 100)
|
||||
// msg += ' ';
|
||||
// if (perc < 10)
|
||||
// msg += ' ';
|
||||
// msg += to_string(perc);
|
||||
// msg += '%';
|
||||
|
||||
std::cout << '\r' << msg;
|
||||
std::cout.flush();
|
||||
}
|
||||
|
||||
namespace
{

	/// \brief Write a duration as days, hours, minutes and seconds.
	///
	/// Units that are zero are omitted, except for the seconds, which are
	/// always written with one decimal: e.g. "1h 1m 1.5s" or "2.5s".
	/// Negative durations are written as "0.0s".
	///
	/// The stream is left in fixed notation with a precision of 1.
	///
	/// \param os The stream to write to.
	/// \param t  The duration, in seconds.
	/// \return \a os.
	std::ostream &operator<<(std::ostream &os, const std::chrono::duration<double> &t)
	{
		double total = t.count();
		if (not(total > 0))
			total = 0;

		// Split off the fraction before the whole seconds are reduced below;
		// it is added back to the remaining seconds at the end.
		const double whole = std::trunc(total);
		const double fraction = total - whole;

		uint64_t s = static_cast<uint64_t>(whole);

		if (s >= 24 * 60 * 60)
		{
			auto days = s / (24 * 60 * 60);
			os << days << "d ";
			s %= 24 * 60 * 60;
		}

		if (s >= 60 * 60)
		{
			auto hours = s / (60 * 60);
			os << hours << "h ";
			s %= 60 * 60;
		}

		if (s >= 60)
		{
			auto minutes = s / 60;
			os << minutes << "m ";
			s %= 60;
		}

		const double ss = s + fraction;

		os << std::fixed << std::setprecision(1) << ss << 's';

		return os;
	}

} // namespace
|
||||
|
||||
void ProgressImpl::PrintDone()
|
||||
{
|
||||
std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - mStart;
|
||||
|
||||
std::ostringstream msgstr;
|
||||
msgstr << mAction << " done in " << elapsed << " seconds";
|
||||
auto msg = msgstr.str();
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
if (msg.length() < width)
|
||||
msg += std::string(width - msg.length(), ' ');
|
||||
|
||||
std::cout << '\r' << msg << std::endl;
|
||||
}
|
||||
|
||||
/// \brief Create a progress bar.
///
/// The implementation object (and with it the printing thread) is only
/// created when standard output is a terminal and VERBOSE is not negative;
/// otherwise the object does nothing.
///
/// \param inMax    The counter value at which the work is complete.
/// \param inAction Description of the work being done.
Progress::Progress(int64_t inMax, const std::string &inAction)
	: m_impl(nullptr)
{
	const bool showProgress = isatty(STDOUT_FILENO) and VERBOSE >= 0;

	if (showProgress)
		m_impl = new ProgressImpl(inMax, inAction);
}
|
||||
|
||||
/// \brief Stop the printing thread, if any, and release the implementation.
Progress::~Progress()
{
	if (m_impl == nullptr)
		return;

	// The thread must have finished before the object it uses is destroyed.
	m_impl->Stop();
	delete m_impl;
}
|
||||
|
||||
void Progress::consumed(int64_t inConsumed)
|
||||
{
|
||||
if (m_impl != nullptr and
|
||||
(m_impl->mConsumed += inConsumed) >= m_impl->mMax)
|
||||
{
|
||||
m_impl->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
void Progress::progress(int64_t inProgress)
|
||||
{
|
||||
if (m_impl != nullptr and
|
||||
(m_impl->mConsumed = inProgress) >= m_impl->mMax)
|
||||
{
|
||||
m_impl->Stop();
|
||||
}
|
||||
}
|
||||
|
||||
void Progress::message(const std::string &inMessage)
|
||||
{
|
||||
if (m_impl != nullptr)
|
||||
{
|
||||
std::unique_lock lock(m_impl->mMutex);
|
||||
m_impl->mMessage = inMessage;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
//
|
||||
// Try to find a named resource. Can be either a local file with this name,
|
||||
// a file located in our cache directory or a resource linked in with mrc.
|
||||
//
|
||||
// We have a special, private version of mrsrc here. To be able to create
|
||||
// shared libraries and still be able to link when there's no mrc used.
|
||||
|
||||
namespace mrsrc
{
	/// \brief Internal data structure as generated by mrc
	///
	/// One entry of the resource index. The code in this file uses m_next
	/// and m_child as positions in the index (zero meaning "none"), m_name
	/// and m_data as offsets into the name and data tables, and m_size as
	/// the length of the data.
	struct rsrc_imp
	{
		unsigned int m_next;  // next sibling
		unsigned int m_child; // first child
		unsigned int m_name;  // offset of the name
		unsigned int m_size;  // size of the data
		unsigned int m_data;  // offset of the data
	};
} // namespace mrsrc
|
||||
|
||||
#if _MSC_VER
|
||||
|
||||
extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
|
||||
extern "C" const char *gResourceDataDefault[1] = {};
|
||||
extern "C" const char *gResourceNameDefault[1] = {};
|
||||
|
||||
extern "C" const mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern "C" const char gResourceData[];
|
||||
extern "C" const char gResourceName[];
|
||||
|
||||
#pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
|
||||
|
||||
#else
|
||||
extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern const __attribute__((weak)) char gResourceData[];
|
||||
extern const __attribute__((weak)) char gResourceName[];
|
||||
|
||||
const mrsrc::rsrc_imp gResourceIndex[1] = {};
|
||||
const char gResourceData[1] = {};
|
||||
const char gResourceName[1] = {};
|
||||
|
||||
#endif
|
||||
|
||||
namespace mrsrc
|
||||
{
|
||||
class rsrc_data
|
||||
{
|
||||
public:
|
||||
static rsrc_data &instance()
|
||||
{
|
||||
static rsrc_data s_instance;
|
||||
return s_instance;
|
||||
}
|
||||
|
||||
const rsrc_imp *index() const { return m_index; }
|
||||
|
||||
const char *data(unsigned int offset) const
|
||||
{
|
||||
return m_data + offset;
|
||||
}
|
||||
|
||||
const char *name(unsigned int offset) const
|
||||
{
|
||||
return m_name + offset;
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc_data()
|
||||
{
|
||||
// if (gResourceIndex and (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0) and gResourceIndex and gResourceName)
|
||||
if (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0)
|
||||
{
|
||||
m_index = gResourceIndex;
|
||||
m_data = gResourceData;
|
||||
m_name = gResourceName;
|
||||
}
|
||||
}
|
||||
|
||||
rsrc_imp m_dummy = {};
|
||||
const rsrc_imp *m_index = &m_dummy;
|
||||
const char *m_data = "";
|
||||
const char *m_name = "";
|
||||
};
|
||||
|
||||
/// \brief Class mrsrc::rsrc contains a pointer to the data in the
|
||||
/// resource, as well as offering an iterator interface to its
|
||||
/// children.
|
||||
|
||||
class rsrc
|
||||
{
|
||||
public:
|
||||
rsrc()
|
||||
: m_impl(rsrc_data::instance().index())
|
||||
{
|
||||
}
|
||||
|
||||
rsrc(const rsrc &other)
|
||||
: m_impl(other.m_impl)
|
||||
{
|
||||
}
|
||||
|
||||
rsrc &operator=(const rsrc &other)
|
||||
{
|
||||
m_impl = other.m_impl;
|
||||
return *this;
|
||||
}
|
||||
|
||||
rsrc(std::filesystem::path path);
|
||||
|
||||
std::string name() const { return rsrc_data::instance().name(m_impl->m_name); }
|
||||
|
||||
const char *data() const { return rsrc_data::instance().data(m_impl->m_data); }
|
||||
|
||||
unsigned long size() const { return m_impl->m_size; }
|
||||
|
||||
explicit operator bool() const { return m_impl != NULL and m_impl->m_size > 0; }
|
||||
|
||||
template <typename RSRC>
|
||||
class iterator_t
|
||||
{
|
||||
public:
|
||||
using iterator_category = std::input_iterator_tag;
|
||||
using value_type = RSRC;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = value_type *;
|
||||
using reference = value_type &;
|
||||
|
||||
iterator_t(const rsrc_imp *cur)
|
||||
: m_cur(cur)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_t(const iterator_t &i)
|
||||
: m_cur(i.m_cur)
|
||||
{
|
||||
}
|
||||
|
||||
iterator_t &operator=(const iterator_t &i)
|
||||
{
|
||||
m_cur = i.m_cur;
|
||||
return *this;
|
||||
}
|
||||
|
||||
reference operator*() { return m_cur; }
|
||||
pointer operator->() { return &m_cur; }
|
||||
|
||||
iterator_t &operator++()
|
||||
{
|
||||
if (m_cur.m_impl->m_next)
|
||||
m_cur.m_impl = rsrc_data::instance().index() + m_cur.m_impl->m_next;
|
||||
else
|
||||
m_cur.m_impl = nullptr;
|
||||
return *this;
|
||||
}
|
||||
|
||||
iterator_t operator++(int)
|
||||
{
|
||||
auto tmp(*this);
|
||||
this->operator++();
|
||||
return tmp;
|
||||
}
|
||||
|
||||
bool operator==(const iterator_t &rhs) const { return m_cur.m_impl == rhs.m_cur.m_impl; }
|
||||
bool operator!=(const iterator_t &rhs) const { return m_cur.m_impl != rhs.m_cur.m_impl; }
|
||||
|
||||
private:
|
||||
value_type m_cur;
|
||||
};
|
||||
|
||||
using iterator = iterator_t<rsrc>;
|
||||
|
||||
iterator begin() const
|
||||
{
|
||||
const rsrc_imp *impl = nullptr;
|
||||
if (m_impl and m_impl->m_child)
|
||||
impl = rsrc_data::instance().index() + m_impl->m_child;
|
||||
return iterator(impl);
|
||||
}
|
||||
|
||||
iterator end() const
|
||||
{
|
||||
return iterator(nullptr);
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc(const rsrc_imp *imp)
|
||||
: m_impl(imp)
|
||||
{
|
||||
}
|
||||
|
||||
const rsrc_imp *m_impl;
|
||||
};
|
||||
|
||||
inline rsrc::rsrc(std::filesystem::path p)
|
||||
{
|
||||
m_impl = rsrc_data::instance().index();
|
||||
|
||||
// using std::filesytem::path would have been natural here of course...
|
||||
|
||||
auto pb = p.begin();
|
||||
auto pe = p.end();
|
||||
|
||||
while (m_impl != nullptr and pb != pe)
|
||||
{
|
||||
auto name = *pb++;
|
||||
|
||||
const rsrc_imp *impl = nullptr;
|
||||
for (rsrc child : *this)
|
||||
{
|
||||
if (child.name() == name)
|
||||
{
|
||||
impl = child.m_impl;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_impl = impl;
|
||||
}
|
||||
|
||||
if (pb != pe) // not found
|
||||
m_impl = nullptr;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
template <typename CharT, typename Traits>
|
||||
class basic_streambuf : public std::basic_streambuf<CharT, Traits>
|
||||
{
|
||||
public:
|
||||
typedef CharT char_type;
|
||||
typedef Traits traits_type;
|
||||
typedef typename traits_type::int_type int_type;
|
||||
typedef typename traits_type::pos_type pos_type;
|
||||
typedef typename traits_type::off_type off_type;
|
||||
|
||||
/// \brief constructor taking a \a path to the resource in memory
|
||||
basic_streambuf(const std::string &path)
|
||||
: m_rsrc(path)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
/// \brief constructor taking a \a rsrc
|
||||
basic_streambuf(const rsrc &rsrc)
|
||||
: m_rsrc(rsrc)
|
||||
{
|
||||
init();
|
||||
}
|
||||
|
||||
basic_streambuf(const basic_streambuf &) = delete;
|
||||
|
||||
basic_streambuf(basic_streambuf &&rhs)
|
||||
: basic_streambuf(rhs.m_rsrc)
|
||||
{
|
||||
}
|
||||
|
||||
basic_streambuf &operator=(const basic_streambuf &) = delete;
|
||||
|
||||
basic_streambuf &operator=(basic_streambuf &&rhs)
|
||||
{
|
||||
swap(rhs);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void swap(basic_streambuf &rhs)
|
||||
{
|
||||
std::swap(m_begin, rhs.m_begin);
|
||||
std::swap(m_end, rhs.m_end);
|
||||
std::swap(m_current, rhs.m_current);
|
||||
}
|
||||
|
||||
private:
|
||||
void init()
|
||||
{
|
||||
m_begin = reinterpret_cast<const char_type *>(m_rsrc.data());
|
||||
m_end = reinterpret_cast<const char_type *>(m_rsrc.data() + m_rsrc.size());
|
||||
m_current = m_begin;
|
||||
}
|
||||
|
||||
int_type underflow()
|
||||
{
|
||||
if (m_current == m_end)
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*m_current);
|
||||
}
|
||||
|
||||
int_type uflow()
|
||||
{
|
||||
if (m_current == m_end)
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*m_current++);
|
||||
}
|
||||
|
||||
int_type pbackfail(int_type ch)
|
||||
{
|
||||
if (m_current == m_begin or (ch != traits_type::eof() and ch != m_current[-1]))
|
||||
return traits_type::eof();
|
||||
|
||||
return traits_type::to_int_type(*--m_current);
|
||||
}
|
||||
|
||||
std::streamsize showmanyc()
|
||||
{
|
||||
assert(std::less_equal<const char *>()(m_current, m_end));
|
||||
return m_end - m_current;
|
||||
}
|
||||
|
||||
pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode which)
|
||||
{
|
||||
switch (dir)
|
||||
{
|
||||
case std::ios_base::beg:
|
||||
m_current = m_begin + off;
|
||||
break;
|
||||
|
||||
case std::ios_base::end:
|
||||
m_current = m_end + off;
|
||||
break;
|
||||
|
||||
case std::ios_base::cur:
|
||||
m_current += off;
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
if (m_current < m_begin)
|
||||
m_current = m_begin;
|
||||
|
||||
if (m_current > m_end)
|
||||
m_current = m_end;
|
||||
|
||||
return m_current - m_begin;
|
||||
}
|
||||
|
||||
pos_type seekpos(pos_type pos, std::ios_base::openmode which)
|
||||
{
|
||||
m_current = m_begin + pos;
|
||||
|
||||
if (m_current < m_begin)
|
||||
m_current = m_begin;
|
||||
|
||||
if (m_current > m_end)
|
||||
m_current = m_end;
|
||||
|
||||
return m_current - m_begin;
|
||||
}
|
||||
|
||||
private:
|
||||
rsrc m_rsrc;
|
||||
const char_type *m_begin;
|
||||
const char_type *m_end;
|
||||
const char_type *m_current;
|
||||
};
|
||||
|
||||
/// Convenience alias: the resource stream buffer for plain char data
using streambuf = basic_streambuf<char, std::char_traits<char>>;
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// class mrsrc::istream
|
||||
|
||||
template <typename CharT, typename Traits>
|
||||
class basic_istream : public std::basic_istream<CharT, Traits>
|
||||
{
|
||||
public:
|
||||
typedef CharT char_type;
|
||||
typedef Traits traits_type;
|
||||
typedef typename traits_type::int_type int_type;
|
||||
typedef typename traits_type::pos_type pos_type;
|
||||
typedef typename traits_type::off_type off_type;
|
||||
|
||||
private:
|
||||
using __streambuf_type = basic_streambuf<CharT, Traits>;
|
||||
using __istream_type = std::basic_istream<CharT, Traits>;
|
||||
|
||||
__streambuf_type m_buffer;
|
||||
|
||||
public:
|
||||
basic_istream(const std::string &path)
|
||||
: __istream_type(&m_buffer)
|
||||
, m_buffer(path)
|
||||
{
|
||||
this->init(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream(rsrc &resource)
|
||||
: __istream_type(&m_buffer)
|
||||
, m_buffer(resource)
|
||||
{
|
||||
this->init(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream(const basic_istream &) = delete;
|
||||
|
||||
basic_istream(basic_istream &&rhs)
|
||||
: __istream_type(std::move(rhs))
|
||||
, m_buffer(std::move(rhs.m_buffer))
|
||||
{
|
||||
__istream_type::set_rdbuf(&m_buffer);
|
||||
}
|
||||
|
||||
basic_istream &operator=(const basic_istream &) = delete;
|
||||
|
||||
basic_istream &operator=(basic_istream &&rhs)
|
||||
{
|
||||
__istream_type::operator=(std::move(rhs));
|
||||
m_buffer = std::move(rhs.m_buffer);
|
||||
return *this;
|
||||
}
|
||||
|
||||
void swap(basic_istream &rhs)
|
||||
{
|
||||
__istream_type::swap(rhs);
|
||||
m_buffer.swap(rhs.m_buffer);
|
||||
}
|
||||
|
||||
__streambuf_type *rdbuf() const
|
||||
{
|
||||
return const_cast<__streambuf_type *>(&m_buffer);
|
||||
}
|
||||
};
|
||||
|
||||
/// Convenience alias: the resource input stream for plain char data
using istream = basic_istream<char, std::char_traits<char>>;
|
||||
} // namespace mrsrc
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class ResourcePool
|
||||
{
|
||||
public:
|
||||
static ResourcePool &instance()
|
||||
{
|
||||
static std::unique_ptr<ResourcePool> s_instance(new ResourcePool);
|
||||
return *s_instance;
|
||||
}
|
||||
|
||||
void pushDir(fs::path dir)
|
||||
{
|
||||
std::error_code ec;
|
||||
|
||||
if (fs::exists(dir, ec) and not ec)
|
||||
mDirs.push_front(dir);
|
||||
}
|
||||
|
||||
void pushDir(const char *path)
|
||||
{
|
||||
if (path != nullptr)
|
||||
pushDir(fs::path(path));
|
||||
}
|
||||
|
||||
void pushAlias(const std::string &name, std::filesystem::path dataFile)
|
||||
{
|
||||
std::error_code ec;
|
||||
if (not fs::exists(dataFile, ec) or ec)
|
||||
throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());
|
||||
|
||||
mLocalResources[name] = dataFile;
|
||||
}
|
||||
|
||||
std::unique_ptr<std::istream> load(fs::path name);
|
||||
|
||||
private:
|
||||
ResourcePool();
|
||||
|
||||
std::unique_ptr<std::ifstream> open(fs::path &p)
|
||||
{
|
||||
std::unique_ptr<std::ifstream> result;
|
||||
|
||||
try
|
||||
{
|
||||
if (fs::exists(p))
|
||||
{
|
||||
std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
|
||||
if (file->is_open())
|
||||
result.reset(file.release());
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::map<std::string, std::filesystem::path> mLocalResources;
|
||||
std::deque<fs::path> mDirs;
|
||||
};
|
||||
|
||||
/// Register the default directories. Each pushDir call puts its directory in
/// front of the ones added before it, so the order of the calls matters.
ResourcePool::ResourcePool()
{
#if defined(DATA_DIR)
	pushDir(DATA_DIR);
#endif

	// a null result from getenv is ignored by pushDir
	pushDir(getenv("LIBCIFPP_DATA_DIR"));

	if (const char *ccp4Root = getenv("CCP4"); ccp4Root != nullptr)
		pushDir(fs::path(ccp4Root) / "share" / "libcifpp");

#if defined(CACHE_DIR)
	pushDir(CACHE_DIR);
#endif
}
|
||||
|
||||
std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
|
||||
{
|
||||
std::unique_ptr<std::istream> result;
|
||||
std::error_code ec;
|
||||
|
||||
fs::path p = name;
|
||||
|
||||
if (mLocalResources.count(name.string()))
|
||||
result = open(mLocalResources[name.string()]);
|
||||
|
||||
for (auto di = mDirs.begin(); not result and di != mDirs.end(); ++di)
|
||||
{
|
||||
auto p2 = *di / p;
|
||||
if (fs::exists(p2, ec) and not ec)
|
||||
result = open(p2);
|
||||
}
|
||||
|
||||
// if (not result and gResourceData)
|
||||
if (not result and (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0))
|
||||
{
|
||||
mrsrc::rsrc rsrc(name);
|
||||
if (rsrc)
|
||||
result.reset(new mrsrc::istream(rsrc));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void add_data_directory(std::filesystem::path dataDir)
|
||||
{
|
||||
ResourcePool::instance().pushDir(dataDir);
|
||||
}
|
||||
|
||||
void add_file_resource(const std::string &name, std::filesystem::path dataFile)
|
||||
{
|
||||
ResourcePool::instance().pushAlias(name, dataFile);
|
||||
}
|
||||
|
||||
std::unique_ptr<std::istream> load_resource(std::filesystem::path name)
|
||||
{
|
||||
return ResourcePool::instance().load(name);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
480
src/validate.cpp
Normal file
480
src/validate.cpp
Normal file
@@ -0,0 +1,480 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <cassert>
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
// The validator depends on regular expressions. Unfortunately,
|
||||
// the implementation of std::regex in g++ is buggy and crashes
|
||||
// on reading the pdbx dictionary. Therefore, in case g++ is used
|
||||
// the code will use boost::regex instead.
|
||||
|
||||
#if USE_BOOST_REGEX
|
||||
#include <boost/regex.hpp>
|
||||
using boost::regex;
|
||||
#else
|
||||
#include <regex>
|
||||
using std::regex;
|
||||
#endif
|
||||
|
||||
#include <gxrio.hpp>
|
||||
|
||||
#include <cif++/dictionary_parser.hpp>
|
||||
#include <cif++/validate.hpp>
|
||||
|
||||
#include <cif++/utilities.hpp>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
struct regex_impl : public regex
|
||||
{
|
||||
regex_impl(std::string_view rx)
|
||||
: regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
/// Construct an error carrying \a msg verbatim.
validation_error::validation_error(const std::string &msg)
	: m_msg(msg)
{
}

/// Construct an error for item \a item in category \a cat; the message is
/// \a msg prefixed with the full item tag.
validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
	: m_msg(std::string("When validating _").append(cat).append(1, '.').append(item).append(": ").append(msg))
{
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Translate the textual primitive type code \a s into its enum value.
///
/// The codes "char", "uchar" and "numb" are recognised using iequals.
/// \throws validation_error for any other value
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
{
	if (iequals(s, "char"))
		return DDL_PrimitiveType::Char;

	if (iequals(s, "uchar"))
		return DDL_PrimitiveType::UChar;

	if (iequals(s, "numb"))
		return DDL_PrimitiveType::Numb;

	throw validation_error("Not a known primitive type");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
type_validator::type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx)
|
||||
: m_name(name)
|
||||
, m_primitive_type(type)
|
||||
, m_rx(new regex_impl(rx.empty() ? ".+" : rx)) /// Empty regular expressions are not allowed, in libcpp's std::regex (POSIX?)
|
||||
{
|
||||
}
|
||||
|
||||
/// Release the compiled regular expression held in m_rx.
type_validator::~type_validator()
{
	delete m_rx;
}
|
||||
|
||||
template <typename T>
|
||||
struct my_from_chars
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return cif::from_chars(a, b, d);
|
||||
}
|
||||
};
|
||||
|
||||
/// Adapter that forwards to std::from_chars, giving it the same static
/// interface as my_from_chars.
template <typename T>
struct std_from_chars
{
	/// Parse the characters in [first, last) into \a value
	static std::from_chars_result from_chars(const char *first, const char *last, T &value)
	{
		std::from_chars_result outcome = std::from_chars(first, last, value);
		return outcome;
	}
};
|
||||
|
||||
int type_validator::compare(std::string_view a, std::string_view b) const
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
if (a.empty())
|
||||
result = b.empty() ? 0 : -1;
|
||||
else if (b.empty())
|
||||
result = a.empty() ? 0 : +1;
|
||||
else
|
||||
{
|
||||
switch (m_primitive_type)
|
||||
{
|
||||
case DDL_PrimitiveType::Numb:
|
||||
{
|
||||
double da, db;
|
||||
|
||||
using namespace cif;
|
||||
using namespace std;
|
||||
|
||||
std::from_chars_result ra, rb;
|
||||
|
||||
ra = selected_charconv<double>::from_chars(a.begin(), a.end(), da);
|
||||
rb = selected_charconv<double>::from_chars(b.begin(), b.end(), db);
|
||||
|
||||
if (ra.ec == std::errc() and rb.ec == std::errc())
|
||||
{
|
||||
auto d = da - db;
|
||||
if (std::abs(d) > std::numeric_limits<double>::epsilon())
|
||||
{
|
||||
if (d > 0)
|
||||
result = 1;
|
||||
else if (d < 0)
|
||||
result = -1;
|
||||
}
|
||||
}
|
||||
else if (ra.ec == std::errc())
|
||||
result = 1;
|
||||
else
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
|
||||
case DDL_PrimitiveType::UChar:
|
||||
case DDL_PrimitiveType::Char:
|
||||
{
|
||||
// CIF is guaranteed to have ascii only, therefore this primitive code will do
|
||||
// also, we're collapsing spaces
|
||||
|
||||
auto ai = a.begin(), bi = b.begin();
|
||||
for (;;)
|
||||
{
|
||||
if (ai == a.end())
|
||||
{
|
||||
if (bi != b.end())
|
||||
result = -1;
|
||||
break;
|
||||
}
|
||||
else if (bi == b.end())
|
||||
{
|
||||
result = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
char ca = *ai;
|
||||
char cb = *bi;
|
||||
|
||||
if (m_primitive_type == DDL_PrimitiveType::UChar)
|
||||
{
|
||||
ca = tolower(ca);
|
||||
cb = tolower(cb);
|
||||
}
|
||||
|
||||
result = ca - cb;
|
||||
|
||||
if (result != 0)
|
||||
break;
|
||||
|
||||
if (ca == ' ')
|
||||
{
|
||||
while (ai[1] == ' ')
|
||||
++ai;
|
||||
while (bi[1] == ' ')
|
||||
++bi;
|
||||
}
|
||||
|
||||
++ai;
|
||||
++bi;
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
|
||||
//{
|
||||
//// if (mParent != nullptr and VERBOSE)
|
||||
//// cerr << "replacing parent in " << mCategory->m_name << " from " << mParent->mCategory->m_name << " to " << parent->mCategory->m_name << endl;
|
||||
//// mParent = parent;
|
||||
//
|
||||
// if (m_type == nullptr and parent != nullptr)
|
||||
// m_type = parent->m_type;
|
||||
//
|
||||
// if (parent != nullptr)
|
||||
// {
|
||||
// mLinked.push_back({parent, parentItem, childItem});
|
||||
//
|
||||
// parent->mChildren.insert(this);
|
||||
////
|
||||
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
|
||||
//// parent->mForeignKeys.insert(this);
|
||||
// }
|
||||
//}
|
||||
|
||||
void item_validator::operator()(std::string_view value) const
|
||||
{
|
||||
if (not value.empty() and value != "?" and value != ".")
|
||||
{
|
||||
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
|
||||
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name);
|
||||
|
||||
if (not m_enums.empty())
|
||||
{
|
||||
if (m_enums.count(std::string{ value }) == 0)
|
||||
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Take ownership of the item validator \a v and add it to this category.
///
/// A mandatory item is also recorded in the set of mandatory fields. When a
/// validator comparing equal to \a v is already present, nothing is replaced
/// and, at VERBOSE level 4 or higher, a message is printed.
void category_validator::addItemValidator(item_validator &&v)
{
	if (v.m_mandatory)
		m_mandatory_fields.insert(v.m_tag);

	v.m_category = this;

	auto r = m_item_validators.insert(std::move(v));

	// v may have been moved from at this point, so report the tag of the
	// element that is already stored instead of reading v again.
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << r.first->m_tag << " to category " << m_name << std::endl;
}
|
||||
|
||||
/// \brief Find the validator for the item called \a tag in this category.
/// \return the validator, or nullptr when there is none; in that case a
/// message is printed when VERBOSE is above 4
const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
{
	const auto pos = m_item_validators.find(item_validator{ std::string(tag) });

	if (pos != m_item_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Take ownership of the type validator \a v.
///
/// When a validator comparing equal to \a v is already present, nothing is
/// replaced and, when VERBOSE is above 4, a message is printed.
void validator::add_type_validator(type_validator &&v)
{
	auto r = m_type_validators.insert(std::move(v));

	// v may have been moved from at this point, so report the name of the
	// element that is already stored instead of reading v again.
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for type " << r.first->m_name << std::endl;
}
|
||||
|
||||
/// \brief Find the type validator registered under \a typeCode.
/// \return the validator, or nullptr when there is none; in that case a
/// message is printed when VERBOSE is above 4
const type_validator *validator::get_validator_for_type(std::string_view typeCode) const
{
	// a temporary validator serves as the search key
	const auto pos = m_type_validators.find(type_validator{ std::string(typeCode), DDL_PrimitiveType::Char, {} });

	if (pos != m_type_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
|
||||
|
||||
/// \brief Take ownership of the category validator \a v.
///
/// When a validator comparing equal to \a v is already present, nothing is
/// replaced and, when VERBOSE is above 4, a message is printed.
void validator::add_category_validator(category_validator &&v)
{
	auto r = m_category_validators.insert(std::move(v));

	// v may have been moved from at this point, so report the name of the
	// element that is already stored instead of reading v again.
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for category " << r.first->m_name << std::endl;
}
|
||||
|
||||
/// \brief Find the validator for the category called \a category.
/// \return the validator, or nullptr when there is none; in that case a
/// message is printed when VERBOSE is above 4
const category_validator *validator::get_validator_for_category(std::string_view category) const
{
	const auto pos = m_category_validators.find(category_validator{ std::string(category) });

	if (pos != m_category_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
|
||||
|
||||
/// \brief Find the item validator for the full tag \a tag.
///
/// The tag is split into a category and an item name using split_tag_name,
/// the category validator is looked up and then asked for the item.
/// \return the validator, or nullptr when the category or the item is
/// unknown; in that case a message is printed when VERBOSE is above 4
item_validator *validator::get_validator_for_item(std::string_view tag) const
{
	std::string categoryName, itemName;
	std::tie(categoryName, itemName) = split_tag_name(tag);

	item_validator *found = nullptr;

	if (auto *categoryValidator = get_validator_for_category(categoryName); categoryValidator != nullptr)
		found = const_cast<item_validator *>(categoryValidator->get_validator_for_item(itemName));

	if (found == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << tag << std::endl;

	return found;
}
|
||||
|
||||
/// \brief Check the link \a v against the known categories and items and store it.
///
/// Parent and child must have the same number of keys, both categories must
/// be known and every key must be a known item of its category. A child item
/// without a type takes over the type of the corresponding parent item.
/// \throws std::runtime_error when one of these checks fails
void validator::add_link_validator(link_validator &&v)
{
	assert(v.m_parent_keys.size() == v.m_child_keys.size());
	if (v.m_parent_keys.size() != v.m_child_keys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	// both lookups are done before either result is checked
	auto parentCategory = get_validator_for_category(v.m_parent_category);
	auto childCategory = get_validator_for_category(v.m_child_category);

	if (parentCategory == nullptr)
		throw std::runtime_error("unknown parent category " + v.m_parent_category);

	if (childCategory == nullptr)
		throw std::runtime_error("unknown child category " + v.m_child_category);

	const size_t keyCount = v.m_parent_keys.size();
	for (size_t ix = 0; ix < keyCount; ++ix)
	{
		auto parentItem = parentCategory->get_validator_for_item(v.m_parent_keys[ix]);
		if (parentItem == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + v.m_parent_keys[ix]);

		auto childItem = childCategory->get_validator_for_item(v.m_child_keys[ix]);
		if (childItem == nullptr)
			throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + v.m_child_keys[ix]);

		// the stored validator is only reachable as const, hence the cast
		if (childItem->m_type == nullptr and parentItem->m_type != nullptr)
			const_cast<item_validator *>(childItem)->m_type = parentItem->m_type;
	}

	m_link_validators.emplace_back(std::move(v));
}
|
||||
|
||||
std::vector<const link_validator *> validator::get_links_for_parent(std::string_view category) const
|
||||
{
|
||||
std::vector<const link_validator *> result;
|
||||
|
||||
for (auto &l : m_link_validators)
|
||||
{
|
||||
if (l.m_parent_category == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
std::vector<const link_validator *> validator::get_links_for_child(std::string_view category) const
|
||||
{
|
||||
std::vector<const link_validator *> result;
|
||||
|
||||
for (auto &l : m_link_validators)
|
||||
{
|
||||
if (l.m_child_category == category)
|
||||
result.push_back(&l);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void validator::report_error(const std::string &msg, bool fatal) const
|
||||
{
|
||||
if (m_strict or fatal)
|
||||
throw validation_error(msg);
|
||||
else if (VERBOSE > 0)
|
||||
std::cerr << msg << std::endl;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// \brief Return the validator for the dictionary \a dictionary_name, loading it when needed.
///
/// Validators that were constructed before are found by a case-insensitive
/// comparison of their name. Otherwise the dictionary is searched for as a
/// resource, first under the given name and then with ".dic" appended, and
/// finally as a gzip compressed file on disk.
///
/// The whole operation runs with the factory mutex held.
/// \throws std::runtime_error when the dictionary cannot be found or opened
const validator &validator_factory::operator[](std::string_view dictionary_name)
{
	std::lock_guard lock(m_mutex);

	for (auto &known : m_validators)
	{
		if (iequals(known.name(), dictionary_name))
			return known;
	}

	// not found, add it

	// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
	std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());

	// pass the path built above; relying on an implicit conversion from
	// std::string_view here would defeat the purpose of building it
	auto data = load_resource(dictionary);

	if (not data and dictionary.extension().string() != ".dic")
		data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));

	if (data)
		construct_validator(dictionary_name, *data);
	else
	{
		std::error_code ec;

		// might be a compressed dictionary on disk
		std::filesystem::path p = dictionary;
		if (p.extension() == ".dic")
			p = p.parent_path() / (p.filename().string() + ".gz");
		else
			p = p.parent_path() / (p.filename().string() + ".dic.gz");

#if defined(CACHE_DIR) or defined(DATA_DIR)
		// not found as given, try the directories configured at build time
		if (not std::filesystem::exists(p, ec) or ec)
		{
			for (const char *dir : {
#if defined(CACHE_DIR)
					 CACHE_DIR,
#endif
#if defined(DATA_DIR)
					 DATA_DIR
#endif
				 })
			{
				auto p2 = std::filesystem::path(dir) / p;
				if (std::filesystem::exists(p2, ec) and not ec)
				{
					swap(p, p2);
					break;
				}
			}
		}
#endif

		if (std::filesystem::exists(p, ec) and not ec)
		{
			gxrio::ifstream in(p);

			if (not in.is_open())
				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

			construct_validator(dictionary_name, in);
		}
		else
			throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
	}

	// construct_validator appended the new validator at the back
	return m_validators.back();
}
|
||||
|
||||
void validator_factory::construct_validator(std::string_view name, std::istream &is)
|
||||
{
|
||||
m_validators.emplace_back(parse_dictionary(name, is));
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
BIN
test/1juh.cif.gz
Normal file
BIN
test/1juh.cif.gz
Normal file
Binary file not shown.
253
test/HEM.cif
Normal file
253
test/HEM.cif
Normal file
@@ -0,0 +1,253 @@
|
||||
data_HEM
|
||||
#
|
||||
|
||||
_chem_comp.id HEM
|
||||
_chem_comp.name "PROTOPORPHYRIN IX CONTAINING FE"
|
||||
_chem_comp.type NON-POLYMER
|
||||
_chem_comp.pdbx_type HETAIN
|
||||
_chem_comp.formula "C34 H32 Fe N4 O4"
|
||||
_chem_comp.mon_nstd_parent_comp_id ?
|
||||
_chem_comp.pdbx_synonyms HEME
|
||||
_chem_comp.pdbx_formal_charge 0
|
||||
_chem_comp.pdbx_initial_date 1999-07-08
|
||||
_chem_comp.pdbx_modified_date 2020-06-17
|
||||
_chem_comp.pdbx_ambiguous_flag Y
|
||||
_chem_comp.pdbx_release_status REL
|
||||
_chem_comp.pdbx_replaced_by ?
|
||||
_chem_comp.pdbx_replaces MHM
|
||||
_chem_comp.formula_weight 616.487
|
||||
_chem_comp.one_letter_code ?
|
||||
_chem_comp.three_letter_code HEM
|
||||
_chem_comp.pdbx_model_coordinates_details ?
|
||||
_chem_comp.pdbx_model_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_ideal_coordinates_details Corina
|
||||
_chem_comp.pdbx_ideal_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_model_coordinates_db_code 3IA3
|
||||
_chem_comp.pdbx_subcomponent_list ?
|
||||
_chem_comp.pdbx_processing_site RCSB
|
||||
# #
|
||||
loop_
|
||||
_chem_comp_atom.comp_id
|
||||
_chem_comp_atom.atom_id
|
||||
_chem_comp_atom.alt_atom_id
|
||||
_chem_comp_atom.type_symbol
|
||||
_chem_comp_atom.charge
|
||||
_chem_comp_atom.pdbx_align
|
||||
_chem_comp_atom.pdbx_aromatic_flag
|
||||
_chem_comp_atom.pdbx_leaving_atom_flag
|
||||
_chem_comp_atom.pdbx_stereo_config
|
||||
_chem_comp_atom.model_Cartn_x
|
||||
_chem_comp_atom.model_Cartn_y
|
||||
_chem_comp_atom.model_Cartn_z
|
||||
_chem_comp_atom.pdbx_model_Cartn_x_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_y_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_z_ideal
|
||||
_chem_comp_atom.pdbx_component_atom_id
|
||||
_chem_comp_atom.pdbx_component_comp_id
|
||||
_chem_comp_atom.pdbx_ordinal
|
||||
HEM CHA CHA C 0 1 N N N 2.748 -19.531 39.896 -2.161 -0.125 0.490 CHA HEM 1
|
||||
HEM CHB CHB C 0 1 N N N 3.258 -17.744 35.477 1.458 -3.419 0.306 CHB HEM 2
|
||||
HEM CHC CHC C 0 1 N N N 1.703 -21.900 33.637 4.701 0.169 -0.069 CHC HEM 3
|
||||
HEM CHD CHD C 0 1 N N N 1.149 -23.677 38.059 1.075 3.460 0.018 CHD HEM 4
|
||||
HEM C1A C1A C 0 1 Y N N 3.031 -18.673 38.872 -1.436 -1.305 0.380 C1A HEM 5
|
||||
HEM C2A C2A C 0 1 Y N N 3.578 -17.325 39.013 -2.015 -2.587 0.320 C2A HEM 6
|
||||
HEM C3A C3A C 0 1 Y N N 3.705 -16.820 37.785 -1.009 -3.500 0.270 C3A HEM 7
|
||||
HEM C4A C4A C 0 1 Y N N 3.256 -17.863 36.862 0.216 -2.803 0.298 C4A HEM 8
|
||||
HEM CMA CMA C 0 1 N N N 4.227 -15.469 37.393 -1.175 -4.996 0.197 CMA HEM 9
|
||||
HEM CAA CAA C 0 1 N N N 3.945 -16.670 40.296 -3.490 -2.893 0.314 CAA HEM 10
|
||||
HEM CBA CBA C 0 1 N N N 5.391 -17.138 40.581 -3.998 -2.926 -1.129 CBA HEM 11
|
||||
HEM CGA CGA C 0 1 N N N 6.095 -16.663 41.825 -5.473 -3.232 -1.136 CGA HEM 12
|
||||
HEM O1A O1A O 0 1 N N N 7.098 -15.928 41.683 -6.059 -3.405 -0.094 O1A HEM 13
|
||||
HEM O2A O2A O 0 1 N N N 5.657 -17.040 42.940 -6.137 -3.311 -2.300 O2A HEM 14
|
||||
HEM C1B C1B C 0 1 N N N 2.888 -18.698 34.579 2.664 -2.707 0.308 C1B HEM 15
|
||||
HEM C2B C2B C 0 1 N N N 2.933 -18.535 33.146 3.937 -3.328 0.418 C2B HEM 16
|
||||
HEM C3B C3B C 0 1 N N N 2.499 -19.716 32.632 4.874 -2.341 0.314 C3B HEM 17
|
||||
HEM C4B C4B C 0 1 N N N 2.187 -20.580 33.743 4.117 -1.079 0.139 C4B HEM 18
|
||||
HEM CMB CMB C 0 1 N N N 3.391 -17.290 32.422 4.203 -4.798 0.613 CMB HEM 19
|
||||
HEM CAB CAB C 0 1 N N N 2.345 -20.140 31.217 6.339 -2.497 0.365 CAB HEM 20
|
||||
HEM CBB CBB C 0 1 N N N 1.755 -19.492 30.233 6.935 -3.419 -0.385 CBB HEM 21
|
||||
HEM C1C C1C C 0 1 Y N N 1.395 -22.786 34.659 3.964 1.345 -0.174 C1C HEM 22
|
||||
HEM C2C C2C C 0 1 Y N N 0.854 -24.130 34.500 4.531 2.601 -0.445 C2C HEM 23
|
||||
HEM C3C C3C C 0 1 Y N N 0.689 -24.626 35.757 3.510 3.536 -0.437 C3C HEM 24
|
||||
HEM C4C C4C C 0 1 Y N N 1.139 -23.583 36.674 2.304 2.846 -0.139 C4C HEM 25
|
||||
HEM CMC CMC C 0 1 N N N 0.550 -24.782 33.175 5.991 2.880 -0.697 CMC HEM 26
|
||||
HEM CAC CAC C 0 1 N N N 0.164 -25.943 36.196 3.649 4.981 -0.692 CAC HEM 27
|
||||
HEM CBC CBC C 0 1 N N N 0.498 -27.158 35.750 4.201 5.407 -1.823 CBC HEM 28
|
||||
HEM C1D C1D C 0 1 N N N 1.550 -22.718 38.980 -0.102 2.753 0.298 C1D HEM 29
|
||||
HEM C2D C2D C 0 1 N N N 1.513 -22.879 40.415 -1.382 3.388 0.641 C2D HEM 30
|
||||
HEM C3D C3D C 0 1 N N N 1.951 -21.691 40.929 -2.283 2.389 0.774 C3D HEM 31
|
||||
HEM C4D C4D C 0 1 N N N 2.277 -20.826 39.811 -1.561 1.137 0.511 C4D HEM 32
|
||||
HEM CMD CMD C 0 1 N N N 1.055 -24.094 41.156 -1.639 4.863 0.811 CMD HEM 33
|
||||
HEM CAD CAD C 0 1 N N N 2.048 -21.326 42.352 -3.741 2.532 1.123 CAD HEM 34
|
||||
HEM CBD CBD C 0 1 N N N 0.741 -20.498 42.530 -4.573 2.563 -0.160 CBD HEM 35
|
||||
HEM CGD CGD C 0 1 N N N 0.578 -19.987 43.892 -6.032 2.706 0.189 CGD HEM 36
|
||||
HEM O1D O1D O 0 1 N N N 1.387 -19.103 44.303 -6.372 2.776 1.347 O1D HEM 37
|
||||
HEM O2D O2D O 0 1 N N N -0.401 -20.468 44.537 -6.954 2.755 -0.785 O2D HEM 38
|
||||
HEM NA NA N 0 1 Y N N 2.863 -18.969 37.554 -0.068 -1.456 0.321 NA HEM 39
|
||||
HEM NB NB N 0 1 N N N 2.439 -19.944 34.911 2.820 -1.386 0.207 NB HEM 40
|
||||
HEM NC NC N 0 1 Y N N 1.537 -22.509 35.976 2.604 1.506 -0.033 NC HEM 41
|
||||
HEM ND ND N 0 1 N N N 2.008 -21.465 38.663 -0.276 1.431 0.298 ND HEM 42
|
||||
HEM FE FE FE 0 0 N N N 2.196 -20.749 36.814 1.010 0.157 -0.060 FE HEM 43
|
||||
HEM HHB HHB H 0 1 N N N 3.587 -16.798 35.072 1.498 -4.508 0.309 HHB HEM 44
|
||||
HEM HHC HHC H 0 1 N N N 1.553 -22.268 32.633 5.786 0.229 -0.153 HHC HEM 45
|
||||
HEM HHD HHD H 0 1 N N N 0.802 -24.613 38.472 1.018 4.543 -0.083 HHD HEM 46
|
||||
HEM HMA HMA H 0 1 N N N 5.316 -15.524 37.249 -1.220 -5.306 -0.847 HMA HEM 47
|
||||
HEM HMAA HMAA H 0 0 N N N 3.749 -15.149 36.455 -0.328 -5.480 0.683 HMAA HEM 48
|
||||
HEM HMAB HMAB H 0 0 N N N 3.998 -14.743 38.187 -2.097 -5.285 0.702 HMAB HEM 49
|
||||
HEM HAA HAA H 0 1 N N N 3.905 -15.575 40.197 -3.662 -3.862 0.782 HAA HEM 50
|
||||
HEM HAAA HAAA H 0 0 N N N 3.268 -16.991 41.102 -4.024 -2.121 0.869 HAAA HEM 51
|
||||
HEM HBA HBA H 0 1 N N N 5.368 -18.237 40.627 -3.825 -1.956 -1.597 HBA HEM 52
|
||||
HEM HBAA HBAA H 0 0 N N N 6.004 -16.819 39.725 -3.464 -3.697 -1.684 HBAA HEM 53
|
||||
HEM HMB HMB H 0 1 N N N 3.319 -17.449 31.336 3.256 -5.336 0.660 HMB HEM 54
|
||||
HEM HMBA HMBA H 0 0 N N N 2.753 -16.442 32.711 4.794 -5.175 -0.222 HMBA HEM 55
|
||||
HEM HMBB HMBB H 0 0 N N N 4.435 -17.072 32.692 4.752 -4.948 1.543 HMBB HEM 56
|
||||
HEM HAB HAB H 0 1 N N N 2.770 -21.100 30.963 6.927 -1.863 1.011 HAB HEM 57
|
||||
HEM HBB HBB H 0 1 N N N 1.719 -19.927 29.245 7.994 -3.600 -0.277 HBB HEM 58
|
||||
HEM HBBA HBBA H 0 0 N N N 1.308 -18.526 30.414 6.360 -3.987 -1.102 HBBA HEM 59
|
||||
HEM HMC HMC H 0 1 N N N 0.153 -25.793 33.346 6.554 1.949 -0.639 HMC HEM 60
|
||||
HEM HMCA HMCA H 0 0 N N N -0.196 -24.182 32.634 6.110 3.316 -1.689 HMCA HEM 61
|
||||
HEM HMCB HMCB H 0 0 N N N 1.472 -24.846 32.578 6.362 3.578 0.053 HMCB HEM 62
|
||||
HEM HAC HAC H 0 1 N N N -0.583 -25.916 36.975 3.303 5.694 0.042 HAC HEM 63
|
||||
HEM HBC HBC H 0 1 N N N 0.027 -28.035 36.169 4.614 4.696 -2.523 HBC HEM 64
|
||||
HEM HBCA HBCA H 0 0 N N N 1.239 -27.263 34.971 4.235 6.464 -2.043 HBCA HEM 65
|
||||
HEM HMD HMD H 0 1 N N N 1.142 -23.919 42.238 -0.715 5.415 0.639 HMD HEM 66
|
||||
HEM HMDA HMDA H 0 0 N N N 0.006 -24.304 40.902 -2.394 5.185 0.094 HMDA HEM 67
|
||||
HEM HMDB HMDB H 0 0 N N N 1.680 -24.954 40.872 -1.994 5.055 1.824 HMDB HEM 68
|
||||
HEM HAD HAD H 0 1 N N N 2.055 -22.216 42.999 -4.052 1.687 1.738 HAD HEM 69
|
||||
HEM HADA HADA H 0 0 N N N 2.943 -20.719 42.554 -3.893 3.459 1.677 HADA HEM 70
|
||||
HEM HBD HBD H 0 1 N N N 0.767 -19.646 41.835 -4.262 3.408 -0.775 HBD HEM 71
|
||||
HEM HBDA HBDA H 0 0 N N N -0.119 -21.141 42.290 -4.421 1.636 -0.714 HBDA HEM 72
|
||||
HEM H2A H2A H 0 1 N N N 6.201 -16.682 43.632 -7.082 -3.510 -2.254 H2A HEM 73
|
||||
HEM H2D H2D H 0 1 N N N -0.445 -20.063 45.395 -7.877 2.847 -0.512 H2D HEM 74
|
||||
HEM HHA HHA H 0 1 N N N 2.913 -19.150 40.893 -3.246 -0.188 0.567 HHA HEM 75
|
||||
# #
|
||||
loop_
|
||||
_chem_comp_bond.comp_id
|
||||
_chem_comp_bond.atom_id_1
|
||||
_chem_comp_bond.atom_id_2
|
||||
_chem_comp_bond.value_order
|
||||
_chem_comp_bond.pdbx_aromatic_flag
|
||||
_chem_comp_bond.pdbx_stereo_config
|
||||
_chem_comp_bond.pdbx_ordinal
|
||||
HEM CHA C1A SING N N 1
|
||||
HEM CHA C4D DOUB N N 2
|
||||
HEM CHA HHA SING N N 3
|
||||
HEM CHB C4A SING N N 4
|
||||
HEM CHB C1B DOUB N N 5
|
||||
HEM CHB HHB SING N N 6
|
||||
HEM CHC C4B SING N N 7
|
||||
HEM CHC C1C DOUB N N 8
|
||||
HEM CHC HHC SING N N 9
|
||||
HEM CHD C4C DOUB N N 10
|
||||
HEM CHD C1D SING N N 11
|
||||
HEM CHD HHD SING N N 12
|
||||
HEM C1A C2A DOUB Y N 13
|
||||
HEM C1A NA SING Y N 14
|
||||
HEM C2A C3A SING Y N 15
|
||||
HEM C2A CAA SING N N 16
|
||||
HEM C3A C4A DOUB Y N 17
|
||||
HEM C3A CMA SING N N 18
|
||||
HEM C4A NA SING Y N 19
|
||||
HEM CMA HMA SING N N 20
|
||||
HEM CMA HMAA SING N N 21
|
||||
HEM CMA HMAB SING N N 22
|
||||
HEM CAA CBA SING N N 23
|
||||
HEM CAA HAA SING N N 24
|
||||
HEM CAA HAAA SING N N 25
|
||||
HEM CBA CGA SING N N 26
|
||||
HEM CBA HBA SING N N 27
|
||||
HEM CBA HBAA SING N N 28
|
||||
HEM CGA O1A DOUB N N 29
|
||||
HEM CGA O2A SING N N 30
|
||||
HEM C1B C2B SING N N 31
|
||||
HEM C1B NB SING N N 32
|
||||
HEM C2B C3B DOUB N N 33
|
||||
HEM C2B CMB SING N N 34
|
||||
HEM C3B C4B SING N N 35
|
||||
HEM C3B CAB SING N N 36
|
||||
HEM C4B NB DOUB N N 37
|
||||
HEM CMB HMB SING N N 38
|
||||
HEM CMB HMBA SING N N 39
|
||||
HEM CMB HMBB SING N N 40
|
||||
HEM CAB CBB DOUB N N 41
|
||||
HEM CAB HAB SING N N 42
|
||||
HEM CBB HBB SING N N 43
|
||||
HEM CBB HBBA SING N N 44
|
||||
HEM C1C C2C SING Y N 45
|
||||
HEM C1C NC SING Y N 46
|
||||
HEM C2C C3C DOUB Y N 47
|
||||
HEM C2C CMC SING N N 48
|
||||
HEM C3C C4C SING Y N 49
|
||||
HEM C3C CAC SING N N 50
|
||||
HEM C4C NC SING Y N 51
|
||||
HEM CMC HMC SING N N 52
|
||||
HEM CMC HMCA SING N N 53
|
||||
HEM CMC HMCB SING N N 54
|
||||
HEM CAC CBC DOUB N N 55
|
||||
HEM CAC HAC SING N N 56
|
||||
HEM CBC HBC SING N N 57
|
||||
HEM CBC HBCA SING N N 58
|
||||
HEM C1D C2D SING N N 59
|
||||
HEM C1D ND DOUB N N 60
|
||||
HEM C2D C3D DOUB N N 61
|
||||
HEM C2D CMD SING N N 62
|
||||
HEM C3D C4D SING N N 63
|
||||
HEM C3D CAD SING N N 64
|
||||
HEM C4D ND SING N N 65
|
||||
HEM CMD HMD SING N N 66
|
||||
HEM CMD HMDA SING N N 67
|
||||
HEM CMD HMDB SING N N 68
|
||||
HEM CAD CBD SING N N 69
|
||||
HEM CAD HAD SING N N 70
|
||||
HEM CAD HADA SING N N 71
|
||||
HEM CBD CGD SING N N 72
|
||||
HEM CBD HBD SING N N 73
|
||||
HEM CBD HBDA SING N N 74
|
||||
HEM CGD O1D DOUB N N 75
|
||||
HEM CGD O2D SING N N 76
|
||||
HEM O2A H2A SING N N 77
|
||||
HEM O2D H2D SING N N 78
|
||||
HEM FE NA SING N N 79
|
||||
HEM FE NB SING N N 80
|
||||
HEM FE NC SING N N 81
|
||||
HEM FE ND SING N N 82
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_descriptor.comp_id
|
||||
_pdbx_chem_comp_descriptor.type
|
||||
_pdbx_chem_comp_descriptor.program
|
||||
_pdbx_chem_comp_descriptor.program_version
|
||||
_pdbx_chem_comp_descriptor.descriptor
|
||||
HEM SMILES ACDLabs 12.01 "C=1c3c(c(c4C=C5C(=C(C=6C=C7C(=C(C8=CC=2C(=C(C=1N=2[Fe](n34)(N5=6)N78)CCC(=O)O)C)\C=C)C)\C=C)C)C)CCC(=O)O"
|
||||
HEM InChI InChI 1.03 "InChI=1S/C34H34N4O4.Fe/c1-7-21-17(3)25-13-26-19(5)23(9-11-33(39)40)31(37-26)16-32-24(10-12-34(41)42)20(6)28(38-32)15-30-22(8-2)18(4)27(36-30)14-29(21)35-25;/h7-8,13-16H,1-2,9-12H2,3-6H3,(H4,35,36,37,38,39,40,41,42);/q;+2/p-2/b25-13-,26-13-,27-14-,28-15-,29-14-,30-15-,31-16-,32-16-;"
|
||||
HEM InChIKey InChI 1.03 KABFMIBPWCXCRK-RGGAHWMASA-L
|
||||
HEM SMILES_CANONICAL CACTVS 3.385 "CC1=C(CCC(O)=O)C2=Cc3n4[Fe]5|6|N2=C1C=c7n5c(=CC8=N|6C(=Cc4c(C)c3CCC(O)=O)C(=C8C=C)C)c(C)c7C=C"
|
||||
HEM SMILES CACTVS 3.385 "CC1=C(CCC(O)=O)C2=Cc3n4[Fe]5|6|N2=C1C=c7n5c(=CC8=N|6C(=Cc4c(C)c3CCC(O)=O)C(=C8C=C)C)c(C)c7C=C"
|
||||
HEM SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "Cc1c2n3c(c1CCC(=O)O)C=C4C(=C(C5=[N]4[Fe]36[N]7=C(C=C8N6C(=C5)C(=C8C)C=C)C(=C(C7=C2)C)C=C)C)CCC(=O)O"
|
||||
HEM SMILES "OpenEye OEToolkits" 1.7.6 "Cc1c2n3c(c1CCC(=O)O)C=C4C(=C(C5=[N]4[Fe]36[N]7=C(C=C8N6C(=C5)C(=C8C)C=C)C(=C(C7=C2)C)C=C)C)CCC(=O)O"
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_identifier.comp_id
|
||||
_pdbx_chem_comp_identifier.type
|
||||
_pdbx_chem_comp_identifier.program
|
||||
_pdbx_chem_comp_identifier.program_version
|
||||
_pdbx_chem_comp_identifier.identifier
|
||||
HEM "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.6.1 "3-[(5Z,10Z,14Z,19Z)-18-(2-carboxyethyl)-8,13-bis(ethenyl)-3,7,12,17-tetramethyl-21,23-dihydroporphyrin-2-yl]propanoic acid"
|
||||
HEM "SYSTEMATIC NAME" ACDLabs 12.01 "[3,3'-(7,12-diethenyl-3,8,13,17-tetramethylporphyrin-2,18-diyl-kappa~4~N~21~,N~22~,N~23~,N~24~)dipropanoato(2-)]iron"
|
||||
# #
|
||||
loop_
|
||||
_pdbx_chem_comp_audit.comp_id
|
||||
_pdbx_chem_comp_audit.action_type
|
||||
_pdbx_chem_comp_audit.date
|
||||
_pdbx_chem_comp_audit.processing_site
|
||||
HEM "Create component" 1999-07-08 RCSB
|
||||
HEM "Other modification" 2016-01-20 RCSB
|
||||
HEM "Modify synonyms" 2020-06-05 PDBE
|
||||
#
|
||||
_pdbx_chem_comp_synonyms.ordinal 1
|
||||
_pdbx_chem_comp_synonyms.comp_id HEM
|
||||
_pdbx_chem_comp_synonyms.name HEME
|
||||
_pdbx_chem_comp_synonyms.provenance ?
|
||||
_pdbx_chem_comp_synonyms.type ?
|
||||
##
|
||||
|
||||
188
test/REA.cif
Normal file
188
test/REA.cif
Normal file
@@ -0,0 +1,188 @@
|
||||
data_REA
|
||||
#
|
||||
_chem_comp.id REA
|
||||
_chem_comp.name "RETINOIC ACID"
|
||||
_chem_comp.type NON-POLYMER
|
||||
_chem_comp.pdbx_type HETAIN
|
||||
_chem_comp.formula "C20 H28 O2"
|
||||
_chem_comp.mon_nstd_parent_comp_id ?
|
||||
_chem_comp.pdbx_synonyms ?
|
||||
_chem_comp.pdbx_formal_charge 0
|
||||
_chem_comp.pdbx_initial_date 1999-07-08
|
||||
_chem_comp.pdbx_modified_date 2016-10-18
|
||||
_chem_comp.pdbx_ambiguous_flag N
|
||||
_chem_comp.pdbx_release_status REL
|
||||
_chem_comp.pdbx_replaced_by ?
|
||||
_chem_comp.pdbx_replaces 3KV
|
||||
_chem_comp.formula_weight 300.435
|
||||
_chem_comp.one_letter_code ?
|
||||
_chem_comp.three_letter_code REA
|
||||
_chem_comp.pdbx_model_coordinates_details ?
|
||||
_chem_comp.pdbx_model_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_ideal_coordinates_details Corina
|
||||
_chem_comp.pdbx_ideal_coordinates_missing_flag N
|
||||
_chem_comp.pdbx_model_coordinates_db_code 1CBS
|
||||
_chem_comp.pdbx_subcomponent_list ?
|
||||
_chem_comp.pdbx_processing_site RCSB
|
||||
#
|
||||
loop_
|
||||
_chem_comp_atom.comp_id
|
||||
_chem_comp_atom.atom_id
|
||||
_chem_comp_atom.alt_atom_id
|
||||
_chem_comp_atom.type_symbol
|
||||
_chem_comp_atom.charge
|
||||
_chem_comp_atom.pdbx_align
|
||||
_chem_comp_atom.pdbx_aromatic_flag
|
||||
_chem_comp_atom.pdbx_leaving_atom_flag
|
||||
_chem_comp_atom.pdbx_stereo_config
|
||||
_chem_comp_atom.model_Cartn_x
|
||||
_chem_comp_atom.model_Cartn_y
|
||||
_chem_comp_atom.model_Cartn_z
|
||||
_chem_comp_atom.pdbx_model_Cartn_x_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_y_ideal
|
||||
_chem_comp_atom.pdbx_model_Cartn_z_ideal
|
||||
_chem_comp_atom.pdbx_component_atom_id
|
||||
_chem_comp_atom.pdbx_component_comp_id
|
||||
_chem_comp_atom.pdbx_ordinal
|
||||
REA C1 C1 C 0 1 N N N 21.972 29.831 16.739 -4.684 0.932 -0.497 C1 REA 1
|
||||
REA C2 C2 C 0 1 N N N 20.921 30.524 15.841 -5.837 0.190 -1.176 C2 REA 2
|
||||
REA C3 C3 C 0 1 N N N 20.245 29.635 14.848 -6.441 -0.798 -0.171 C3 REA 3
|
||||
REA C4 C4 C 0 1 N N N 19.555 28.479 15.488 -5.418 -1.903 0.100 C4 REA 4
|
||||
REA C5 C5 C 0 1 N N N 20.389 27.812 16.587 -4.082 -1.301 0.429 C5 REA 5
|
||||
REA C6 C6 C 0 1 N N N 21.425 28.446 17.218 -3.756 -0.048 0.161 C6 REA 6
|
||||
REA C7 C7 C 0 1 N N N 22.242 27.851 18.297 -2.457 0.396 0.516 C7 REA 7
|
||||
REA C8 C8 C 0 1 N N N 21.868 26.977 19.240 -1.363 -0.229 0.007 C8 REA 8
|
||||
REA C9 C9 C 0 1 N N N 22.705 26.434 20.286 -0.076 0.257 0.298 C9 REA 9
|
||||
REA C10 C10 C 0 1 N N N 22.159 25.536 21.131 1.022 -0.370 -0.213 C10 REA 10
|
||||
REA C11 C11 C 0 1 N N N 22.875 24.924 22.234 2.306 0.115 0.077 C11 REA 11
|
||||
REA C12 C12 C 0 1 N N N 22.237 24.026 22.990 3.405 -0.513 -0.435 C12 REA 12
|
||||
REA C13 C13 C 0 1 N N N 22.856 23.377 24.125 4.689 -0.028 -0.144 C13 REA 13
|
||||
REA C14 C14 C 0 1 N N N 22.135 22.473 24.834 5.787 -0.655 -0.656 C14 REA 14
|
||||
REA C15 C15 C 0 1 N N N 22.563 21.710 26.016 7.077 -0.265 -0.244 C15 REA 15
|
||||
REA C16 C16 C 0 1 N N N 22.238 30.737 17.948 -5.246 1.886 0.559 C16 REA 16
|
||||
REA C17 C17 C 0 1 N N N 23.292 29.620 15.948 -3.911 1.737 -1.544 C17 REA 17
|
||||
REA C18 C18 C 0 1 N N N 19.791 26.449 16.947 -3.056 -2.175 1.103 C18 REA 18
|
||||
REA C19 C19 C 0 1 N N N 24.181 26.841 20.385 0.090 1.471 1.175 C19 REA 19
|
||||
REA C20 C20 C 0 1 N N N 24.303 23.747 24.489 4.855 1.186 0.733 C20 REA 20
|
||||
REA O1 O1 O 0 1 N N N 23.640 21.075 25.978 7.210 0.553 0.648 O1 REA 21
|
||||
REA O2 O2 O 0 1 N N N 21.840 21.712 27.037 8.166 -0.798 -0.840 O2 REA 22
|
||||
REA H21 H21 H 0 1 N N N 20.147 30.955 16.494 -6.598 0.905 -1.490 H21 REA 23
|
||||
REA H22 H22 H 0 1 N N N 21.425 31.330 15.288 -5.462 -0.353 -2.044 H22 REA 24
|
||||
REA H31 H31 H 0 1 N N N 19.501 30.227 14.295 -6.673 -0.278 0.759 H31 REA 25
|
||||
REA H32 H32 H 0 1 N N N 21.001 29.250 14.148 -7.349 -1.234 -0.586 H32 REA 26
|
||||
REA H41 H41 H 0 1 N N N 18.613 28.835 15.931 -5.756 -2.511 0.938 H41 REA 27
|
||||
REA H42 H42 H 0 1 N N N 19.335 27.730 14.713 -5.322 -2.531 -0.786 H42 REA 28
|
||||
REA H7 H7 H 0 1 N N N 23.276 28.162 18.329 -2.337 1.230 1.191 H7 REA 29
|
||||
REA H8 H8 H 0 1 N N N 20.840 26.645 19.217 -1.482 -1.100 -0.622 H8 REA 30
|
||||
REA H10 H10 H 0 1 N N N 21.127 25.256 20.977 0.903 -1.241 -0.842 H10 REA 31
|
||||
REA H11 H11 H 0 1 N N N 23.902 25.189 22.440 2.425 0.985 0.706 H11 REA 32
|
||||
REA H12 H12 H 0 1 N N N 21.216 23.774 22.743 3.286 -1.383 -1.063 H12 REA 33
|
||||
REA H14 H14 H 0 1 N N N 21.127 22.292 24.490 5.667 -1.451 -1.376 H14 REA 34
|
||||
REA H161 H161 H 0 0 N N N 22.984 30.265 18.604 -5.802 1.316 1.303 H161 REA 35
|
||||
REA H162 H162 H 0 0 N N N 22.618 31.709 17.601 -4.426 2.415 1.044 H162 REA 36
|
||||
REA H163 H163 H 0 0 N N N 21.302 30.887 18.506 -5.911 2.605 0.081 H163 REA 37
|
||||
REA H171 H171 H 0 0 N N N 24.033 29.127 16.595 -4.598 2.394 -2.077 H171 REA 38
|
||||
REA H172 H172 H 0 0 N N N 23.095 28.989 15.069 -3.146 2.335 -1.050 H172 REA 39
|
||||
REA H173 H173 H 0 0 N N N 23.683 30.595 15.620 -3.439 1.054 -2.251 H173 REA 40
|
||||
REA H181 H181 H 0 0 N N N 20.397 25.979 17.736 -3.448 -3.187 1.201 H181 REA 41
|
||||
REA H182 H182 H 0 0 N N N 18.761 26.584 17.308 -2.145 -2.194 0.503 H182 REA 42
|
||||
REA H183 H183 H 0 0 N N N 19.786 25.804 16.056 -2.831 -1.775 2.092 H183 REA 43
|
||||
REA H191 H191 H 0 0 N N N 24.647 26.327 21.238 0.171 1.159 2.216 H191 REA 44
|
||||
REA H192 H192 H 0 0 N N N 24.702 26.559 19.458 0.993 2.008 0.885 H192 REA 45
|
||||
REA H193 H193 H 0 0 N N N 24.252 27.929 20.529 -0.774 2.125 1.058 H193 REA 46
|
||||
REA H201 H201 H 0 0 N N N 24.620 23.168 25.369 5.026 0.871 1.762 H201 REA 47
|
||||
REA H202 H202 H 0 0 N N N 24.965 23.516 23.641 5.707 1.771 0.386 H202 REA 48
|
||||
REA H203 H203 H 0 0 N N N 24.360 24.822 24.717 3.952 1.795 0.685 H203 REA 49
|
||||
REA HO2 HO2 H 0 1 N N N 22.244 21.180 27.713 9.006 -0.469 -0.490 HO2 REA 50
|
||||
#
|
||||
loop_
|
||||
_chem_comp_bond.comp_id
|
||||
_chem_comp_bond.atom_id_1
|
||||
_chem_comp_bond.atom_id_2
|
||||
_chem_comp_bond.value_order
|
||||
_chem_comp_bond.pdbx_aromatic_flag
|
||||
_chem_comp_bond.pdbx_stereo_config
|
||||
_chem_comp_bond.pdbx_ordinal
|
||||
REA C1 C2 SING N N 1
|
||||
REA C1 C6 SING N N 2
|
||||
REA C1 C16 SING N N 3
|
||||
REA C1 C17 SING N N 4
|
||||
REA C2 C3 SING N N 5
|
||||
REA C2 H21 SING N N 6
|
||||
REA C2 H22 SING N N 7
|
||||
REA C3 C4 SING N N 8
|
||||
REA C3 H31 SING N N 9
|
||||
REA C3 H32 SING N N 10
|
||||
REA C4 C5 SING N N 11
|
||||
REA C4 H41 SING N N 12
|
||||
REA C4 H42 SING N N 13
|
||||
REA C5 C6 DOUB N N 14
|
||||
REA C5 C18 SING N N 15
|
||||
REA C6 C7 SING N N 16
|
||||
REA C7 C8 DOUB N E 17
|
||||
REA C7 H7 SING N N 18
|
||||
REA C8 C9 SING N N 19
|
||||
REA C8 H8 SING N N 20
|
||||
REA C9 C10 DOUB N E 21
|
||||
REA C9 C19 SING N N 22
|
||||
REA C10 C11 SING N N 23
|
||||
REA C10 H10 SING N N 24
|
||||
REA C11 C12 DOUB N E 25
|
||||
REA C11 H11 SING N N 26
|
||||
REA C12 C13 SING N N 27
|
||||
REA C12 H12 SING N N 28
|
||||
REA C13 C14 DOUB N E 29
|
||||
REA C13 C20 SING N N 30
|
||||
REA C14 C15 SING N N 31
|
||||
REA C14 H14 SING N N 32
|
||||
REA C15 O1 DOUB N N 33
|
||||
REA C15 O2 SING N N 34
|
||||
REA C16 H161 SING N N 35
|
||||
REA C16 H162 SING N N 36
|
||||
REA C16 H163 SING N N 37
|
||||
REA C17 H171 SING N N 38
|
||||
REA C17 H172 SING N N 39
|
||||
REA C17 H173 SING N N 40
|
||||
REA C18 H181 SING N N 41
|
||||
REA C18 H182 SING N N 42
|
||||
REA C18 H183 SING N N 43
|
||||
REA C19 H191 SING N N 44
|
||||
REA C19 H192 SING N N 45
|
||||
REA C19 H193 SING N N 46
|
||||
REA C20 H201 SING N N 47
|
||||
REA C20 H202 SING N N 48
|
||||
REA C20 H203 SING N N 49
|
||||
REA O2 HO2 SING N N 50
|
||||
#
|
||||
loop_
|
||||
_pdbx_chem_comp_descriptor.comp_id
|
||||
_pdbx_chem_comp_descriptor.type
|
||||
_pdbx_chem_comp_descriptor.program
|
||||
_pdbx_chem_comp_descriptor.program_version
|
||||
_pdbx_chem_comp_descriptor.descriptor
|
||||
REA SMILES ACDLabs 12.01 "C1(CCCC(=C1\C=C\C(=C\C=C\C(=C\C(=O)O)C)C)C)(C)C"
|
||||
REA InChI InChI 1.03 "InChI=1S/C20H28O2/c1-15(8-6-9-16(2)14-19(21)22)11-12-18-17(3)10-7-13-20(18,4)5/h6,8-9,11-12,14H,7,10,13H2,1-5H3,(H,21,22)/b9-6+,12-11+,15-8+,16-14+"
|
||||
REA InChIKey InChI 1.03 SHGAZHPCJJPHSC-YCNIQYBTSA-N
|
||||
REA SMILES_CANONICAL CACTVS 3.385 "CC1=C(\C=C\C(C)=C\C=C\C(C)=C\C(O)=O)C(C)(C)CCC1"
|
||||
REA SMILES CACTVS 3.385 "CC1=C(C=CC(C)=CC=CC(C)=CC(O)=O)C(C)(C)CCC1"
|
||||
REA SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C(=O)O)/C)/C"
|
||||
REA SMILES "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C"
|
||||
#
|
||||
loop_
|
||||
_pdbx_chem_comp_identifier.comp_id
|
||||
_pdbx_chem_comp_identifier.type
|
||||
_pdbx_chem_comp_identifier.program
|
||||
_pdbx_chem_comp_identifier.program_version
|
||||
_pdbx_chem_comp_identifier.identifier
|
||||
REA "SYSTEMATIC NAME" ACDLabs 12.01 "retinoic acid"
|
||||
REA "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "(2E,4E,6E,8E)-3,7-dimethyl-9-(2,6,6-trimethylcyclohexen-1-yl)nona-2,4,6,8-tetraenoic acid"
|
||||
#
|
||||
loop_
|
||||
_pdbx_chem_comp_audit.comp_id
|
||||
_pdbx_chem_comp_audit.action_type
|
||||
_pdbx_chem_comp_audit.date
|
||||
_pdbx_chem_comp_audit.processing_site
|
||||
REA "Create component" 1999-07-08 RCSB
|
||||
REA "Modify descriptor" 2011-06-04 RCSB
|
||||
REA "Other modification" 2016-10-18 RCSB
|
||||
#
|
||||
85
test/format-test.cpp
Normal file
85
test/format-test.cpp
Normal file
@@ -0,0 +1,85 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace tt = boost::test_tools;
|
||||
|
||||
std::filesystem::path gTestDir = std::filesystem::current_path(); // starts as the current working directory; the argv override in init_unit_test() is currently commented out
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// User-defined literal: wraps the characters of the literal in an in-memory
/// input stream and constructs a cif::file from that stream, so tests can
/// embed CIF text inline as R"(...)"_cf.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer whose get area is exactly [first, first + count).
	struct membuf : public std::streambuf
	{
		membuf(const char *first, size_t count)
		{
			// setg() only accepts non-const pointers; no put area is configured
			char *begin = const_cast<char *>(first);
			this->setg(begin, begin, begin + count);
		}
	};

	membuf source(text, length);
	std::istream input(&source);

	return cif::file(input);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// // not a test, just initialize test dir
|
||||
// if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
// gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// // do this now, avoids the need for installing
|
||||
// cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// // initialize CCD location
|
||||
// cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Checks cif::format with printf-style conversions, both when streamed into
/// an ostream and when converted directly through .str().
BOOST_AUTO_TEST_CASE(fmt_1)
{
	// "%-10.10s" left-justifies the argument in a field exactly 10 characters wide,
	// so the 5-character "world" must be followed by five padding spaces.
	const std::string expected = "Hello, world     , the magic number is 42 and pi is 3.14159";

	std::ostringstream os;

	std::string world("world");
	os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI);
	BOOST_CHECK_EQUAL(os.str(), expected);

	BOOST_CHECK_EQUAL(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI).str(),
		expected);
}
|
||||
341
test/model-test.cpp
Normal file
341
test/model-test.cpp
Normal file
@@ -0,0 +1,341 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define BOOST_TEST_ALTERNATIVE_INIT_API
|
||||
#include <boost/test/included/unit_test.hpp>
|
||||
|
||||
#include <stdexcept>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// User-defined literal: exposes the literal's characters through an in-memory
/// input stream and constructs a cif::file from it, so CIF text can be written
/// inline in the tests as R"(...)"_cf.
cif::file operator""_cf(const char* text, size_t length)
{
	// Stream buffer whose get area is exactly [first, first + count).
	struct membuf : public std::streambuf
	{
		membuf(const char* first, size_t count)
		{
			// setg() only accepts non-const pointers; no put area is configured
			char* begin = const_cast<char*>(first);
			this->setg(begin, begin, begin + count);
		}
	};

	membuf source(text, length);
	std::istream input(&source);

	return cif::file(input);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Base directory used to locate test files; starts as the current working
// directory and is replaced by argv[1] in init_unit_test() when one argument is given.
std::filesystem::path gTestDir = std::filesystem::current_path();
|
||||
|
||||
bool init_unit_test()
|
||||
{
|
||||
cif::VERBOSE = 1;
|
||||
|
||||
// not a test, just initialize test dir
|
||||
if (boost::unit_test::framework::master_test_suite().argc == 2)
|
||||
gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
|
||||
|
||||
// do this now, avoids the need for installing
|
||||
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
|
||||
|
||||
// initialize CCD location
|
||||
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
|
||||
|
||||
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Creates a HEM non-polymer entity plus four atoms in an otherwise empty
/// datablock through cif::mm::structure, then compares the resulting
/// datablock with a hand-written expected one.
BOOST_AUTO_TEST_CASE(create_nonpoly_1)
{
	cif::VERBOSE = 1;

	cif::file file;
	file.load_dictionary("mmcif_pdbx.dic");
	file.emplace("TEST"); // create a datablock

	cif::mm::structure structure(file);

	std::string entity_id = structure.create_non_poly_entity("HEM");

	auto atoms = R"(
data_HEM
loop_
_atom_site.id
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
1 HETATM C CHA . ? -5.248 39.769 -0.250 1.00 7.67 ?
2 HETATM C CHB . ? -3.774 36.790 3.280 1.00 7.05 ?
3 HETATM C CHC . ? -2.879 33.328 0.013 1.00 7.69 ?
4 HETATM C CHD . ? -4.342 36.262 -3.536 1.00 8.00 ?
# that's enough to test with
)"_cf;

	// Use the same dictionary name as everywhere else in this file; it matches the
	// name the resource is registered under in init_unit_test().
	atoms.load_dictionary("mmcif_pdbx.dic");

	auto &hem_data = atoms["HEM"];
	auto &atom_site = hem_data["atom_site"];

	// Wrap every atom_site row of the snippet in a cif::mm::atom
	auto hem_atoms = atom_site.rows();
	std::vector<cif::mm::atom> atom_data;
	for (auto hem_atom: hem_atoms)
		atom_data.emplace_back(hem_data, hem_atom);

	structure.create_non_poly(entity_id, atom_data);

	auto expected = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
_atom_type.symbol C
)"_cf;

	expected.load_dictionary("mmcif_pdbx.dic");

	// On a mismatch, record the failure and dump both datablocks for comparison
	if (not (expected.front() == structure.get_datablock()))
	{
		BOOST_TEST(false);
		std::cout << expected.front() << std::endl
				  << std::endl
				  << structure.get_datablock() << std::endl;
	}
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Looks atoms up by id in a datablock whose atom_site rows are deliberately
/// listed out of id order (1, 3, 2, 4) and checks each id maps to the right atom.
BOOST_AUTO_TEST_CASE(test_atom_id)
{
	auto input = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
3 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
2 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;

	input.load_dictionary("mmcif_pdbx.dic");

	cif::mm::structure structure(input);

	// ids 2 and 3 are swapped relative to row order, so lookup must go by id, not position
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("1").get_label_atom_id(), "CHA");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("2").get_label_atom_id(), "CHC");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("3").get_label_atom_id(), "CHB");
	BOOST_CHECK_EQUAL(structure.get_atom_by_id("4").get_label_atom_id(), "CHD");
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Loads examples/1cbs.cif.gz and verifies, for every atom_site row, that the
/// atom found by id reports the same label/auth fields as the row, and that
/// structure.atoms() yields the atoms in the same order as the atom_site rows.
BOOST_AUTO_TEST_CASE(atom_numbers_1)
{
	const std::filesystem::path test1(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(test1.string());
	cif::mm::structure structure(file);

	auto &db = file.front();

	auto &atoms = structure.atoms();
	auto ai = atoms.begin();

	for (const auto &[id, label_asym_id, label_seq_id, label_atom_id, auth_seq_id, label_comp_id] :
		db["atom_site"].rows<std::string,std::string,int,std::string,std::string,std::string>("id", "label_asym_id", "label_seq_id", "label_atom_id", "auth_seq_id", "label_comp_id"))
	{
		auto atom = structure.get_atom_by_id(id);

		BOOST_CHECK_EQUAL(atom.get_label_asym_id(), label_asym_id);
		BOOST_CHECK_EQUAL(atom.get_label_seq_id(), label_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_atom_id(), label_atom_id);
		BOOST_CHECK_EQUAL(atom.get_auth_seq_id(), auth_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_comp_id(), label_comp_id);

		// BOOST_REQUIRE rather than BOOST_ASSERT: it is a real test assertion that is
		// never compiled out, and it ends this test case before `ai` is dereferenced
		// when structure.atoms() has fewer entries than atom_site.
		BOOST_REQUIRE(ai != atoms.end());

		BOOST_CHECK_EQUAL(ai->id(), id);
		++ai;
	}

	// every atom of the structure must have been matched by an atom_site row
	BOOST_REQUIRE(ai == atoms.end());
}
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
/// Loads examples/1cbs.cif.gz and checks that it yields exactly one polymer
/// whose size equals the number of pdbx_poly_seq_scheme rows for its asym_id.
BOOST_AUTO_TEST_CASE(test_load_2)
{
	using namespace cif::literals;

	const std::filesystem::path example_path(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(example_path.string());

	auto &datablock = file.front();

	cif::mm::structure structure(file);

	BOOST_CHECK(structure.polymers().size() == 1);

	auto &poly_seq_scheme = datablock["pdbx_poly_seq_scheme"];

	for (auto &polymer : structure.polymers())
	{
		// rows of pdbx_poly_seq_scheme that belong to this polymer's asym
		const auto scheme_rows = poly_seq_scheme.find("asym_id"_key == polymer.get_asym_id());

		BOOST_CHECK_EQUAL(polymer.size(), scheme_rows.size());
	}
}
|
||||
|
||||
/// Removes the residue obtained via get_residue("B") from the 1cbs example and
/// checks that validate_atoms() does not throw afterwards.
BOOST_AUTO_TEST_CASE(remove_residue_1)
{
	using namespace cif::literals;

	const std::filesystem::path example_path(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(example_path.string());

	cif::mm::structure structure(file);

	structure.remove_residue(structure.get_residue("B"));

	// the structure must still validate once the residue is gone
	BOOST_CHECK_NO_THROW(structure.validate_atoms());
}
|
||||
@@ -1,57 +0,0 @@
|
||||
#include "../include/cif++/Cif++.hpp"
|
||||
#include "../include/cif++/PDB2Cif.hpp"
|
||||
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
#include <boost/program_options.hpp>
|
||||
|
||||
// #include "pdb2cif.h"
|
||||
|
||||
namespace po = boost::program_options;
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
po::options_description desc("pdb2cif-test options");
|
||||
desc.add_options()
|
||||
("input,i", po::value<std::string>(), "Input file")
|
||||
("help,h", "Display help message")
|
||||
("version", "Print version")
|
||||
("verbose,v", "Verbose output")
|
||||
("debug,d", po::value<int>(), "Debug level (for even more verbose output)");
|
||||
|
||||
po::positional_options_description p;
|
||||
p.add("input", 1);
|
||||
|
||||
po::variables_map vm;
|
||||
po::store(po::command_line_parser(argc, argv).options(desc).positional(p).run(), vm);
|
||||
po::notify(vm);
|
||||
|
||||
if (vm.count("version"))
|
||||
{
|
||||
std::cout << argv[0] << " version " PACKAGE_VERSION << std::endl;
|
||||
exit(0);
|
||||
}
|
||||
|
||||
if (vm.count("help") or vm.count("input") == 0)
|
||||
{
|
||||
std::cerr << desc << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
cif::VERBOSE = vm.count("verbose") != 0;
|
||||
if (vm.count("debug"))
|
||||
cif::VERBOSE = vm["debug"].as<int>();
|
||||
|
||||
std::ifstream is(vm["input"].as<std::string>());
|
||||
if (not is.is_open())
|
||||
throw std::runtime_error("Could not open file " + vm["input"].as<std::string>());
|
||||
|
||||
cif::File f;
|
||||
ReadPDBFile(is, f);
|
||||
f.save(std::cout);
|
||||
|
||||
return 0;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user