Compare commits

...

101 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
da8a72a8aa Merge branch 'develop' into trunk 2024-03-06 16:50:33 +01:00
Maarten L. Hekkelman
ac497932b5 Fix loading embedded restraint data 2024-03-06 16:50:12 +01:00
Maarten L. Hekkelman
9927b5061a Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2024-03-06 16:21:15 +01:00
Maarten L. Hekkelman
cedaab9642 load compound info from appended restraint info 2024-03-06 16:20:43 +01:00
Maarten L. Hekkelman
50bf2145ec Merge branch 'develop' into trunk 2024-03-06 15:54:56 +01:00
Maarten L. Hekkelman
dc77729f50 update changelog 2024-03-06 15:48:17 +01:00
Maarten L. Hekkelman
e3330d667a formatting 2024-03-06 13:22:00 +01:00
Maarten L. Hekkelman
9822f397a1 update libcifpp makefile 2024-03-06 12:58:50 +01:00
Maarten L. Hekkelman
a3b5ce9959 loading extra compound info changed
cif::file constructors
2024-03-06 12:33:58 +01:00
Maarten L. Hekkelman
9eb06e929a fixes in iterators 2024-03-05 15:52:36 +01:00
Maarten L. Hekkelman
629e06d647 write uncoloured text 2024-03-05 15:22:05 +01:00
Maarten L. Hekkelman
51ccb92184 small changes in constructors 2024-03-05 13:41:05 +01:00
Maarten L. Hekkelman
3cd27f13fd calculate formula_weight when missing 2024-03-05 13:02:31 +01:00
Maarten L. Hekkelman
ae668530c0 do not install files if they are not downloaded 2024-03-05 11:40:33 +01:00
Maarten L. Hekkelman
4a8b1c056c do not install files if they are not downloaded 2024-03-05 11:39:45 +01:00
Maarten L. Hekkelman
d7a5e598bc Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2024-03-05 11:33:25 +01:00
Maarten L. Hekkelman
3f1ee32cc6 better trim 2024-03-05 11:33:19 +01:00
Maarten L. Hekkelman
725d6ead98 stupid typo 2024-02-29 14:56:45 +01:00
Maarten L. Hekkelman
baf70579de version bump 2024-02-27 15:23:51 +01:00
Maarten L. Hekkelman
cd28ab58a3 typo in error message 2024-02-27 14:40:56 +01:00
Maarten L. Hekkelman
a78fa0a81d reconstruction fixes 2024-02-27 13:36:45 +01:00
Maarten L. Hekkelman
82130be5f5 oops 2024-02-27 13:29:38 +01:00
Maarten L. Hekkelman
510ce62dfb reconstruction fixes 2024-02-27 13:24:53 +01:00
Maarten L. Hekkelman
93375a5087 Develop (#54)
* - renamed exists to contains
- fix compare for ints where item is empty

* - checking and optionally dropping ndb_poly_seq_scheme
- fix in iterator_proxy

* formatting data in reconstruction

* Version bump

* Attempt to get code compiling on macOS

* attempt 2 to build on macOS

* Added remove column

* Added rename_column
Added item_alias
Rename columns in reconstruct

* macOS...

* Fixed serious bug in emplace of both datablock and file.

* renaming field and column to item

* replace tag with item or item_name

* Fix validate pdbx

* version bump

* atom_site_anisotrop check

* - changed compound::is_known_peptide/is_know_base
- Add audit_conform only if file is really valid
- Added reconstruction code for PDBx

* pdb2cif work

* gcc diagnostics and clipper

* Fixing pdb2cif, and sequence checking

* work around bug in old gcc

* fix reconstruct sequence

* formatting

* some small optimisations

* Fix url in compound message

* Fix operator= for item_handle

* Fix operator= for item_handle

* new update_value in category

* test builds faster now

* Use Catch2 version 3 if installed

* catch22
2024-02-17 16:03:14 +01:00
Maarten L. Hekkelman
be738e7fb1 catch22 2024-02-17 15:37:53 +01:00
Maarten L. Hekkelman
9c78131df3 Use Catch2 version 3 if installed 2024-02-17 15:20:11 +01:00
Maarten L. Hekkelman
d94f6f4d19 test builds faster now 2024-02-05 13:01:28 +01:00
Maarten L. Hekkelman
9a3eced350 new update_value in category 2024-01-31 16:20:49 +01:00
Maarten L. Hekkelman
2fed7a76fb Fix operator= for item_handle 2024-01-30 16:51:12 +01:00
Maarten L. Hekkelman
f02e59df1b Fix operator= for item_handle 2024-01-30 16:40:02 +01:00
Maarten L. Hekkelman
04147a2fe9 Fix url in compound message 2024-01-30 16:39:32 +01:00
Maarten L. Hekkelman
0e83bc31dc some small optimisations 2024-01-30 16:29:22 +01:00
Maarten L. Hekkelman
75a5f7960f formatting 2024-01-30 11:12:29 +01:00
Maarten L. Hekkelman
3f93c27b07 fix reconstruct sequence 2024-01-29 17:40:24 +01:00
Maarten L. Hekkelman
ab781d4516 work around bug in old gcc 2024-01-29 16:33:53 +01:00
Maarten L. Hekkelman
446438bf8c Fixing pdb2cif, and sequence checking 2024-01-29 16:12:01 +01:00
Maarten L. Hekkelman
4e012cbd48 gcc diagnostics and clipper 2024-01-29 16:11:08 +01:00
Maarten L. Hekkelman
12ee4a792c pdb2cif work 2024-01-29 13:00:50 +01:00
Maarten L. Hekkelman
e59750386f - changed compound::is_known_peptide/is_know_base
- Add audit_conform only if file is really valid
- Added reconstruction code for PDBx
2024-01-24 16:14:08 +01:00
Maarten L. Hekkelman
4e19d54867 atom_site_anisotrop check 2024-01-23 14:08:25 +01:00
Maarten L. Hekkelman
db603e5438 version bump 2024-01-23 11:57:29 +01:00
Maarten L. Hekkelman
5320cb123a Fix validate pdbx 2024-01-23 11:57:21 +01:00
Maarten L. Hekkelman
30a2ebdbb4 Merge remote-tracking branch 'github/develop-cif2fasta' into develop 2024-01-23 11:42:12 +01:00
Maarten L. Hekkelman
a5d43998a3 replace tag with item or item_name 2024-01-23 11:41:13 +01:00
Maarten L. Hekkelman
2792caec70 renaming field and column to item 2024-01-23 11:23:20 +01:00
Maarten L. Hekkelman
fb2b1e984c Fixed serious bug in emplace of both datablock and file. 2024-01-22 16:04:57 +01:00
Maarten L. Hekkelman
13ab1caf95 macOS... 2024-01-22 15:33:43 +01:00
Maarten L. Hekkelman
5d4534fac4 Added rename_column
Added item_alias
Rename columns in reconstruct
2024-01-22 15:15:11 +01:00
Maarten L. Hekkelman
f450643861 Added remove column 2024-01-22 14:03:22 +01:00
Maarten L. Hekkelman
fc14a65511 attempt 2 to build on macOS 2024-01-22 13:08:25 +01:00
Maarten L. Hekkelman
bbd1e27c5e Attempt to get code compiling on macOS 2024-01-22 12:49:11 +01:00
Maarten L. Hekkelman
369a83b718 Version bump 2024-01-22 12:00:38 +01:00
Maarten L. Hekkelman
afc541b956 Merge remote-tracking branch 'github/develop-cif2fasta' into develop 2024-01-22 12:00:11 +01:00
Maarten L. Hekkelman
7e4d2ffb4d formatting data in reconstruction 2024-01-22 11:43:56 +01:00
Maarten L. Hekkelman
e09913a94f - checking and optionally dropping ndb_poly_seq_scheme
- fix in iterator_proxy
2024-01-22 11:18:00 +01:00
Maarten L. Hekkelman
b4d1c4cc04 - renamed exists to contains
- fix compare for ints where item is empty
2024-01-17 16:31:18 +01:00
Maarten L. Hekkelman
22537c0e7e Merge branch 'develop' into trunk 2024-01-09 16:45:46 +01:00
Maarten L. Hekkelman
39c0db8d6a Do not validate on copy category 2024-01-09 16:29:53 +01:00
Maarten L. Hekkelman
9db12761f7 Increase version to reflect changes 2024-01-04 13:14:36 +01:00
Maarten L. Hekkelman
0f8a7c4817 Fix design flaw in category_index (when moving categories) 2024-01-04 13:10:09 +01:00
Maarten L. Hekkelman
47e59a55c5 for compatibility with gcc 9.4 2024-01-03 12:35:22 +01:00
Maarten L. Hekkelman
b3496f4e5d fixes in pdbx validation, compound one letter code 2024-01-03 12:31:42 +01:00
Maarten L. Hekkelman
e866228afd In reconstruct, use embedded compound info 2024-01-03 09:43:13 +01:00
Maarten L. Hekkelman
4aeaa5251e revert change to string_view, not working on macOS 2024-01-03 09:26:32 +01:00
Maarten L. Hekkelman
b36988e64a update changelog 2024-01-03 09:18:08 +01:00
Maarten L. Hekkelman
393aefce8f small optimisation 2024-01-03 08:51:06 +01:00
Maarten L. Hekkelman
227ff1b8be sequence recovery and validation 2024-01-02 16:54:04 +01:00
Maarten L. Hekkelman
82086a93b0 PDBx validation and reconstruction code, take 1 2024-01-02 15:27:26 +01:00
Maarten L. Hekkelman
abd97cc1c9 Merge 2024-01-02 10:52:09 +01:00
Maarten L. Hekkelman
3315fae83e Merge branch 'cif2fasta-develop' into develop-cif2fasta 2024-01-02 10:51:11 +01:00
Maarten L. Hekkelman
d8c3c3f7f0 formatting 2024-01-02 10:44:17 +01:00
Maarten L. Hekkelman
23459879f8 export CIFPP_SHARE_DIR to parent scope in case we're included using sub_directory 2024-01-02 09:43:33 +01:00
Maarten L. Hekkelman
f1ca916d58 Merge remote-tracking branch 'origin/develop' into develop-cif2fasta 2023-12-27 14:10:25 +01:00
Maarten L. Hekkelman
6aae012ae5 Fix url in missing compound message 2023-12-27 09:55:35 +01:00
Maarten L. Hekkelman
516983427a Load inline compound info as restraints, not CCD info 2023-12-20 13:50:27 +01:00
Maarten L. Hekkelman
05d78c92f9 Update changelog 2023-12-20 11:52:49 +01:00
Maarten L. Hekkelman
dc57144472 delete garbage 2023-12-20 11:51:27 +01:00
Maarten L. Hekkelman
dd260ca45e Change order of categories in input and in output
- input matches order in file from now on
- output is ordered by parent/child relations
2023-12-20 11:51:15 +01:00
Maarten L. Hekkelman
3bc2fc4151 add missing get 2023-12-20 11:50:29 +01:00
Maarten L. Hekkelman
6c58eaa7e8 Update changelog 2023-12-13 16:28:37 +01:00
Maarten L. Hekkelman
e1a1c11a01 Add formula_weight to entity in pdb2cif 2023-12-13 16:27:41 +01:00
Maarten L. Hekkelman
95a6b4264d typo in doc 2023-12-12 09:50:04 +01:00
Maarten L. Hekkelman
4782a4e07d Merge branch 'develop' into trunk 2023-12-12 09:31:30 +01:00
Maarten L. Hekkelman
dbc14206dc New readme 2023-12-12 09:21:59 +01:00
Maarten L. Hekkelman
f4296d8858 docs ci 2023-12-12 08:26:44 +01:00
Maarten L. Hekkelman
75c4c2286d readme, docs ci 2023-12-12 08:08:40 +01:00
Maarten L. Hekkelman
b14237e8e6 Not nice 2023-12-05 16:01:20 +01:00
Maarten L. Hekkelman
df3263e4bd test 2023-12-05 15:57:51 +01:00
Maarten L. Hekkelman
ff7b413abf windows gunzip workaround 2023-12-05 15:43:52 +01:00
Maarten L. Hekkelman
07224779e6 find_program works a bit different than expected 2023-12-05 15:26:26 +01:00
Maarten L. Hekkelman
c031a3c24e Moved data file components.cif to rsrc 2023-12-05 11:53:39 +01:00
Maarten L. Hekkelman
1e74f7912c using eigen3 2023-12-05 08:59:10 +01:00
Maarten L. Hekkelman
d91f8d0876 Use fetchcontent for eigen, again 2023-12-04 18:59:03 +01:00
Maarten L. Hekkelman
43d4644fba fix license badge 2023-12-03 15:32:55 +01:00
Maarten L. Hekkelman
61a924d208 attempt to build documentation in github 2023-12-03 15:30:40 +01:00
Maarten L. Hekkelman
2692f2c1bf no update script for macOS
install with sudo in CI
2023-11-29 15:22:59 +01:00
Maarten L. Hekkelman
88b3c87bae fixes for automatic update of data 2023-11-29 14:37:28 +01:00
Maarten L. Hekkelman
24fe6ee583 Unused comparison result... tssk 2023-11-22 14:15:51 +01:00
Maarten L. Hekkelman
b83ef112a9 Fix in structure::change_residue 2023-11-13 10:50:27 +01:00
Maarten L. Hekkelman
dd9110a3a8 Add option to not write data files 2023-11-07 16:48:44 +01:00
Maarten L. Hekkelman
88c23e1b0f Add option to not write data files 2023-11-07 16:38:47 +01:00
60 changed files with 23261 additions and 2082 deletions

View File

@@ -0,0 +1,65 @@
# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
name: publish docs
on:
push:
branches: [ "trunk" ]
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v1
- name: Set reusable strings
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
id: strings
shell: bash
run: |
echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
- name: Install dependencies Ubuntu
run: sudo apt-get update && sudo apt-get install cmake doxygen
- uses: actions/setup-python@v4
with:
python-version: '3.9'
cache: 'pip' # caching pip dependencies
- run: pip install -r docs/requirements.txt
- name: Configure CMake
run: cmake -S . -B ${{ steps.strings.outputs.build-output-dir }} -DBUILD_DOCUMENTATION=ON -DBUILD_TESTING=OFF
- name: Run Sphinx
run: |
cmake --build ${{ steps.strings.outputs.build-output-dir }} --target Sphinx-libcifpp
ls -l ${{ steps.strings.outputs.build-output-dir }}
ls -l ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
- name: Upload artifact
uses: actions/upload-pages-artifact@v2
with:
path: ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
needs: docs
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v2

View File

@@ -45,13 +45,15 @@ jobs:
cmake -B ${{ steps.strings.outputs.build-output-dir }}
-DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
-DCMAKE_BUILD_TYPE=Release
-DCIFPP_DOWNLOAD_CCD=OFF
-S ${{ github.workspace }}
- name: Build
run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config Release
- name: Test
working-directory: ${{ steps.strings.outputs.build-output-dir }}
run: ctest --build-config Release --output-on-failure
- name: Install
if: matrix.os != 'windows-latest'
run: sudo cmake --install ${{ steps.strings.outputs.build-output-dir }} --config Release

5
.gitignore vendored
View File

@@ -2,7 +2,7 @@ build/
.vscode/
.vs/
tools/update-libcifpp-data
data/components.cif*
rsrc/components.cif*
CMakeSettings.json
msvc/
src/revision.hpp
@@ -11,4 +11,5 @@ Testing/
include/cif++/exports.hpp
docs/api
docs/conf.py
build_ci/
build_ci/
data/components.cif

View File

@@ -11,21 +11,24 @@
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.16)
# set the project name
project(libcifpp VERSION 6.0.0 LANGUAGES CXX)
project(
libcifpp
VERSION 7.0.1
LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
@@ -50,7 +53,9 @@ if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
)
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
@@ -66,25 +71,26 @@ if(NOT(BUILD_FOR_CCP4 AND WIN32))
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
endif()
# Lots of code depend on the availability of the components.cif file
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)
if(BUILD_FOR_CCP4)
unset(CIFPP_DOWNLOAD_CCD)
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
else()
# Lots of code depend on the availability of the components.cif file
option(CIFPP_DOWNLOAD_CCD
"Download the CCD file components.cif during installation" ON)
# An optional cron script can be installed to keep the data files up-to-date
if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT BUILD_FOR_CCP4)
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
option(CIFPP_INSTALL_UPDATE_SCRIPT
"Install the script to update CCD and dictionary files" ON)
endif()
endif()
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
if(EXISTS "$ENV{CCP4}")
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
else()
set(CIFPP_RECREATE_SYMOP_DATA OFF)
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CLIBD}")
endif()
else()
set(CIFPP_RECREATE_SYMOP_DATA OFF)
message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
# When CCP4 is sourced in the environment, we can recreate the symmetry
# operations table
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
option(CIFPP_RECREATE_SYMOP_DATA
"Recreate SymOp data table in case it is out of date" ON)
endif()
# CCP4 build
@@ -141,23 +147,30 @@ endif()
# Libraries
# Start by finding out if std:regex is usable. Note that the current
# implementation in GCC is not acceptable, it crashes on long lines.
# The implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles("
# implementation in GCC is not acceptable, it crashes on long lines. The
# implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles(
"
#include <iostream>
#ifndef __GLIBCXX__
#error
#endif
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)
int main(int argc, char *argv[]) { return 0; }"
GXX_LIBSTDCPP)
if(GXX_LIBSTDCPP)
message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")
message(
STATUS "Testing for known regex bug, since you're using GNU libstdc++")
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
${CMAKE_CURRENT_BINARY_DIR}/test
${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead")
message(
STATUS
"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
)
find_package(Boost 1.80 QUIET COMPONENTS regex)
@@ -167,8 +180,7 @@ if(GXX_LIBSTDCPP)
FetchContent_Declare(
boost-rx
GIT_REPOSITORY https://github.com/boostorg/regex
GIT_TAG boost-1.83.0
)
GIT_TAG boost-1.83.0)
FetchContent_MakeAvailable(boost-rx)
endif()
@@ -182,8 +194,8 @@ set(THREADS_PREFER_PTHREAD_FLAG)
find_package(Threads)
if(MSVC)
# Avoid linking the shared library of zlib
# Search ZLIB_ROOT first if it is set.
# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
# set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
@@ -193,8 +205,7 @@ if(MSVC)
set(_ZLIB_x86 "(x86)")
set(_ZLIB_SEARCH_NORMAL
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
"$ENV{ProgramFiles}/zlib"
"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
unset(_ZLIB_x86)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
@@ -203,26 +214,37 @@ if(MSVC)
endif()
foreach(search ${_ZLIB_SEARCHES})
find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
find_library(
ZLIB_LIBRARY
NAMES zlibstatic NAMES_PER_DIR ${${search}}
PATH_SUFFIXES lib)
endforeach()
endif()
find_package(ZLIB REQUIRED)
find_package(Eigen3 QUIET)
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
# we only need a couple of header files. Nothing special. But often, eigen3 is
# already installed and then we prefer that.
find_package(Eigen3 3.4 QUIET)
if(Eigen3_FOUND)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES)
if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
INTERFACE_INCLUDE_DIRECTORIES)
else()
ExternalProject_Add(
local_Eigen3
# Create a private copy of eigen3 and populate it only, no need to build
FetchContent_Declare(
my-eigen3
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
GIT_TAG 3.4.0
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/external
)
GIT_TAG 3.4.0)
set(EIGEN3_D local_Eigen3)
set(EIGEN_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/include/eigen3)
FetchContent_GetProperties(my-eigen3)
if(NOT my-eigen3_POPULATED)
FetchContent_Populate(my-eigen3)
endif()
set(EIGEN_INCLUDE_DIR ${my-eigen3_SOURCE_DIR})
endif()
include(FindFilesystem)
@@ -238,17 +260,20 @@ write_version_header(${PROJECT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
# The tool to create the table
add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
add_executable(symop-map-generator
"${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
add_custom_command(
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib $ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
)
COMMAND
$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
$ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_custom_target(
OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib" "$ENV{CLIBD}/symop.lib"
)
OUTPUT
${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
"$ENV{CLIBD}/symop.lib")
endif()
# Sources
@@ -264,62 +289,71 @@ set(project_sources
${PROJECT_SOURCE_DIR}/src/validate.cpp
${PROJECT_SOURCE_DIR}/src/text.cpp
${PROJECT_SOURCE_DIR}/src/utilities.cpp
${PROJECT_SOURCE_DIR}/src/atom_type.cpp
${PROJECT_SOURCE_DIR}/src/compound.cpp
${PROJECT_SOURCE_DIR}/src/point.cpp
${PROJECT_SOURCE_DIR}/src/symmetry.cpp
${PROJECT_SOURCE_DIR}/src/model.cpp
${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${PROJECT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
)
set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++.hpp
${PROJECT_SOURCE_DIR}/include/cif++/utilities.hpp
${PROJECT_SOURCE_DIR}/include/cif++/item.hpp
${PROJECT_SOURCE_DIR}/include/cif++/datablock.hpp
${PROJECT_SOURCE_DIR}/include/cif++/file.hpp
${PROJECT_SOURCE_DIR}/include/cif++/validate.hpp
${PROJECT_SOURCE_DIR}/include/cif++/iterator.hpp
${PROJECT_SOURCE_DIR}/include/cif++/parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/forward_decl.hpp
${PROJECT_SOURCE_DIR}/include/cif++/dictionary_parser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
include/cif++.hpp
include/cif++/utilities.hpp
include/cif++/item.hpp
include/cif++/datablock.hpp
include/cif++/file.hpp
include/cif++/validate.hpp
include/cif++/iterator.hpp
include/cif++/parser.hpp
include/cif++/forward_decl.hpp
include/cif++/dictionary_parser.hpp
include/cif++/condition.hpp
include/cif++/category.hpp
include/cif++/row.hpp
include/cif++/atom_type.hpp
include/cif++/compound.hpp
include/cif++/point.hpp
include/cif++/symmetry.hpp
include/cif++/model.hpp
include/cif++/pdb.hpp
include/cif++/pdb/cif2pdb.hpp
include/cif++/pdb/io.hpp
include/cif++/pdb/pdb2cif.hpp
include/cif++/pdb/tls.hpp)
${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
add_library(cifpp STATIC)
add_library(cifpp::cifpp ALIAS cifpp)
${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
target_sources(cifpp
PRIVATE ${project_sources}
${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
PUBLIC
FILE_SET cifpp_headers TYPE HEADERS
BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
FILES ${project_headers}
)
add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_library(cifpp::cifpp ALIAS cifpp)
# The code now really requires C++20
target_compile_features(cifpp PUBLIC cxx_std_20)
set(CMAKE_DEBUG_POSTFIX d)
set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
generate_export_header(cifpp EXPORT_FILE_NAME ${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
generate_export_header(cifpp EXPORT_FILE_NAME
${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
if(BOOST_REGEX)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex INTERFACE_INCLUDE_DIRECTORIES)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
INTERFACE_INCLUDE_DIRECTORIES)
endif()
if(MSVC)
@@ -328,20 +362,14 @@ endif()
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(cifpp
PUBLIC
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
target_include_directories(
cifpp
PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE
"${EIGEN_INCLUDE_DIR}"
"${BOOST_REGEX_INCLUDE_DIR}"
)
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
if(${EIGEN3_D})
add_dependencies(cifpp ${EIGEN3_D})
endif()
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB
${CIFPP_REQUIRED_LIBRARIES})
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -349,7 +377,7 @@ endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
if(CIFPP_DOWNLOAD_CCD)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/rsrc/components.cif)
if(EXISTS ${COMPONENTS_CIF})
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
@@ -361,70 +389,83 @@ if(CIFPP_DOWNLOAD_CCD)
endif()
if(NOT EXISTS ${COMPONENTS_CIF})
if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
endif()
# Since the file(DOWNLOAD) command in cmake does not use
# compression, we try to download the gzipped version and
# decompress it ourselves.
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
# to download the gzipped version and decompress it ourselves.
find_program(GUNZIP gunzip)
if(GUNZIP)
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
file(
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
${COMPONENTS_CIF}
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
else()
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
file(
DOWNLOAD
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
${COMPONENTS_CIF}.gz
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
endif()
add_custom_command(OUTPUT ${COMPONENTS_CIF}
COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
else()
file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
add_custom_command(
OUTPUT ${COMPONENTS_CIF}
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/rsrc/)
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Do not continue if downloading went wrong
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
if(CCD_FETCH_STATUS_CODE)
message(FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
message(
FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Installation directories
if(BUILD_FOR_CCP4)
set(CIFPP_DATA_DIR "$ENV{CCP4}/share/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR
"$ENV{CCP4}/share/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
else()
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
endif()
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
if(CIFPP_DATA_DIR)
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
endif()
if(UNIX AND NOT BUILD_FOR_CCP4)
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
set(CIFPP_CACHE_DIR
"/var/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
else()
set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
set(CIFPP_CACHE_DIR
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
endif()
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
set(CIFPP_ETC_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}" CACHE PATH "The directory where the update configuration file is stored")
set(CIFPP_ETC_DIR
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
CACHE PATH "The directory where the update configuration file is stored")
else()
unset(CIFPP_CACHE_DIR)
endif()
# Install rules
install(TARGETS cifpp
EXPORT cifpp-targets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
EXPORT cifpp
FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(MSVC AND BUILD_SHARED_LIBS)
install(
@@ -439,153 +480,112 @@ file(GLOB OLD_CONFIG_FILES
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
if(OLD_CONFIG_FILES)
message(STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
message(
STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
endif()
install(EXPORT cifpp-targets
FILE "cifpp-targets.cmake"
install(EXPORT cifpp
NAMESPACE cifpp::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
)
FILE "cifpp-targets.cmake"
DESTINATION lib/cmake/cifpp)
install(
DIRECTORY include/cif++
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Devel
)
install(
FILES include/cif++.hpp
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Devel
)
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF}
DESTINATION ${CIFPP_DATA_DIR}
)
if(CIFPP_CACHE_DIR)
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
install(
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
${COMPONENTS_CIF}
DESTINATION ${CIFPP_CACHE_DIR}
)
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
DESTINATION ${CIFPP_DATA_DIR})
endif()
if(CIFPP_CACHE_DIR AND CIFPP_DOWNLOAD_CCD)
install(
FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
DESTINATION ${CIFPP_CACHE_DIR})
endif()
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
configure_package_config_file(
${CONFIG_TEMPLATE_FILE}
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR
)
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION lib/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR)
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
COMPONENT Devel
)
DESTINATION lib/cmake/cifpp)
set_target_properties(cifpp PROPERTIES
VERSION ${PROJECT_VERSION}
set_target_properties(
cifpp
PROPERTIES VERSION ${PROJECT_VERSION}
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
set_property(TARGET cifpp APPEND PROPERTY
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
)
set_property(
TARGET cifpp
APPEND
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)
COMPATIBILITY AnyNewerVersion)
# In case we're included as sub_directory:
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
endif()
if(BUILD_TESTING)
# We're using the older version 2 of Catch2
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v2.13.9
)
FetchContent_MakeAvailable(Catch2)
list(APPEND CIFPP_tests
unit-v2
unit-3d
format
model
rename-compound
sugar
spinner
)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test")
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} "${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Catch2::Catch2)
target_include_directories(${CIFPP_TEST} PRIVATE ${EIGEN_INCLUDE_DIR})
if(MSVC)
# Specify unwind semantics so that MSVC knowns how to handle exceptions
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
endif()
add_custom_target("run-${CIFPP_TEST}" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
add_test(NAME ${CIFPP_TEST}
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
endforeach()
add_subdirectory(test)
endif()
# Optionally install the update scripts for CCD and dictionary files
if(CIFPP_INSTALL_UPDATE_SCRIPT)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "GNU")
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL
"GNU")
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CRON_DIR "/etc/cron.weekly" CACHE PATH "The cron directory, for the update script")
set(CIFPP_CRON_DIR
"/etc/cron.weekly"
CACHE PATH "The cron directory, for the update script")
else()
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/cron.weekly" CACHE PATH "The cron directory, for the update script")
set(CIFPP_CRON_DIR
"${CIFPP_ETC_DIR}/cron.weekly"
CACHE PATH "The cron directory, for the update script")
endif()
elseif(UNIX) # assume all others are like FreeBSD...
set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/periodic/weekly" CACHE PATH "The cron directory, for the update script")
elseif(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
set(CIFPP_CRON_DIR
"${CIFPP_ETC_DIR}/periodic/weekly"
CACHE PATH "The cron directory, for the update script")
else()
message(FATAL_ERROR "Don't know where to install the update script")
endif()
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in
update-libcifpp-data @ONLY)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CIFPP_CRON_DIR}
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})
# a config file, to make it complete
if(NOT EXISTS "${CIFPP_ETC_DIR}/libcifpp.conf")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
file(
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
[[# Uncomment the next line to enable automatic updates
# update=true
]])
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "${CIFPP_ETC_DIR}")
install(CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")")
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION "${CIFPP_ETC_DIR}")
install(
CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
install(DIRECTORY DESTINATION "${CIFPP_ETC_DIR}/libcifpp/cache-update.d")
endif()
@@ -602,7 +602,7 @@ set(CPACK_SOURCE_TGZ ON)
set(CPACK_SOURCE_TBZ2 OFF)
set(CPACK_SOURCE_TXZ OFF)
set(CPACK_SOURCE_TZ OFF)
set(CPACK_SOURCE_IGNORE_FILES "/data/components.cif;/build;/.vscode;/.git")
set(CPACK_SOURCE_IGNORE_FILES "/rsrc/components.cif;/build;/.vscode;/.git")
set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME})
include(CPack)

162
README.md
View File

@@ -1,10 +1,34 @@
[![github CI](https://github.com/pdb-redo/libcifpp/actions/workflows/cmake-multi-platform.yml/badge.svg)](https://github.com/pdb-redo/libcifpp/actions)
[![GitHub License](https://img.shields.io/github/license/pdb-redo/libcifpp)](https://github.com/pdb-redo/libcifpp/LICENSE)
# libcifpp
This library contains code to work with mmCIF and legacy PDB files.
As the name implies, this library was originally written to work with mmCIF files
using C++ as programming language. The design of this library leanes heavily on
the structure of CIF files. These files can be thought of as a text dump of a
relational databank with, often but not always, a very strict schema describing
the data. These schema's are called dictionaries.
Using information from the content of a mmCIF file and an optional schema,
libcifpp allows you to access the data in the file as a collection of datablock
each containing a collection of categories with rows of data. The categories can
be searched for data using queries written in regular C++ syntax. When a dictionary
was specified, inserted data is checked for validity. Likewise removal of data
may result in cascaded removal of linked data in other categories using
parent/child relationship information.
Since there were still many programs using the legacy PDB format at the time
development started, a layer was added that converts data to and from PDB format
into mmCIF format. This means you can manipulate PDB files as if they were
normal mmCIF files.
Apart from this basic functionality, libcifpp also offers code to help with
symmetry calculations, 3d manipulations and obtaining information from the CCD
[Chemical Component Dictionary](https://www.wwpdb.org/data/ccd).
## Documentation
The documentation can be found at https://www.hekkelman.com/libcifpp-doc/
The documentation can be found at [github.io](https://pdb-redo.github.io/libcifpp/)
## Synopsis
@@ -64,54 +88,138 @@ int main(int argc, char *argv[])
You might be able to use libcifpp from a package manager used by your
OS distribution. But most likely this package will be out-of-date.
Therefore it is recommended to build *libcifpp* from code. It is not
hard to do.
hard to do. But it is recommended to read the following instructions
carefully.
### Requirements
The code for this library was written in C++17. You therefore need a
recent compiler to build it. For the development gcc 9.4 and clang 9.0
recent compiler to build it. For the development gcc >= 9.4 and clang >= 9.0
have been used as well as MSVC version 2019.
Other requirements are:
The other requirement you really need to have installed on your computer
is a version of [CMake](https://cmake.org). For now the minimum version
is 3.16 but that may soon change into a higher version. You should also
install the gui version of CMake to set build options easily, on Debian
I prefer to use the curses version installed with `cmake-curses-gui`.
It is very useful to have [mrc](https://github.com/mhekkel/mrc) available.
However, this is only an option if you use Windows or an operating system
using the ELF executable format (i.e. Linux or FreeBSD). MRC is a resource
compiler that allows including data files into the executable making them
easier to install.
Other libraries you might want to install beforehand are:
- [cmake](https://cmake.org) A build tool.
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
allows including data files into the executable making them easier to
install. Strictly speaking this is optional, but at the expense of
functionality.
- [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
library to do amongst others matrix calculations. This usually can be
installed using your package manager, in Debian/Ubuntu it is called
`libeigen3-dev`
- [zlib](https://github.com/madler/zlib), the development version of this
library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [boost](https://www.boost.org).
The Boost libraries are only needed in case you want to build the test
code or if you are using GCC. That last condition is due to a long
standing bug in the implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
- [boost](https://www.boost.org), in Debian/Ubuntu this is `libboost-dev`.
The Boost libraries are only needed in case you are using GCC due to a long
standing bug in GNU's implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
### Building
Building the code is as simple as typing:
First you need to download the code:
```console
git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules
git clone https://github.com/PDB-REDO/libcifpp.git
cd libcifpp
cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release
cmake --build build
cmake --install build
```
This checks out the source code from github, creates a new directory
where cmake stores its files. Run a configure, build the code and then
it installs the library and auxiliary files.
You should start by considering where to install libcifpp. If you have
sufficient permissions on your computer you perhaps should use the
default but libcifpp can be configured to be installed anywhere
including e.g. *$HOME/.local*.
Next step is to configure, for this use the CMake gui application. If you
installed the curses version of cmake you can type `ccmake`. On Windows
you can use `cmake-gui.exe`.
To install in the default location:
```console
ccmake -S . -B build
```
To install elsewhere, e.g. *$HOME/.local*:
```console
ccmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
```
In the cmake window, start the configure command (use button or press 'c').
After the first configure step you will see a list of settable options.
Alter these to match your preferences. Most options are self explaining
and contain a description. Some may need a bit more explanation:
- CIFPP_DATA_DIR, this directory will be used to store initial versions
of the mmcif_pdbx dictionary as well as the optional CCD file.
- CIFPP_DOWNLOAD_CCD
The CCD file is huge and perhaps you think you don't
need it. In that case you can leave this OFF. But that will limit the
use cases.
- CIFPP_INSTALL_UPDATE_SCRIPT
The files in CIFPP_DATA_DIR are quickly becoming out of date. On
FreeBSD and Linux you can install a script that updates these files
on a weekly basis.
- CIFPP_CRON_DIR
The directory where the update script is to be installed.
- CIFPP_ETC_DIR
The update script will only work if the file called *libcifpp.conf*
in this *etc* directory will contain an uncommented line with
```console
update=true
```
- CIFPP_CACHE_DIR
When you installed and enabled the update script, new files are
written to this directory.
- CIFPP_RECREATE_SYMOP_DATA
If you had CCP4 sourced into your environment, this option allows
you to recreate the symop data file.
- BUILD_FOR_CCP4
Build a special version of libcifpp to be installed in the CCP4
environment.
After setting these options you can run the configure step again and
then use generate to create the makefiles.
Building and installing is then as simple as:
```console
cmake --build build
cmake --install build
```
If this fails due to lack of permissions, you can try:
```console
sudo cmake --install build
```
Tests are created by default, and to test the code you can run:
```console
cmake --build build
ctest --test-dir build
ctest --test-dir build
```

View File

@@ -1,3 +1,26 @@
Version 7.0.1
- Various reconstruction fixes
- category order in output fixed
- better implementation of constructors for file, datablock and category
- small optimisation in iterator
Version 7.0.0
- Renaming many methods and parameters to be more
consistent with the mmCIF dictionaries.
(Most notably, item used to be called column or
tag sometimes).
- validation_error is now a std::system_error error
value. The exception is gone.
- Added repairSequenceInfo to repair invalid files
Version 6.1.0
- Add formula weight to entity in pdb2cif
- Change order of categories inside a datablock to match order in file
- Change default order to write out categories in a file based on
parent/child relationship
- Added validate_pdbx and recover_pdbx
- Fixed a serious bug in category_index when moving categories
Version 6.0.0
- Drop the use of CCP4's monomer library for compound information

View File

@@ -18,7 +18,7 @@ Loading Resources
No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*.
The order in which resources are search for is:
The order in which resources are searched for is:
* Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
for this name.

View File

@@ -31,22 +31,22 @@
#include "cif++/condition.hpp"
#include "cif++/iterator.hpp"
#include "cif++/row.hpp"
#include "cif++/validate.hpp"
#include "cif++/text.hpp"
#include "cif++/validate.hpp"
#include <array>
/** \file category.hpp
* Documentation for the cif::category class
*
* The category class should meet the requirements of Container and
* SequenceContainer.
*
* TODO: implement all of:
* https://en.cppreference.com/w/cpp/named_req/Container
* https://en.cppreference.com/w/cpp/named_req/SequenceContainer
* and more?
*/
* Documentation for the cif::category class
*
* The category class should meet the requirements of Container and
* SequenceContainer.
*
* TODO: implement all of:
* https://en.cppreference.com/w/cpp/named_req/Container
* https://en.cppreference.com/w/cpp/named_req/SequenceContainer
* and more?
*/
namespace cif
{
@@ -61,23 +61,43 @@ namespace cif
class duplicate_key_error : public std::runtime_error
{
public:
/**
* @brief Construct a new duplicate key error object
*/
/**
* @brief Construct a new duplicate key error object
*/
duplicate_key_error(const std::string &msg)
: std::runtime_error(msg)
{
}
};
/// @brief A missing_key_error is thrown when an attempt is made
/// to create an index when one of the key items is missing.
class missing_key_error : public std::runtime_error
{
public:
/**
* @brief Construct a new duplicate key error object
*/
missing_key_error(const std::string &msg, const std::string &key)
: std::runtime_error(msg)
, m_key(key)
{
}
const std::string &get_key() const noexcept { return m_key; }
private:
std::string m_key;
};
/// @brief A multiple_results_error is throw when you request a single
/// row using a query but the query contains more than exactly one row.
class multiple_results_error : public std::runtime_error
{
public:
/**
* @brief Construct a new multiple results error object
*/
/**
* @brief Construct a new multiple results error object
*/
multiple_results_error()
: std::runtime_error("query should have returned exactly one row")
{
@@ -121,23 +141,39 @@ class category
/// \endcond
category() = default; ///< Default constructor
category(std::string_view name); ///< Constructor taking a \a name
category(const category &rhs); ///< Copy constructor
category(category &&rhs); ///< Move constructor
category &operator=(const category &rhs); ///< Copy assignement operator
category &operator=(category &&rhs); ///< Move assignement operator
category() = default; ///< Default constructor
category(std::string_view name); ///< Constructor taking a \a name
category(const category &rhs); ///< Copy constructor
category(category &&rhs) noexcept ///< Move constructor
{
swap(*this, rhs);
}
category &operator=(category rhs) ///< assignement operator
{
swap(*this, rhs);
return *this;
}
/// @brief Destructor
/// @note Please note that the destructor is not virtual. It is assumed that
/// you will not derive from this class.
~category();
friend void swap(category &a, category &b) noexcept;
// --------------------------------------------------------------------
const std::string &name() const { return m_name; } ///< Returns the name of the category
iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category
std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields.
[[deprecated("use key_items instead")]] iset key_fields() const; ///< Returns the cif::iset of key item names. Retrieved from the @ref category_validator for this category
iset key_items() const; ///< Returns the cif::iset of key item names. Retrieved from the @ref category_validator for this category
[[deprecated("use key_item_indices instead")]] std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key items.
std::set<uint16_t> key_item_indices() const; ///< Returns a set of indices for the key items.
/// @brief Set the validator for this category to @a v
/// @param v The category_validator to assign. A nullptr value is allowed.
@@ -162,7 +198,7 @@ class category
/// @brief Validate links, that means, values in this category should have an
/// accompanying value in parent categories.
///
///
/// @note
/// The code makes one exception when validating missing links and that's between
/// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*.
@@ -265,7 +301,7 @@ class category
/// Return the theoretical maximum number or rows that can be stored
size_t max_size() const
{
return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess
return std::numeric_limits<size_t>::max(); // this is a bit optimistic, I guess
}
/// Return true if the category is empty
@@ -281,12 +317,12 @@ class category
using key_type = row_initializer;
/// @brief Return a row_handle for the row specified by \a key
/// @param key The value for the key, fields specified in the dictionary should have a value
/// @param key The value for the key, items specified in the dictionary should have a value
/// @return The row found in the index, or an undefined row_handle
row_handle operator[](const key_type &key);
/// @brief Return a const row_handle for the row specified by \a key
/// @param key The value for the key, fields specified in the dictionary should have a value
/// @param key The value for the key, items specified in the dictionary should have a value
/// @return The row found in the index, or an undefined row_handle
const row_handle operator[](const key_type &key) const
{
@@ -301,15 +337,15 @@ class category
/// @code{.cpp}
/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
/// std::cout << name << ": " << value << '\n';
/// @endcode
/// @endcode
///
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
template <typename... Ts, typename... Ns>
iterator_proxy<const category, Ts...> rows(Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return iterator_proxy<const category, Ts...>(*this, begin(), { names... });
}
@@ -320,19 +356,19 @@ class category
/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
/// std::cout << name << ": " << value << '\n';
///
/// // or in case we only need one column:
/// // or in case we only need one item:
///
/// for (int id : cat.rows<int>("id"))
/// std::cout << id << '\n';
/// @endcode
/// @endcode
///
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
template <typename... Ts, typename... Ns>
iterator_proxy<category, Ts...> rows(Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return iterator_proxy<category, Ts...>(*this, begin(), { names... });
}
@@ -343,7 +379,7 @@ class category
/// @code{.cpp}
/// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe"))
/// .. // do something with rh
/// @endcode
/// @endcode
///
/// @param cond The condition for the query
/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
@@ -397,17 +433,17 @@ class category
/// @code{.cpp}
/// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value"))
/// std::cout << name << ": " << value << '\n';
/// @endcode
/// @endcode
///
/// @param cond The condition for the query
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
/// @return A special iterator that loops over all elements that match.
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<category, Ts...> find(condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return find<Ts...>(cbegin(), std::move(cond), std::forward<Ns>(names)...);
}
@@ -415,14 +451,14 @@ class category
/// iterator can be used in a structured binding context.
///
/// @param cond The condition for the query
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
/// @return A special iterator that loops over all elements that match.
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const category, Ts...> find(condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return find<Ts...>(cbegin(), std::move(cond), std::forward<Ns>(names)...);
}
@@ -431,14 +467,14 @@ class category
///
/// @param pos Iterator pointing to the location where to start
/// @param cond The condition for the query
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
/// @return A special iterator that loops over all elements that match.
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<category, Ts...> find(const_iterator pos, condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return { *this, pos, std::move(cond), std::forward<Ns>(names)... };
}
@@ -447,14 +483,14 @@ class category
///
/// @param pos Iterator pointing to the location where to start
/// @param cond The condition for the query
/// @tparam Ts The types for the columns requested
/// @param names The names for the columns requested
/// @tparam Ts The types for the items requested
/// @param names The names for the items requested
/// @return A special iterator that loops over all elements that match.
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const category, Ts...> find(const_iterator pos, condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of item names should be equal to the number of types to return");
return { *this, pos, std::move(cond), std::forward<Ns>(names)... };
}
@@ -509,30 +545,30 @@ class category
return *h.begin();
}
/// @brief Return value for the column named @a column for the single row that
/// @brief Return value for the item named @a item for the single row that
/// matches @a cond. Throws @a multiple_results_error if there are is not exactly one row
/// @tparam The type to use for the result
/// @param cond The condition to search for
/// @param column The name of the column to return the value for
/// @param item The name of the item to return the value for
/// @return The value found
template <typename T>
T find1(condition &&cond, const char *column) const
T find1(condition &&cond, std::string_view item) const
{
return find1<T>(cbegin(), std::move(cond), column);
return find1<T>(cbegin(), std::move(cond), item);
}
/// @brief Return value for the column named @a column for the single row that
/// @brief Return value for the item named @a item for the single row that
/// matches @a cond when starting to search at @a pos.
/// Throws @a multiple_results_error if there are is not exactly one row
/// @tparam The type to use for the result
/// @param pos The location to start the search
/// @param cond The condition to search for
/// @param column The name of the column to return the value for
/// @param item The name of the item to return the value for
/// @return The value found
template <typename T, std::enable_if_t<not is_optional_v<T>, int> = 0>
T find1(const_iterator pos, condition &&cond, const char *column) const
T find1(const_iterator pos, condition &&cond, std::string_view item) const
{
auto h = find<T>(pos, std::move(cond), column);
auto h = find<T>(pos, std::move(cond), item);
if (h.size() != 1)
throw multiple_results_error();
@@ -540,18 +576,18 @@ class category
return *h.begin();
}
/// @brief Return a value of type std::optional<T> for the column named @a column for the single row that
/// @brief Return a value of type std::optional<T> for the item named @a item for the single row that
/// matches @a cond when starting to search at @a pos.
/// If the row was not found, an empty value is returned.
/// @tparam The type to use for the result
/// @param pos The location to start the search
/// @param cond The condition to search for
/// @param column The name of the column to return the value for
/// @param item The name of the item to return the value for
/// @return The value found, can be empty if no row matches the condition
template <typename T, std::enable_if_t<is_optional_v<T>, int> = 0>
T find1(const_iterator pos, condition &&cond, const char *column) const
T find1(const_iterator pos, condition &&cond, std::string_view item) const
{
auto h = find<typename T::value_type>(pos, std::move(cond), column);
auto h = find<typename T::value_type>(pos, std::move(cond), item);
if (h.size() > 1)
throw multiple_results_error();
@@ -562,34 +598,34 @@ class category
return *h.begin();
}
/// @brief Return a std::tuple for the values for the columns named in @a columns
/// @brief Return a std::tuple for the values for the items named in @a items
/// for the single row that matches @a cond
/// Throws @a multiple_results_error if there are is not exactly one row
/// @tparam The types to use for the resulting tuple
/// @param cond The condition to search for
/// @param columns The names of the columns to return the value for
/// @param items The names of the items to return the value for
/// @return The values found as a single tuple of type std::tuple<Ts...>
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find1(condition &&cond, Cs... columns) const
std::tuple<Ts...> find1(condition &&cond, Cs... items) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
// static_assert(std::is_same_v<Cs, const char*>..., "The column names should be const char");
return find1<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(columns)...);
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of item names should be equal to the number of types to return");
// static_assert(std::is_same_v<Cs, const char*>..., "The item names should be const char");
return find1<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(items)...);
}
/// @brief Return a std::tuple for the values for the columns named in @a columns
/// @brief Return a std::tuple for the values for the items named in @a items
/// for the single row that matches @a cond when starting to search at @a pos
/// Throws @a multiple_results_error if there are is not exactly one row
/// @tparam The types to use for the resulting tuple
/// @param pos The location to start the search
/// @param cond The condition to search for
/// @param columns The names of the columns to return the value for
/// @param items The names of the items to return the value for
/// @return The values found as a single tuple of type std::tuple<Ts...>
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... columns) const
std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... items) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
auto h = find<Ts...>(pos, std::move(cond), std::forward<Cs>(columns)...);
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of item names should be equal to the number of types to return");
auto h = find<Ts...>(pos, std::move(cond), std::forward<Cs>(items)...);
if (h.size() != 1)
throw multiple_results_error();
@@ -638,74 +674,74 @@ class category
return h.empty() ? row_handle{} : *h.begin();
}
/// @brief Return the value for column @a column for the first row that matches condition @a cond
/// @brief Return the value for item @a item for the first row that matches condition @a cond
/// @tparam The type of the value to return
/// @param cond The condition to search for
/// @param column The column for which the value should be returned
/// @param item The item for which the value should be returned
/// @return The value found or a default constructed value if not found
template <typename T>
T find_first(condition &&cond, const char *column) const
T find_first(condition &&cond, std::string_view item) const
{
return find_first<T>(cbegin(), std::move(cond), column);
return find_first<T>(cbegin(), std::move(cond), item);
}
/// @brief Return the value for column @a column for the first row that matches condition @a cond
/// @brief Return the value for item @a item for the first row that matches condition @a cond
/// when starting the search at @a pos
/// @tparam The type of the value to return
/// @param pos The location to start searching
/// @param cond The condition to search for
/// @param column The column for which the value should be returned
/// @param item The item for which the value should be returned
/// @return The value found or a default constructed value if not found
template <typename T>
T find_first(const_iterator pos, condition &&cond, const char *column) const
T find_first(const_iterator pos, condition &&cond, std::string_view item) const
{
auto h = find<T>(pos, std::move(cond), column);
auto h = find<T>(pos, std::move(cond), item);
return h.empty() ? T{} : *h.begin();
}
/// @brief Return a tuple containing the values for the columns @a columns for the first row that matches condition @a cond
/// @brief Return a tuple containing the values for the items @a items for the first row that matches condition @a cond
/// @tparam The types of the values to return
/// @param cond The condition to search for
/// @param columns The columns for which the values should be returned
/// @param items The items for which the values should be returned
/// @return The values found or default constructed values if not found
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find_first(condition &&cond, Cs... columns) const
std::tuple<Ts...> find_first(condition &&cond, Cs... items) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
// static_assert(std::is_same_v<Cs, const char*>..., "The column names should be const char");
return find_first<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(columns)...);
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of item names should be equal to the number of types to return");
// static_assert(std::is_same_v<Cs, const char*>..., "The item names should be const char");
return find_first<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(items)...);
}
/// @brief Return a tuple containing the values for the columns @a columns for the first row that matches condition @a cond
/// @brief Return a tuple containing the values for the items @a items for the first row that matches condition @a cond
/// when starting the search at @a pos
/// @tparam The types of the values to return
/// @param pos The location to start searching
/// @param cond The condition to search for
/// @param columns The columns for which the values should be returned
/// @param items The items for which the values should be returned
/// @return The values found or default constructed values if not found
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find_first(const_iterator pos, condition &&cond, Cs... columns) const
std::tuple<Ts...> find_first(const_iterator pos, condition &&cond, Cs... items) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
auto h = find<Ts...>(pos, std::move(cond), std::forward<Cs>(columns)...);
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of item names should be equal to the number of types to return");
auto h = find<Ts...>(pos, std::move(cond), std::forward<Cs>(items)...);
return h.empty() ? std::tuple<Ts...>{} : *h.begin();
}
// --------------------------------------------------------------------
/// @brief Return the maximum value for column @a column for all rows that match condition @a cond
/// @brief Return the maximum value for item @a item for all rows that match condition @a cond
/// @tparam The type of the value to return
/// @param column The column to use for the value
/// @param item The item to use for the value
/// @param cond The condition to search for
/// @return The value found or the minimal value for the type
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
T find_max(const char *column, condition &&cond) const
T find_max(std::string_view item, condition &&cond) const
{
T result = std::numeric_limits<T>::min();
for (auto v : find<T>(std::move(cond), column))
for (auto v : find<T>(std::move(cond), item))
{
if (result < v)
result = v;
@@ -714,27 +750,27 @@ class category
return result;
}
/// @brief Return the maximum value for column @a column for all rows
/// @brief Return the maximum value for item @a item for all rows
/// @tparam The type of the value to return
/// @param column The column to use for the value
/// @param item The item to use for the value
/// @return The value found or the minimal value for the type
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
T find_max(const char *column) const
T find_max(std::string_view item) const
{
return find_max<T>(column, all());
return find_max<T>(item, all());
}
/// @brief Return the minimum value for column @a column for all rows that match condition @a cond
/// @brief Return the minimum value for item @a item for all rows that match condition @a cond
/// @tparam The type of the value to return
/// @param column The column to use for the value
/// @param item The item to use for the value
/// @param cond The condition to search for
/// @return The value found or the maximum value for the type
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
T find_min(const char *column, condition &&cond) const
T find_min(std::string_view item, condition &&cond) const
{
T result = std::numeric_limits<T>::max();
for (auto v : find<T>(std::move(cond), column))
for (auto v : find<T>(std::move(cond), item))
{
if (result > v)
result = v;
@@ -743,20 +779,28 @@ class category
return result;
}
/// @brief Return the maximum value for column @a column for all rows
/// @brief Return the maximum value for item @a item for all rows
/// @tparam The type of the value to return
/// @param column The column to use for the value
/// @param item The item to use for the value
/// @return The value found or the maximum value for the type
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
T find_min(const char *column) const
T find_min(std::string_view item) const
{
return find_min<T>(column, all());
return find_min<T>(item, all());
}
/// @brief Return whether a row exists that matches condition @a cond
/// @param cond The condition to match
/// @return True if a row exists
bool exists(condition &&cond) const
[[deprecated("Use contains instead")]] bool exists(condition &&cond) const
{
return contains(std::move(cond));
}
/// @brief Return whether a row exists that matches condition @a cond
/// @param cond The condition to match
/// @return True if a row exists
bool contains(condition &&cond) const
{
bool result = false;
@@ -846,7 +890,7 @@ class category
// insert_impl(pos, std::move(row));
// }
/// Erase the row pointed to by @a pos and return the iterator to the
/// Erase the row pointed to by @a pos and return the iterator to the
/// row following pos.
iterator erase(iterator pos);
@@ -890,7 +934,7 @@ class category
for (auto i = b; i != e; ++i)
{
// item_value *new_item = this->create_item(*i);
r->append(add_column(i->name()), { i->value() });
r->append(add_item(i->name()), { i->value() });
}
}
catch (...)
@@ -912,7 +956,6 @@ class category
/// result is unique in the context of this category
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
/// @brief Generate a new, unique ID based on a string prefix followed by a number
/// @param prefix The string prefix
/// @return a new unique ID
@@ -922,98 +965,195 @@ class category
{ return prefix + std::to_string(nr + 1); });
}
/// @brief Generate a new, unique value for a item named @a item_name
/// @param item_name The name of the item
/// @return a new unique value
std::string get_unique_value(std::string_view item_name);
// --------------------------------------------------------------------
/// \brief Update a single column named @a tag in the rows that match \a cond to value \a value
using value_provider_type = std::function<std::string_view(std::string_view)>;
/// \brief Update a single item named @a item_name in the rows that match
/// \a cond to values provided by a callback function \a value_provider
/// making sure the linked categories are updated according to the link.
/// That means, child categories are updated if the links are absolute
/// and unique. If they are not, the child category rows are split.
void update_value(condition &&cond, std::string_view tag, std::string_view value)
void update_value(condition &&cond, std::string_view item_name,
value_provider_type &&value_provider)
{
auto rs = find(std::move(cond));
std::vector<row_handle> rows;
std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
update_value(rows, tag, value);
update_value(rows, item_name, std::move(value_provider));
}
/// \brief Update a single column named @a tag in @a rows to value \a value
/// \brief Update a single item named @a item_name in the rows \a rows
/// to values provided by a callback function \a value_provider
/// making sure the linked categories are updated according to the link.
/// That means, child categories are updated if the links are absolute
/// and unique. If they are not, the child category rows are split.
void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);
void update_value(const std::vector<row_handle> &rows, std::string_view item_name,
value_provider_type &&value_provider);
/// \brief Update a single item named @a item_name in the rows that match \a cond to value \a value
/// making sure the linked categories are updated according to the link.
/// That means, child categories are updated if the links are absolute
/// and unique. If they are not, the child category rows are split.
void update_value(condition &&cond, std::string_view item_name, std::string_view value)
{
auto rs = find(std::move(cond));
std::vector<row_handle> rows;
std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
update_value(rows, item_name, value);
}
/// \brief Update a single item named @a item_name in @a rows to value \a value
/// making sure the linked categories are updated according to the link.
/// That means, child categories are updated if the links are absolute
/// and unique. If they are not, the child category rows are split.
void update_value(const std::vector<row_handle> &rows, std::string_view item_name, std::string_view value)
{
update_value(rows, item_name, [value](std::string_view)
{ return value; });
}
// --------------------------------------------------------------------
// Naming used to be very inconsistent. For backward compatibility,
// the old function names are here as deprecated variants.
/// \brief Return the index number for \a column_name
uint16_t get_column_ix(std::string_view column_name) const
[[deprecated("Use get_item_ix instead")]] uint16_t get_column_ix(std::string_view column_name) const
{
uint16_t result;
for (result = 0; result < m_columns.size(); ++result)
{
if (iequals(column_name, m_columns[result].m_name))
break;
}
if (VERBOSE > 0 and result == m_columns.size() and m_cat_validator != nullptr) // validate the name, if it is known at all (since it was not found)
{
auto iv = m_cat_validator->get_validator_for_item(column_name);
if (iv == nullptr)
std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << '\n';
}
return result;
return get_item_ix(column_name);
}
/// @brief Return the name for column with index @a ix
/// @param ix The index number
/// @return The name of the column
std::string_view get_column_name(uint16_t ix) const
[[deprecated("use get_item_name instead")]] std::string_view get_column_name(uint16_t ix) const
{
if (ix >= m_columns.size())
throw std::out_of_range("column index is out of range");
return m_columns[ix].m_name;
return get_item_name(ix);
}
/// @brief Make sure a column with name @a column_name is known and return its index number
/// @param column_name The name of the column
/// @return The index number of the column
uint16_t add_column(std::string_view column_name)
/// @brief Make sure a item with name @a item_name is known and return its index number
/// @param item_name The name of the item
/// @return The index number of the item
[[deprecated("use add_item instead")]] uint16_t add_column(std::string_view item_name)
{
using namespace std::literals;
return add_item(item_name);
}
uint16_t result = get_column_ix(column_name);
/** @brief Remove column name @a colum_name
* @param column_name The column to be removed
*/
[[deprecated("use remove_item instead")]] void remove_column(std::string_view column_name)
{
remove_item(column_name);
}
if (result == m_columns.size())
{
const item_validator *item_validator = nullptr;
if (m_cat_validator != nullptr)
{
item_validator = m_cat_validator->get_validator_for_item(column_name);
if (item_validator == nullptr)
m_validator->report_error("tag " + std::string(column_name) + " not allowed in category " + m_name, false);
}
m_columns.emplace_back(column_name, item_validator);
}
return result;
/** @brief Rename column @a from_name to @a to_name */
[[deprecated("use rename_item instead")]] void rename_column(std::string_view from_name, std::string_view to_name)
{
rename_item(from_name, to_name);
}
/// @brief Return whether a column with name @a name exists in this category
/// @param name The name of the column
/// @return True if the column exists
bool has_column(std::string_view name) const
[[deprecated("use has_item instead")]] bool has_column(std::string_view name) const
{
return get_column_ix(name) < m_columns.size();
return has_item(name);
}
/// @brief Return the cif::iset of columns in this category
iset get_columns() const;
[[deprecated("use get_items instead")]] iset get_columns() const
{
return get_items();
}
// --------------------------------------------------------------------
/// \brief Return the index number for \a item_name
uint16_t get_item_ix(std::string_view item_name) const
{
uint16_t result;
for (result = 0; result < m_items.size(); ++result)
{
if (iequals(item_name, m_items[result].m_name))
break;
}
if (VERBOSE > 0 and result == m_items.size() and m_cat_validator != nullptr) // validate the name, if it is known at all (since it was not found)
{
auto iv = m_cat_validator->get_validator_for_item(item_name);
if (iv == nullptr)
std::cerr << "Invalid name used '" << item_name << "' is not a known item in " + m_name << '\n';
}
return result;
}
/// @brief Return the name for item with index @a ix
/// @param ix The index number
/// @return The name of the item
std::string_view get_item_name(uint16_t ix) const
{
if (ix >= m_items.size())
throw std::out_of_range("item index is out of range");
return m_items[ix].m_name;
}
/// @brief Make sure a item with name @a item_name is known and return its index number
/// @param item_name The name of the item
/// @return The index number of the item
uint16_t add_item(std::string_view item_name)
{
using namespace std::literals;
uint16_t result = get_item_ix(item_name);
if (result == m_items.size())
{
const item_validator *item_validator = nullptr;
if (m_cat_validator != nullptr)
{
item_validator = m_cat_validator->get_validator_for_item(item_name);
if (item_validator == nullptr)
m_validator->report_error(validation_error::item_not_allowed_in_category, m_name, item_name, false);
}
m_items.emplace_back(item_name, item_validator);
}
return result;
}
/** @brief Remove item name @a colum_name
* @param item_name The item to be removed
*/
void remove_item(std::string_view item_name);
/** @brief Rename item @a from_name to @a to_name */
void rename_item(std::string_view from_name, std::string_view to_name);
/// @brief Return whether a item with name @a name exists in this category
/// @param name The name of the item
/// @return True if the item exists
bool has_item(std::string_view name) const
{
return get_item_ix(name) < m_items.size();
}
/// @brief Return the cif::iset of items in this category
iset get_items() const;
// --------------------------------------------------------------------
@@ -1029,30 +1169,36 @@ class category
// --------------------------------------------------------------------
/// This function returns effectively the list of fully qualified column
/// names, that is category_name + '.' + column_name for each column
std::vector<std::string> get_tag_order() const;
/// This function returns effectively the list of fully qualified item
/// names, that is category_name + '.' + item_name for each item
[[deprecated("use get_item_order instead")]] std::vector<std::string> get_tag_order() const
{
return get_item_order();
}
/// This function returns effectively the list of fully qualified item
/// names, that is category_name + '.' + item_name for each item
std::vector<std::string> get_item_order() const;
/// Write the contents of the category to the std::ostream @a os
void write(std::ostream &os) const;
/// @brief Write the contents of the category to the std::ostream @a os and
/// use @a order as the order of the columns. If @a addMissingColumns is
/// false, columns that do not contain any value will be suppressed
/// use @a order as the order of the items. If @a addMissingItems is
/// false, items that do not contain any value will be suppressed
/// @param os The std::ostream to write to
/// @param order The order in which the columns should appear
/// @param addMissingColumns When false, empty columns are suppressed from the output
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingColumns = true);
/// @param order The order in which the items should appear
/// @param addMissingItems When false, empty items are suppressed from the output
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingItems = true);
private:
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const;
public:
/// friend function to make it possible to do:
/// @code {.cpp}
/// std::cout << my_category;
/// @endcode
/// @endcode
friend std::ostream &operator<<(std::ostream &os, const category &cat)
{
cat.write(os);
@@ -1060,7 +1206,7 @@ class category
}
private:
void update_value(row *row, uint16_t column, std::string_view value, bool updateLinked, bool validate = true);
void update_value(row *row, uint16_t item, std::string_view value, bool updateLinked, bool validate = true);
void erase_orphans(condition &&cond, category &parent);
@@ -1097,12 +1243,12 @@ class category
row_handle create_copy(row_handle r);
struct item_column
struct item_entry
{
std::string m_name;
const item_validator *m_validator;
item_column(std::string_view name, const item_validator *validator)
item_entry(std::string_view name, const item_validator *validator)
: m_name(name)
, m_validator(validator)
{
@@ -1132,12 +1278,12 @@ class category
// --------------------------------------------------------------------
void swap_item(uint16_t column_ix, row_handle &a, row_handle &b);
void swap_item(uint16_t item_ix, row_handle &a, row_handle &b);
// --------------------------------------------------------------------
std::string m_name;
std::vector<item_column> m_columns;
std::vector<item_entry> m_items;
const validator *m_validator = nullptr;
const category_validator *m_cat_validator = nullptr;
std::vector<link> m_parent_links, m_child_links;

View File

@@ -166,17 +166,28 @@ class compound
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
}
/** \brief Return whether this compound has a type of either 'peptide linking' or 'L-peptide linking' */
bool is_peptide() const;
/** \brief Return whether this compound has a type of either 'DNA linking' or 'RNA linking' */
bool is_base() const;
char one_letter_code() const { return m_one_letter_code; }; ///< Return the one letter code to use in a canonical sequence. If unknown the value '\0' is returned
std::string parent_id() const { return m_parent_id; }; ///< Return the parent id code in case a parent is specified (e.g. MET for MSE)
private:
friend class compound_factory_impl;
friend class local_compound_factory_impl;
compound(cif::datablock &db);
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
compound(cif::datablock &db, int);
std::string m_id;
std::string m_name;
std::string m_type;
std::string m_group;
std::string m_formula;
char m_one_letter_code = 0;
std::string m_parent_id;
float m_formula_weight = 0;
int m_formal_charge = 0;
std::vector<compound_atom> m_atoms;
@@ -214,29 +225,71 @@ class compound_factory
/// Override any previously loaded dictionary with @a inDictFile
void push_dictionary(const std::filesystem::path &inDictFile);
/** @brief Override any previously loaded dictionary with the data in @a file
*
* @note experimental feature
*
* Load the file @a file as a source for compound information. This may
* be e.g. a regular mmCIF file with extra files containing compound
* information.
*
* Be carefull to remove the block again, best use @ref cif::compound_source
* as a stack based object.
*/
void push_dictionary(const file &file);
/// Remove the last pushed dictionary
void pop_dictionary();
/// Return whether @a res_name is a valid and known peptide
[[deprecated("use is_peptide or is_std_peptide instead)")]]
bool is_known_peptide(const std::string &res_name) const;
/// Return whether @a res_name is a valid and known base
[[deprecated("use is_base or is_std_base instead)")]]
bool is_known_base(const std::string &res_name) const;
/// Return whether @a res_name is a peptide
bool is_peptide(std::string_view res_name) const;
/// Return whether @a res_name is a base
bool is_base(std::string_view res_name) const;
/// Return whether @a res_name is one of the standard peptides
bool is_std_peptide(std::string_view res_name) const;
/// Return whether @a res_name is one of the standard bases
bool is_std_base(std::string_view res_name) const;
/// Return whether @a res_name is a monomer (either base or peptide)
bool is_monomer(std::string_view res_name) const;
/// Return whether @a res_name is one of the standard bases or peptides
bool is_std_monomer(std::string_view res_name) const
{
return is_std_base(res_name) or is_std_peptide(res_name);
}
bool is_water(std::string_view res_name) const
{
return res_name == "HOH" or res_name == "H2O" or res_name == "WAT";
}
/// \brief Create the compound object for \a id
///
/// This will create the compound instance for \a id if it doesn't exist already.
/// The result is owned by this factory and should not be deleted by the user.
/// \param id The compound ID, a three letter code usually
/// \result The compound, or nullptr if it could not be created (missing info)
const compound *create(std::string id);
const compound *create(std::string_view id);
~compound_factory();
CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids
kBaseMap; ///< Globally accessible static list of the default bases
void report_missing_compound(const std::string &compound_id);
void report_missing_compound(std::string_view compound_id);
private:
compound_factory();
@@ -251,4 +304,35 @@ class compound_factory
std::shared_ptr<compound_factory_impl> m_impl;
};
// --------------------------------------------------------------------
/**
* @brief Stack based source for compound info.
*
* Use this class to temporarily add a compound source to the
* compound_factory.
*
* @code{.cpp}
* cif::file f("1cbs-with-custom-rea.cif");
* cif::compound_source cs(f);
*
* auto &cf = cif::compound_factory::instance();
* auto rea_compound = cf.create("REA");
* @endcode
*/
class compound_source
{
public:
compound_source(const cif::file &file)
{
cif::compound_factory::instance().push_dictionary(file);
}
~compound_source()
{
cif::compound_factory::instance().pop_dictionary();
}
};
} // namespace cif

View File

@@ -39,17 +39,17 @@
* query you can use to find rows in a @ref cif::category
*
* Conditions are created as standard C++ expressions. That means
* you can use the standard comparison operators to compare field
* you can use the standard comparison operators to compare item
* contents with a value and boolean operators to chain everything
* together.
*
* To create a query that simply compares one field with one value:
* To create a query that simply compares one item with one value:
*
* @code {.cpp}
* cif::condition c = cif::key("id") == 1;
* @endcode
*
* That will find rows where the ID field contains the number 1. If
* That will find rows where the ID item contains the number 1. If
* using cif::key is a bit too much typing, you can also write:
*
* @code{.cpp}
@@ -64,7 +64,7 @@
* auto c3 = "id"_key == 1 or "id"_key == 2;
* @endcode
*
* There are some special values you can use. To find rows with field that
* There are some special values you can use. To find rows with item that
* do not have a value:
*
* @code{.cpp}
@@ -83,7 +83,7 @@
* auto c6 = cif::all;
* @endcode
*
* And when you want to search for any column containing the value 'foo':
* And when you want to search for any item containing the value 'foo':
*
* @code{.cpp}
* auto c7 = cif::any == "foo";
@@ -104,31 +104,40 @@ namespace cif
/// we declare a function to access its contents
/**
* @brief Get the fields that can be used as key in conditions for a category
* @brief Get the items that can be used as key in conditions for a category
*
* @param cat The category whose fields to return
* @return iset The set of key field names
* @param cat The category whose items to return
* @return iset The set of key item names
*/
[[deprecated("use get_category_items instead")]]
iset get_category_fields(const category &cat);
/**
* @brief Get the column index for column @a col in category @a cat
* @brief Get the items that can be used as key in conditions for a category
*
* @param cat The category
* @param col The name of the column
* @return uint16_t The index
* @param cat The category whose items to return
* @return iset The set of key field names
*/
uint16_t get_column_ix(const category &cat, std::string_view col);
iset get_category_items(const category &cat);
/**
* @brief Return whether the column @a col in category @a cat has a primitive type of *uchar*
* @brief Get the item index for item @a col in category @a cat
*
* @param cat The category
* @param col The column name
* @param col The name of the item
* @return uint16_t The index
*/
uint16_t get_item_ix(const category &cat, std::string_view col);
/**
* @brief Return whether the item @a col in category @a cat has a primitive type of *uchar*
*
* @param cat The category
* @param col The item name
* @return true If the primitive type is of type *uchar*
* @return false If the primitive type is not of type *uchar*
*/
bool is_column_type_uchar(const category &cat, std::string_view col);
bool is_item_type_uchar(const category &cat, std::string_view col);
// --------------------------------------------------------------------
// some more templates to be able to do querying
@@ -219,7 +228,7 @@ class condition
/**
* @brief Prepare the condition to be used on category @a c. This will
* take care of setting the correct indices for fields e.g.
* take care of setting the correct indices for items e.g.
*
* @param c The category this query should act upon
*/
@@ -305,14 +314,14 @@ namespace detail
{
struct key_is_empty_condition_impl : public condition_impl
{
key_is_empty_condition_impl(const std::string &item_tag)
: m_item_tag(item_tag)
key_is_empty_condition_impl(const std::string &item_name)
: m_item_name(item_name)
{
}
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_item_ix = get_item_ix(c, m_item_name);
return this;
}
@@ -323,23 +332,23 @@ namespace detail
void str(std::ostream &os) const override
{
os << m_item_tag << " IS NULL";
os << m_item_name << " IS NULL";
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix = 0;
};
struct key_is_not_empty_condition_impl : public condition_impl
{
key_is_not_empty_condition_impl(const std::string &item_tag)
: m_item_tag(item_tag)
key_is_not_empty_condition_impl(const std::string &item_name)
: m_item_name(item_name)
{
}
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_item_ix = get_item_ix(c, m_item_name);
return this;
}
@@ -350,18 +359,18 @@ namespace detail
void str(std::ostream &os) const override
{
os << m_item_tag << " IS NOT NULL";
os << m_item_name << " IS NOT NULL";
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix = 0;
};
struct key_equals_condition_impl : public condition_impl
{
key_equals_condition_impl(item &&i)
: m_item_tag(i.name())
, m_value(i.value())
: m_item_name(i.name())
, m_value(std::forward<item>(i).value())
{
}
@@ -374,7 +383,7 @@ namespace detail
void str(std::ostream &os) const override
{
os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value;
os << m_item_name << (m_icase ? "^ " : " ") << " == " << m_value;
}
virtual std::optional<row_handle> single() const override
@@ -390,13 +399,13 @@ namespace detail
if (m_single_hit.has_value() or ri->m_single_hit.has_value())
return m_single_hit == ri->m_single_hit;
else
// watch out, both m_item_ix might be the same while tags might be diffent (in case they both do not exist in the category)
return m_item_ix == ri->m_item_ix and m_value == ri->m_value and m_item_tag == ri->m_item_tag;
// watch out, both m_item_ix might be the same while item_names might be diffent (in case they both do not exist in the category)
return m_item_ix == ri->m_item_ix and m_value == ri->m_value and m_item_name == ri->m_item_name;
}
return this == rhs;
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix = 0;
bool m_icase = false;
std::string m_value;
@@ -406,7 +415,7 @@ namespace detail
struct key_equals_or_empty_condition_impl : public condition_impl
{
key_equals_or_empty_condition_impl(key_equals_condition_impl *equals)
: m_item_tag(equals->m_item_tag)
: m_item_name(equals->m_item_name)
, m_value(equals->m_value)
, m_icase(equals->m_icase)
, m_single_hit(equals->m_single_hit)
@@ -415,8 +424,8 @@ namespace detail
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
m_item_ix = get_item_ix(c, m_item_name);
m_icase = is_item_type_uchar(c, m_item_name);
return this;
}
@@ -432,7 +441,7 @@ namespace detail
void str(std::ostream &os) const override
{
os << '(' << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_tag << " IS NULL)";
os << '(' << m_item_name << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_name << " IS NULL)";
}
virtual std::optional<row_handle> single() const override
@@ -448,13 +457,13 @@ namespace detail
if (m_single_hit.has_value() or ri->m_single_hit.has_value())
return m_single_hit == ri->m_single_hit;
else
// watch out, both m_item_ix might be the same while tags might be diffent (in case they both do not exist in the category)
return m_item_ix == ri->m_item_ix and m_value == ri->m_value and m_item_tag == ri->m_item_tag;
// watch out, both m_item_ix might be the same while item_names might be diffent (in case they both do not exist in the category)
return m_item_ix == ri->m_item_ix and m_value == ri->m_value and m_item_name == ri->m_item_name;
}
return this == rhs;
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix = 0;
std::string m_value;
bool m_icase = false;
@@ -464,8 +473,8 @@ namespace detail
struct key_compare_condition_impl : public condition_impl
{
template <typename COMP>
key_compare_condition_impl(const std::string &item_tag, COMP &&comp, const std::string &s)
: m_item_tag(item_tag)
key_compare_condition_impl(const std::string &item_name, COMP &&comp, const std::string &s)
: m_item_name(item_name)
, m_compare(std::move(comp))
, m_str(s)
{
@@ -473,8 +482,8 @@ namespace detail
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
m_item_ix = get_item_ix(c, m_item_name);
m_icase = is_item_type_uchar(c, m_item_name);
return this;
}
@@ -485,10 +494,10 @@ namespace detail
void str(std::ostream &os) const override
{
os << m_item_tag << (m_icase ? "^ " : " ") << m_str;
os << m_item_name << (m_icase ? "^ " : " ") << m_str;
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix = 0;
bool m_icase = false;
std::function<bool(row_handle, bool)> m_compare;
@@ -497,8 +506,8 @@ namespace detail
struct key_matches_condition_impl : public condition_impl
{
key_matches_condition_impl(const std::string &item_tag, const std::regex &rx)
: m_item_tag(item_tag)
key_matches_condition_impl(const std::string &item_name, const std::regex &rx)
: m_item_name(item_name)
, m_item_ix(0)
, mRx(rx)
{
@@ -506,7 +515,7 @@ namespace detail
condition_impl *prepare(const category &c) override
{
m_item_ix = get_column_ix(c, m_item_tag);
m_item_ix = get_item_ix(c, m_item_name);
return this;
}
@@ -518,10 +527,10 @@ namespace detail
void str(std::ostream &os) const override
{
os << m_item_tag << " =~ expression";
os << m_item_name << " =~ expression";
}
std::string m_item_tag;
std::string m_item_name;
uint16_t m_item_ix;
std::regex mRx;
};
@@ -541,7 +550,7 @@ namespace detail
auto &c = r.get_category();
bool result = false;
for (auto &f : get_category_fields(c))
for (auto &f : get_category_items(c))
{
try
{
@@ -579,7 +588,7 @@ namespace detail
auto &c = r.get_category();
bool result = false;
for (auto &f : get_category_fields(c))
for (auto &f : get_category_items(c))
{
try
{
@@ -864,7 +873,7 @@ inline condition operator or(condition &&a, condition &&b)
auto ci = static_cast<detail::key_equals_condition_impl *>(a.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(b.m_impl);
if (ci->m_item_tag == ce->m_item_tag)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_or_empty_condition_impl(ci));
}
else if (typeid(*b.m_impl) == typeid(detail::key_equals_condition_impl) and
@@ -873,7 +882,7 @@ inline condition operator or(condition &&a, condition &&b)
auto ci = static_cast<detail::key_equals_condition_impl *>(b.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
if (ci->m_item_tag == ce->m_item_tag)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_or_empty_condition_impl(ci));
}
@@ -887,7 +896,7 @@ inline condition operator or(condition &&a, condition &&b)
}
/**
* @brief A helper class to make it possible to search for empty fields (NULL)
* @brief A helper class to make it possible to search for empty items (NULL)
*
* @code{.cpp}
* "id"_key == cif::empty_type();
@@ -909,35 +918,45 @@ struct empty_type
inline constexpr empty_type null = empty_type();
/**
* @brief Class to use in creating conditions, creates a reference to a field or column
* @brief Class to use in creating conditions, creates a reference to a item or item
*
*/
struct key
{
/**
* @brief Construct a new key object using @a itemTag as name
* @brief Construct a new key object using @a item_name as name
*
* @param itemTag
* @param item_name
*/
explicit key(const std::string &itemTag)
: m_item_tag(itemTag)
explicit key(const std::string &item_name)
: m_item_name(item_name)
{
}
/**
* @brief Construct a new key object using @a itemTag as name
* @brief Construct a new key object using @a item_name as name
*
* @param itemTag
* @param item_name
*/
explicit key(const char *itemTag)
: m_item_tag(itemTag)
explicit key(const char *item_name)
: m_item_name(item_name)
{
}
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*/
explicit key(std::string_view item_name)
: m_item_name(item_name)
{
}
key(const key &) = delete;
key &operator=(const key &) = delete;
std::string m_item_tag; ///< The column name
std::string m_item_name; ///< The item name
};
/**
@@ -946,7 +965,7 @@ struct key
template <typename T>
condition operator==(const key &key, const T &v)
{
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, v }));
return condition(new detail::key_equals_condition_impl({ key.m_item_name, v }));
}
/**
@@ -955,9 +974,9 @@ condition operator==(const key &key, const T &v)
inline condition operator==(const key &key, std::string_view value)
{
if (not value.empty())
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
return condition(new detail::key_equals_condition_impl({ key.m_item_name, value }));
else
return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
return condition(new detail::key_is_empty_condition_impl(key.m_item_name));
}
/**
@@ -987,8 +1006,8 @@ condition operator>(const key &key, const T &v)
s << " > " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
{ return r[tag].template compare<T>(v, icase) > 0; },
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].template compare<T>(v, icase) > 0; },
s.str()));
}
@@ -1002,8 +1021,8 @@ condition operator>=(const key &key, const T &v)
s << " >= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
{ return r[tag].template compare<T>(v, icase) >= 0; },
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].template compare<T>(v, icase) >= 0; },
s.str()));
}
@@ -1017,8 +1036,8 @@ condition operator<(const key &key, const T &v)
s << " < " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
{ return r[tag].template compare<T>(v, icase) < 0; },
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].template compare<T>(v, icase) < 0; },
s.str()));
}
@@ -1032,8 +1051,8 @@ condition operator<=(const key &key, const T &v)
s << " <= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_tag, [tag = key.m_item_tag, v](row_handle r, bool icase)
{ return r[tag].template compare<T>(v, icase) <= 0; },
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].template compare<T>(v, icase) <= 0; },
s.str()));
}
@@ -1042,7 +1061,7 @@ condition operator<=(const key &key, const T &v)
*/
inline condition operator==(const key &key, const std::regex &rx)
{
return condition(new detail::key_matches_condition_impl(key.m_item_tag, rx));
return condition(new detail::key_matches_condition_impl(key.m_item_name, rx));
}
/**
@@ -1050,7 +1069,7 @@ inline condition operator==(const key &key, const std::regex &rx)
*/
inline condition operator==(const key &key, const empty_type &)
{
return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
return condition(new detail::key_is_empty_condition_impl(key.m_item_name));
}
/**
@@ -1058,20 +1077,20 @@ inline condition operator==(const key &key, const empty_type &)
*/
inline condition operator!=(const key &key, const empty_type &)
{
return condition(new detail::key_is_not_empty_condition_impl(key.m_item_tag));
return condition(new detail::key_is_not_empty_condition_impl(key.m_item_name));
}
/**
* @brief Create a condition to search any column for a value @a v if @a v contains a value
* @brief Create a condition to search any item for a value @a v if @a v contains a value
* compare to null if not.
*/
template <typename T>
condition operator==(const key &key, const std::optional<T> &v)
{
if (v.has_value())
return condition(new detail::key_equals_condition_impl({ key.m_item_tag, *v }));
return condition(new detail::key_equals_condition_impl({ key.m_item_name, *v }));
else
return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
return condition(new detail::key_is_empty_condition_impl(key.m_item_name));
}
/**
@@ -1089,12 +1108,12 @@ struct any_type
/** @endcond */
/**
* @brief A helper for any field constructs
* @brief A helper for any item constructs
*/
inline constexpr any_type any = any_type{};
/**
* @brief Create a condition to search any column for a value @a v
* @brief Create a condition to search any item for a value @a v
*/
template <typename T>
condition operator==(const any_type &, const T &v)
@@ -1103,7 +1122,7 @@ condition operator==(const any_type &, const T &v)
}
/**
* @brief Create a condition to search any column for a regular expression @a rx
* @brief Create a condition to search any item for a regular expression @a rx
*/
inline condition operator==(const any_type &, const std::regex &rx)
{
@@ -1121,9 +1140,9 @@ inline condition all()
namespace literals
{
/**
* @brief Return a cif::key for the column name @a text
* @brief Return a cif::key for the item name @a text
*
* @param text The name of the column
* @param text The name of the item
* @param length The length of @a text
* @return key The cif::key created
*/

View File

@@ -61,12 +61,26 @@ class datablock : public std::list<category>
/** @cond */
datablock(const datablock &);
datablock(datablock &&) = default;
datablock &operator=(const datablock &);
datablock &operator=(datablock &&) = default;
datablock(datablock &&db) noexcept
{
swap_(*this, db);
}
datablock &operator=(datablock db)
{
swap_(*this, db);
return *this;
}
/** @endcond */
friend void swap_(datablock &a, datablock &b) noexcept
{
std::swap(a.m_name, b.m_name);
std::swap(a.m_validator, b.m_validator);
std::swap(static_cast<std::list<category>&>(a), static_cast<std::list<category>&>(b));
}
// --------------------------------------------------------------------
/**
@@ -106,6 +120,15 @@ class datablock : public std::list<category>
*/
bool is_valid() const;
/**
* @brief Validates the content of this datablock and all its content
* and updates or removes the audit_conform category to match the result.
*
* @return true If the content is valid
* @return false If the content is not valid
*/
bool is_valid();
/**
* @brief Validates all contained data for valid links between parents and children
* as defined in the validator
@@ -169,7 +192,16 @@ class datablock : public std::list<category>
/**
* @brief Get the preferred order of the categories when writing them
*/
std::vector<std::string> get_tag_order() const;
[[deprecated("use get_item_order instead")]]
std::vector<std::string> get_tag_order() const
{
return get_item_order();
}
/**
* @brief Get the preferred order of the categories when writing them
*/
std::vector<std::string> get_item_order() const;
/**
* @brief Write out the contents to @a os
@@ -177,9 +209,9 @@ class datablock : public std::list<category>
void write(std::ostream &os) const;
/**
* @brief Write out the contents to @a os using the order defined in @a tag_order
* @brief Write out the contents to @a os using the order defined in @a item_name_order
*/
void write(std::ostream &os, const std::vector<std::string> &tag_order);
void write(std::ostream &os, const std::vector<std::string> &item_name_order);
/**
* @brief Friend operator<< to write datablock @a db to std::ostream @a os

View File

@@ -100,10 +100,22 @@ class file : public std::list<datablock>
}
/** @cond */
file(const file &) = default;
file(file &&) = default;
file &operator=(const file &) = default;
file &operator=(file &&) = default;
file(const file &rhs)
: std::list<datablock>(rhs)
{
}
file(file &&rhs)
{
this->swap(rhs);
}
file &operator=(file f)
{
this->swap(f);
return *this;
}
/** @endcond */
/**

View File

@@ -44,7 +44,7 @@
/** \file item.hpp
*
* This file contains the declaration of item but also the item_value and item_handle
* These handle the storage of and access to the data for a single data field.
* These handle the storage of and access to the data for a single data item.
*/
namespace cif
@@ -120,8 +120,6 @@ class item
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value.assign(buffer, r.ptr - buffer);
}
@@ -141,8 +139,6 @@ class item
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value.assign(buffer, r.ptr - buffer);
}
@@ -158,8 +154,6 @@ class item
if (r.ec != std::errc())
throw std::runtime_error("Could not format number");
assert(r.ptr >= buffer and r.ptr < buffer + sizeof(buffer));
*r.ptr = 0;
m_value.assign(buffer, r.ptr - buffer);
}
@@ -174,12 +168,21 @@ class item
/// \brief constructor for an item with name \a name and as
/// content value \a value
item(const std::string_view name, const std::string_view value)
item(const std::string_view name, std::string_view value)
: m_name(name)
, m_value(value)
{
}
/// \brief constructor for an item with name \a name and as
/// content value \a value
template<typename T, std::enable_if_t<std::is_same_v<T, std::string>, int> = 0>
item(const std::string_view name, T &&value)
: m_name(name)
, m_value(std::move(value))
{
}
/// \brief constructor for an item with name \a name and as
/// content the optional value \a value
template <typename T>
@@ -219,7 +222,8 @@ class item
/** @endcond */
std::string_view name() const { return m_name; } ///< Return the name of the item
std::string_view value() const { return m_value; } ///< Return the value of the item
std::string_view value() const & { return m_value; } ///< Return the value of the item
std::string value() const && { return std::move(m_value); } ///< Return the value of the item
/// \brief replace the content of the stored value with \a v
void value(std::string_view v) { m_value = v; }
@@ -227,10 +231,10 @@ class item
/// \brief empty means either null or unknown
bool empty() const { return m_value.empty(); }
/// \brief returns true if the field contains '.'
/// \brief returns true if the item contains '.'
bool is_null() const { return m_value == "."; }
/// \brief returns true if the field contains '?'
/// \brief returns true if the item contains '?'
bool is_unknown() const { return m_value == "?"; }
/// \brief the length of the value string
@@ -284,19 +288,16 @@ struct item_value
}
/** @cond */
item_value(item_value &&rhs)
item_value(item_value &&rhs) noexcept
: m_length(std::exchange(rhs.m_length, 0))
, m_storage(std::exchange(rhs.m_storage, 0))
{
}
item_value &operator=(item_value &&rhs)
item_value &operator=(item_value &&rhs) noexcept
{
if (this != &rhs)
{
m_length = std::exchange(rhs.m_length, m_length);
m_storage = std::exchange(rhs.m_storage, m_storage);
}
std::swap(m_length, rhs.m_length);
std::swap(m_storage, rhs.m_storage);
return *this;
}
@@ -363,8 +364,35 @@ struct item_handle
template <typename T>
item_handle &operator=(const T &value)
{
item v{ "", value };
assign_value(v);
assign_value(item{ "", value }.value());
return *this;
}
/**
* @brief Assign value @a value to the item referenced
*
* @tparam T Type of the value
* @param value The value
* @return reference to this item_handle
*/
template <typename T>
item_handle &operator=(T &&value)
{
assign_value(item{ "", std::move(value) }.value());
return *this;
}
/**
* @brief Assign value @a value to the item referenced
*
* @tparam T Type of the value
* @param value The value
* @return reference to this item_handle
*/
template <size_t N>
item_handle &operator=(const char (&value)[N])
{
assign_value(item{ "", std::move(value) }.value());
return *this;
}
@@ -464,14 +492,14 @@ struct item_handle
/** Easy way to test for an empty item */
explicit operator bool() const { return not empty(); }
/// is_null return true if the field contains '.'
/// is_null return true if the item contains '.'
bool is_null() const
{
auto txt = text();
return txt.length() == 1 and txt.front() == '.';
}
/// is_unknown returns true if the field contains '?'
/// is_unknown returns true if the item contains '?'
bool is_unknown() const
{
auto txt = text();
@@ -484,11 +512,11 @@ struct item_handle
/**
* @brief Construct a new item handle object
*
* @param column Column index
* @param item Item index
* @param row Reference to the row
*/
item_handle(uint16_t column, row_handle &row)
: m_column(column)
item_handle(uint16_t item, row_handle &row)
: m_item_ix(item)
, m_row_handle(row)
{
}
@@ -505,10 +533,10 @@ struct item_handle
private:
item_handle();
uint16_t m_column;
uint16_t m_item_ix;
row_handle &m_row_handle;
void assign_value(const item &value);
void assign_value(std::string_view value);
};
// So sad that older gcc implementations of from_chars did not support floats yet...
@@ -556,7 +584,7 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
auto txt = ref.text();
if (txt.empty())
if (ref.empty())
result = 1;
else
{

View File

@@ -49,7 +49,7 @@ namespace cif
/**
* @brief Implementation of an iterator that can return
* multiple values in a tuple. Of course, that tuple can
* then used in structured binding to receive the values
* then be used in structured binding to receive the values
* in a for loop e.g.
*
* @tparam Category The category for this iterator
@@ -84,41 +84,38 @@ class iterator_impl
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
iterator_impl(iterator_impl &&rhs) = default;
template <typename C2, typename... T2s>
iterator_impl(const iterator_impl<C2, T2s...> &rhs)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
: m_current(const_cast<row_handle&>(rhs.m_current))
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
, m_item_ix(rhs.m_item_ix)
{
}
template <typename IRowType>
iterator_impl(iterator_impl<IRowType, Ts...> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current))
: m_current(const_cast<row_handle&>(rhs.m_current))
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
, m_item_ix(rhs.m_item_ix)
{
m_value = get(std::make_index_sequence<N>());
}
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, N> &cix)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_column_ix(cix)
: m_current(const_cast<row_handle&>(rhs.m_current))
, m_item_ix(cix)
{
m_value = get(std::make_index_sequence<N>());
}
iterator_impl &operator=(const iterator_impl &i)
iterator_impl &operator=(iterator_impl i)
{
m_category = i.m_category;
m_current = i.m_current;
m_column_ix = i.m_column_ix;
m_value = i.m_value;
std::swap(m_current, i.m_current);
std::swap(m_item_ix, i.m_item_ix);
std::swap(m_value, i.m_value);
return *this;
}
@@ -136,18 +133,18 @@ class iterator_impl
operator const row_handle() const
{
return { *m_category, *m_current };
return m_current;
}
operator row_handle()
{
return { *m_category, *m_current };
return m_current;
}
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
if (m_current)
m_current.m_row = m_current.m_row->m_next;
m_value = get(std::make_index_sequence<N>());
@@ -182,19 +179,12 @@ class iterator_impl
template <size_t... Is>
tuple_type get(std::index_sequence<Is...>) const
{
if (m_current != nullptr)
{
row_handle rh{ *m_category, *m_current };
return tuple_type{ rh[m_column_ix[Is]].template as<Ts>()... };
}
return {};
return m_current ? tuple_type{ m_current[m_item_ix[Is]].template as<Ts>()... } : tuple_type{};
}
category_type *m_category = nullptr;
row_type *m_current = nullptr;
row_handle m_current;
value_type m_value;
std::array<uint16_t, N> m_column_ix;
std::array<uint16_t, N> m_item_ix;
};
/**
@@ -219,37 +209,34 @@ class iterator_impl<Category>
using iterator_category = std::forward_iterator_tag;
using value_type = row_handle;
using difference_type = std::ptrdiff_t;
using pointer = row_handle;
using reference = row_handle;
using pointer = value_type *;
using reference = value_type &;
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
iterator_impl(iterator_impl &&rhs) = default;
template <typename C2>
iterator_impl(const iterator_impl<C2> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current))
: m_current(const_cast<row_handle &>(rhs.m_current))
{
}
iterator_impl(Category &cat, row *current)
: m_category(const_cast<category_type *>(&cat))
, m_current(current)
: m_current(cat, *current)
{
}
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, 0> &)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
: m_current(const_cast<row_handle &>(rhs.m_current))
{
}
iterator_impl &operator=(const iterator_impl &i)
iterator_impl &operator=(iterator_impl i)
{
m_category = i.m_category;
m_current = i.m_current;
std::swap(m_current, i.m_current);
return *this;
}
@@ -257,7 +244,7 @@ class iterator_impl<Category>
reference operator*()
{
return { *m_category, *m_current };
return m_current;
}
pointer operator->()
@@ -267,18 +254,18 @@ class iterator_impl<Category>
operator const row_handle() const
{
return { *m_category, *m_current };
return m_current;
}
operator row_handle()
{
return { *m_category, *m_current };
return m_current;
}
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
if (m_current)
m_current.m_row = m_current.m_row->m_next;
return *this;
}
@@ -308,8 +295,7 @@ class iterator_impl<Category>
/** @endcond */
private:
category_type *m_category = nullptr;
row_type *m_current = nullptr;
row_handle m_current;
};
/**
@@ -342,41 +328,38 @@ class iterator_impl<Category, T>
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
iterator_impl(iterator_impl &&rhs) = default;
template <typename C2, typename T2>
iterator_impl(const iterator_impl<C2, T2> &rhs)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
: m_current(rhs.m_current)
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
, m_item_ix(rhs.m_item_ix)
{
}
template <typename IRowType>
iterator_impl(iterator_impl<IRowType, T> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current))
: m_current(const_cast<row_handle&>(rhs.m_current))
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
{
m_value = get(m_current);
}
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, 1> &cix)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_column_ix(cix[0])
, m_item_ix(rhs.m_item_ix)
{
m_value = get();
}
iterator_impl &operator=(const iterator_impl &i)
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<uint16_t, 1> &cix)
: m_current(const_cast<row_handle&>(rhs.m_current))
, m_item_ix(cix[0])
{
m_category = i.m_category;
m_current = i.m_current;
m_column_ix = i.m_column_ix;
m_value = i.m_value;
m_value = get();
}
iterator_impl &operator=(iterator_impl i)
{
std::swap(m_current, i.m_current);
std::swap(m_item_ix, i.m_item_ix);
std::swap(m_value, i.m_value);
return *this;
}
@@ -394,18 +377,18 @@ class iterator_impl<Category, T>
operator const row_handle() const
{
return { *m_category, *m_current };
return m_current;
}
operator row_handle()
{
return { *m_category, *m_current };
return m_current;
}
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
if (m_current)
m_current.m_row = m_current.m_row->m_next;
m_value = get();
@@ -439,19 +422,12 @@ class iterator_impl<Category, T>
private:
value_type get() const
{
if (m_current != nullptr)
{
row_handle rh{ *m_category, *m_current };
return rh[m_column_ix].template as<T>();
}
return {};
return m_current ? m_current[m_item_ix].template as<value_type>() : value_type{};
}
category_type *m_category = nullptr;
row_type *m_current = nullptr;
row_handle m_current;
value_type m_value;
uint16_t m_column_ix;
uint16_t m_item_ix;
};
// --------------------------------------------------------------------
@@ -482,8 +458,8 @@ class iterator_proxy
using iterator = iterator_impl<category_type, Ts...>;
using row_iterator = iterator_impl<category_type>;
iterator_proxy(category_type &cat, row_iterator pos, char const *const columns[N]);
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> columns);
iterator_proxy(category_type &cat, row_iterator pos, char const *const items[N]);
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> items);
iterator_proxy(iterator_proxy &&p);
iterator_proxy &operator=(iterator_proxy &&p);
@@ -492,8 +468,8 @@ class iterator_proxy
iterator_proxy &operator=(const iterator_proxy &) = delete;
/** @endcond */
iterator begin() const { return iterator(m_begin, m_column_ix); } ///< Return the iterator pointing to the first row
iterator end() const { return iterator(m_end, m_column_ix); } ///< Return the iterator pointing past the last row
iterator begin() const { return iterator(m_begin, m_item_ix); } ///< Return the iterator pointing to the first row
iterator end() const { return iterator(m_end, m_item_ix); } ///< Return the iterator pointing past the last row
bool empty() const { return m_begin == m_end; } ///< Return true if the range is empty
explicit operator bool() const { return not empty(); } ///< Easy way to detect if the range is empty
@@ -510,13 +486,13 @@ class iterator_proxy
std::swap(m_category, rhs.m_category);
std::swap(m_begin, rhs.m_begin);
std::swap(m_end, rhs.m_end);
std::swap(m_column_ix, rhs.m_column_ix);
std::swap(m_item_ix, rhs.m_item_ix);
}
private:
category_type *m_category;
row_iterator m_begin, m_end;
std::array<uint16_t, N> m_column_ix;
std::array<uint16_t, N> m_item_ix;
};
// --------------------------------------------------------------------
@@ -562,22 +538,23 @@ class conditional_iterator_proxy
reference operator*()
{
return *mBegin;
return *m_begin;
}
pointer operator->()
{
return &*mBegin;
m_current = *m_begin;
return &m_current;
}
conditional_iterator_impl &operator++()
{
while (mBegin != mEnd)
while (m_begin != m_end)
{
if (++mBegin == mEnd)
if (++m_begin == m_end)
break;
if (m_condition->operator()(mBegin))
if (m_condition->operator()(m_begin))
break;
}
@@ -591,18 +568,22 @@ class conditional_iterator_proxy
return result;
}
bool operator==(const conditional_iterator_impl &rhs) const { return mBegin == rhs.mBegin; }
bool operator!=(const conditional_iterator_impl &rhs) const { return mBegin != rhs.mBegin; }
bool operator==(const conditional_iterator_impl &rhs) const { return m_begin == rhs.m_begin; }
bool operator!=(const conditional_iterator_impl &rhs) const { return m_begin != rhs.m_begin; }
bool operator==(const row_iterator &rhs) const { return m_begin == rhs; }
bool operator!=(const row_iterator &rhs) const { return m_begin != rhs; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin == rhs; }
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const { return m_begin == rhs; }
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin != rhs; }
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const { return m_begin != rhs; }
private:
CategoryType *mCat;
base_iterator mBegin, mEnd;
CategoryType *m_cat;
base_iterator m_begin, m_end;
value_type m_current;
const condition *m_condition;
};
@@ -646,26 +627,26 @@ class conditional_iterator_proxy
/** @cond */
template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N])
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const items[N])
: m_category(&cat)
, m_begin(pos)
, m_end(cat.end())
{
for (uint16_t i = 0; i < N; ++i)
m_column_ix[i] = m_category->get_column_ix(columns[i]);
m_item_ix[i] = m_category->get_item_ix(items[i]);
}
template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> columns)
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> items)
: m_category(&cat)
, m_begin(pos)
, m_end(cat.end())
{
// static_assert(columns.size() == N, "The list of column names should be exactly the same as the list of requested columns");
// static_assert(items.size() == N, "The list of item names should be exactly the same as the list of requested items");
std::uint16_t i = 0;
for (auto column : columns)
m_column_ix[i++] = m_category->get_column_ix(column);
for (auto item : items)
m_item_ix[i++] = m_category->get_item_ix(item);
}
// --------------------------------------------------------------------
@@ -673,13 +654,13 @@ iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos,
template <typename Category, typename... Ts>
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditional_iterator_impl(
Category &cat, row_iterator pos, const condition &cond, const std::array<uint16_t, N> &cix)
: mCat(&cat)
, mBegin(pos, cix)
, mEnd(cat.end(), cix)
: m_cat(&cat)
, m_begin(pos, cix)
, m_end(cat.end(), cix)
, m_condition(&cond)
{
if (m_condition == nullptr or m_condition->empty())
mBegin = mEnd;
m_begin = m_end;
}
template <typename Category, typename... Ts>
@@ -702,7 +683,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category
, mCBegin(pos)
, mCEnd(cat.end())
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types");
static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of item names should be equal to number of requested value types");
if (m_condition)
{
@@ -715,7 +696,7 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category
mCBegin = mCEnd;
uint16_t i = 0;
((mCix[i++] = m_cat->get_column_ix(names)), ...);
((mCix[i++] = m_cat->get_item_ix(names)), ...);
}
template <typename Category, typename... Ts>

View File

@@ -72,7 +72,7 @@ class structure;
*
* The class atom is a kind of flyweight class. It can be copied
* with low overhead. All data is stored in the underlying mmCIF
* categories but some very often used fields are cached in the
* categories but some very often used items are cached in the
* impl.
*
* It is also possible to have symmetry copies of atoms. They
@@ -207,7 +207,7 @@ class atom
/// \brief Copy assignement operator
atom &operator=(const atom &rhs) = default;
/// \brief Return the field named @a name in the _atom_site category for this atom
/// \brief Return the item named @a name in the _atom_site category for this atom
std::string get_property(std::string_view name) const
{
if (not m_impl)
@@ -215,7 +215,7 @@ class atom
return m_impl->get_property(name);
}
/// \brief Return the field named @a name in the _atom_site category for this atom cast to an int
/// \brief Return the item named @a name in the _atom_site category for this atom cast to an int
int get_property_int(std::string_view name) const
{
if (not m_impl)
@@ -223,7 +223,7 @@ class atom
return m_impl->get_property_int(name);
}
/// \brief Return the field named @a name in the _atom_site category for this atom cast to a float
/// \brief Return the item named @a name in the _atom_site category for this atom cast to a float
float get_property_float(std::string_view name) const
{
if (not m_impl)
@@ -231,7 +231,7 @@ class atom
return m_impl->get_property_float(name);
}
/// \brief Set value for the field named @a name in the _atom_site category to @a value
/// \brief Set value for the item named @a name in the _atom_site category to @a value
void set_property(const std::string_view name, const std::string &value)
{
if (not m_impl)
@@ -239,7 +239,7 @@ class atom
m_impl->set_property(name, value);
}
/// \brief Set value for the field named @a name in the _atom_site category to @a value
/// \brief Set value for the item named @a name in the _atom_site category to @a value
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
void set_property(const std::string_view name, const T &value)
{
@@ -730,7 +730,7 @@ class sugar : public residue
/**
* @brief Return the sugar number in the glycosylation tree
*
* To store the sugar number, the auth_seq_id field has been overloaded
* To store the sugar number, the auth_seq_id item has been overloaded
* in the specification. But since a sugar number should be, ehm, a number
* and auth_seq_id is specified to contain a string, we do a check here
* to see if it really is a number.
@@ -1115,11 +1115,4 @@ class structure
std::vector<residue> m_non_polymers;
};
// --------------------------------------------------------------------
/// \brief Reconstruct all missing categories for an assumed PDBx file.
/// Some people believe that simply dumping some atom records is enough.
/// \param db The cif::datablock that hopefully contains some valid data
void reconstruct_pdbx(datablock &db);
} // namespace cif::mm

View File

@@ -143,9 +143,9 @@ class sac_parser
enum class CIFToken
{
Unknown,
UNKNOWN,
Eof,
END_OF_FILE,
DATA,
LOOP,
@@ -153,24 +153,24 @@ class sac_parser
SAVE_,
SAVE_NAME,
STOP,
Tag,
Value
ITEM_NAME,
VALUE
};
static constexpr const char *get_token_name(CIFToken token)
{
switch (token)
{
case CIFToken::Unknown: return "Unknown";
case CIFToken::Eof: return "Eof";
case CIFToken::UNKNOWN: return "Unknown";
case CIFToken::END_OF_FILE: return "Eof";
case CIFToken::DATA: return "DATA";
case CIFToken::LOOP: return "LOOP";
case CIFToken::GLOBAL: return "GLOBAL";
case CIFToken::SAVE_: return "SAVE";
case CIFToken::SAVE_NAME: return "SAVE+name";
case CIFToken::STOP: return "STOP";
case CIFToken::Tag: return "Tag";
case CIFToken::Value: return "Value";
case CIFToken::ITEM_NAME: return "Tag";
case CIFToken::VALUE: return "Value";
default: return "Invalid token parameter";
}
}
@@ -267,9 +267,9 @@ class sac_parser
QuotedString,
QuotedStringQuote,
UnquotedString,
Tag,
TextField,
TextFieldNL,
ItemName,
TextItem,
TextItemNL,
Reserved,
Value
};

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -28,15 +28,17 @@
#include "cif++/file.hpp"
#include <system_error>
/**
* @file pdb.hpp
*
*
* This file presents the API to read and write files in the
* legacy and ancient PDB format.
*
*
* The code works on the basis of best effort since it is
* impossible to have correct round trip fidelity.
*
*
*/
namespace cif::pdb
@@ -81,7 +83,7 @@ inline void write(std::ostream &os, const file &f)
/** @brief Write out the data in @a db to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
@@ -90,7 +92,7 @@ void write(const std::filesystem::path &file, const datablock &db);
/** @brief Write out the data in @a f to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
@@ -99,6 +101,74 @@ inline void write(const std::filesystem::path &p, const file &f)
write(p, f.front());
}
// --------------------------------------------------------------------
/** \brief Reconstruct all missing categories for an assumed PDBx file.
*
* Some people believe that simply dumping some atom records is enough.
*
* \param file The cif::file that hopefully contains some valid data
* \param dictionary The mmcif dictionary to use
* \result Returns true if the resulting file is valid
*/
bool reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* This function throws a std::system_error in case of an error
*
* \param file The input file
* \param dictionary The mmcif dictionary to use
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* The dictionary is assumed to be specified in the file or to be the
* default mmcif_pdbx.dic dictionary.
*
* \param file The input file
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::error_code &ec);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* \param file The input file
* \param dictionary The dictionary to use
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary,
std::error_code &ec);
// --------------------------------------------------------------------
// Other I/O related routines
@@ -106,7 +176,7 @@ inline void write(const std::filesystem::path &p, const file &f)
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -116,7 +186,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -126,7 +196,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
@@ -136,12 +206,11 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace pdbx
} // namespace cif::pdb

View File

@@ -35,7 +35,10 @@
#if __has_include(<clipper/core/coords.h>)
#define HAVE_LIBCLIPPER 1
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wignored-qualifiers"
#include <clipper/core/coords.h>
#pragma GCC diagnostic pop
#endif
/** \file point.hpp

View File

@@ -51,7 +51,7 @@
* std::string name = rh["label_atom_id"].as<std::string>();
*
* // by index:
* uint16_t ix = atom_site.get_column_ix("label_atom_id");
* uint16_t ix = atom_site.get_item_ix("label_atom_id");
* assert(rh[ix].as<std::string() == name);
* @endcode
*
@@ -87,15 +87,15 @@ namespace detail
{
static constexpr size_t N = sizeof...(C);
get_row_result(const row_handle &r, std::array<uint16_t, N> &&columns)
get_row_result(const row_handle &r, std::array<uint16_t, N> &&items)
: m_row(r)
, m_columns(std::move(columns))
, m_items(std::move(items))
{
}
const item_handle operator[](uint16_t ix) const
{
return m_row[m_columns[ix]];
return m_row[m_items[ix]];
}
template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0>
@@ -107,11 +107,11 @@ namespace detail
template <typename... Ts, size_t... Is>
std::tuple<Ts...> get(std::index_sequence<Is...>) const
{
return std::tuple<Ts...>{ m_row[m_columns[Is]].template as<Ts>()... };
return std::tuple<Ts...>{ m_row[m_items[Is]].template as<Ts>()... };
}
const row_handle &m_row;
std::array<uint16_t, N> m_columns;
std::array<uint16_t, N> m_items;
};
// we want to be able to tie some variables to a get_row_result, for this we use tiewraps
@@ -208,6 +208,7 @@ class row_handle
friend class category;
friend class category_index;
friend class row_initializer;
template <typename, typename...> friend class iterator_impl;
row_handle() = default;
@@ -244,63 +245,70 @@ class row_handle
return not empty();
}
/// \brief return a cif::item_handle to the item in column @a column_ix
item_handle operator[](uint16_t column_ix)
/// \brief return a cif::item_handle to the item in item @a item_ix
item_handle operator[](uint16_t item_ix)
{
return empty() ? item_handle::s_null_item : item_handle(column_ix, *this);
return empty() ? item_handle::s_null_item : item_handle(item_ix, *this);
}
/// \brief return a const cif::item_handle to the item in column @a column_ix
const item_handle operator[](uint16_t column_ix) const
/// \brief return a const cif::item_handle to the item in item @a item_ix
const item_handle operator[](uint16_t item_ix) const
{
return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this));
return empty() ? item_handle::s_null_item : item_handle(item_ix, const_cast<row_handle &>(*this));
}
/// \brief return a cif::item_handle to the item in the column named @a column_name
item_handle operator[](std::string_view column_name)
/// \brief return a cif::item_handle to the item in the item named @a item_name
item_handle operator[](std::string_view item_name)
{
return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this);
return empty() ? item_handle::s_null_item : item_handle(add_item(item_name), *this);
}
/// \brief return a const cif::item_handle to the item in the column named @a column_name
const item_handle operator[](std::string_view column_name) const
/// \brief return a const cif::item_handle to the item in the item named @a item_name
const item_handle operator[](std::string_view item_name) const
{
return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this));
return empty() ? item_handle::s_null_item : item_handle(get_item_ix(item_name), const_cast<row_handle &>(*this));
}
/// \brief Return an object that can be used in combination with cif::tie
/// to assign the values for the columns @a columns
/// to assign the values for the items @a items
template <typename... C>
auto get(C... columns) const
auto get(C... items) const
{
return detail::get_row_result<C...>(*this, { get_column_ix(columns)... });
return detail::get_row_result<C...>(*this, { get_item_ix(items)... });
}
/// \brief Return a tuple of values of types @a Ts for the columns @a columns
/// \brief Return a tuple of values of types @a Ts for the items @a items
template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1, int> = 0>
std::tuple<Ts...> get(C... columns) const
std::tuple<Ts...> get(C... items) const
{
return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... });
return detail::get_row_result<Ts...>(*this, { get_item_ix(items)... });
}
/// \brief Get the value of column @a column cast to type @a T
/// \brief Get the value of item @a item cast to type @a T
template <typename T>
T get(const char *column) const
T get(const char *item) const
{
return operator[](get_column_ix(column)).template as<T>();
return operator[](get_item_ix(item)).template as<T>();
}
/// \brief assign each of the columns named in @a values to their respective value
/// \brief Get the value of item @a item cast to type @a T
template <typename T>
T get(std::string_view item) const
{
return operator[](get_item_ix(item)).template as<T>();
}
/// \brief assign each of the items named in @a values to their respective value
void assign(const std::vector<item> &values)
{
for (auto &value : values)
assign(value, true);
}
/** \brief assign the value @a value to the column named @a name
/** \brief assign the value @a value to the item named @a name
*
* If updateLinked it true, linked records are updated as well.
* That means that if column @a name is part of the link definition
* That means that if item @a name is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
@@ -310,13 +318,13 @@ class row_handle
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
{
assign(add_column(name), value, updateLinked, validate);
assign(add_item(name), value, updateLinked, validate);
}
/** \brief assign the value @a value to column at index @a column
/** \brief assign the value @a value to item at index @a item
*
* If updateLinked it true, linked records are updated as well.
* That means that if column @a column is part of the link definition
* That means that if item @a item is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
@@ -324,7 +332,7 @@ class row_handle
* checked to see if it conforms to the rules defined in the dictionary
*/
void assign(uint16_t column, std::string_view value, bool updateLinked, bool validate = true);
void assign(uint16_t item, std::string_view value, bool updateLinked, bool validate = true);
/// \brief compare two rows
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
@@ -333,10 +341,10 @@ class row_handle
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
private:
uint16_t get_column_ix(std::string_view name) const;
std::string_view get_column_name(uint16_t ix) const;
uint16_t get_item_ix(std::string_view name) const;
std::string_view get_item_name(uint16_t ix) const;
uint16_t add_column(std::string_view name);
uint16_t add_item(std::string_view name);
row *get_row()
{
@@ -353,7 +361,7 @@ class row_handle
assign(i.name(), i.value(), updateLinked);
}
void swap(uint16_t column, row_handle &r);
void swap(uint16_t item, row_handle &r);
category *m_category = nullptr;
row *m_row = nullptr;

View File

@@ -317,7 +317,7 @@ inline char tolower(int ch)
// --------------------------------------------------------------------
/** \brief return a tuple consisting of the category and item name for @a tag
/** \brief return a tuple consisting of the category and item name for @a item_name
*
* The category name is stripped of its leading underscore character.
*
@@ -325,7 +325,19 @@ inline char tolower(int ch)
* cif 1.0 formatted data.
*/
std::tuple<std::string, std::string> split_tag_name(std::string_view tag);
[[deprecated("use split_item_name instead")]]
std::tuple<std::string, std::string> split_tag_name(std::string_view item_name);
/** \brief return a tuple consisting of the category and item name for @a item_name
*
* The category name is stripped of its leading underscore character.
*
* If no dot character was found, the category name is empty. That's for
* cif 1.0 formatted data.
*/
std::tuple<std::string, std::string> split_item_name(std::string_view item_name);
// --------------------------------------------------------------------

View File

@@ -113,16 +113,12 @@ namespace colour
/**
* @brief Struct for delimited strings.
*/
template <typename StringType>
struct coloured_string_t
{
static_assert(std::is_reference_v<StringType> or std::is_pointer_v<StringType>,
"String type must be pointer or reference");
/**
* @brief Construct a new coloured string t object
*/
coloured_string_t(StringType s, colour_type fc, colour_type bc, style_type st)
coloured_string_t(std::string_view s, colour_type fc, colour_type bc, style_type st)
: m_str(s)
, m_fore_colour(static_cast<int>(fc) + 30)
, m_back_colour(static_cast<int>(bc) + 40)
@@ -152,12 +148,14 @@ namespace colour
<< cs.m_str
<< "\033[0m";
}
else
os << cs.m_str;
return os;
}
/// @cond
StringType m_str;
std::string_view m_str;
int m_fore_colour, m_back_colour;
int m_style;
/// @endcond
@@ -191,39 +189,13 @@ namespace colour
* @param st Text style to use
*/
template <typename char_type>
inline auto coloured(const char_type *str,
template <typename T>
requires std::is_assignable_v<std::string_view, T>
inline auto coloured(T str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return colour::detail::coloured_string_t<const char_type *>(str, fg, bg, st);
}
/// @brief Manipulator for coloured strings.
template <typename char_type, typename traits_type, typename allocator_type>
inline auto coloured(const std::basic_string<char_type, traits_type, allocator_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return colour::detail::coloured_string_t<const std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
}
/// @brief Manipulator for coloured strings.
template <typename char_type, typename traits_type, typename allocator_type>
inline auto coloured(std::basic_string<char_type, traits_type, allocator_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return colour::detail::coloured_string_t<std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
}
/// @brief Manipulator for coloured strings.
template <typename char_type, typename traits_type>
inline auto coloured(std::basic_string_view<char_type, traits_type> &str,
colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
colour::style_type st = colour::style_type::regular)
{
return colour::detail::coloured_string_t<std::basic_string_view<char_type, traits_type> &>(str, fg, bg, st);
return colour::detail::coloured_string_t(str, fg, bg, st);
}
// --------------------------------------------------------------------

View File

@@ -28,9 +28,11 @@
#include "cif++/text.hpp"
#include <cassert>
#include <filesystem>
#include <list>
#include <mutex>
#include <system_error>
#include <utility>
/**
@@ -49,27 +51,148 @@ namespace cif
struct category_validator;
// --------------------------------------------------------------------
// New: error_code
/**
* @brief The exception thrown when a validation error occurs
* @enum validation_error
*
* @brief A stronly typed class containing the error codes reported by @ref cif::validator and friends
*/
enum class validation_error
{
value_does_not_match_rx = 1, /**< The value of an item does not conform to the regular expression specified for it */
value_is_not_in_enumeration_list, /**< The value of an item is not in the list of values allowed */
not_a_known_primitive_type, /**< The type is not a known primitive type */
undefined_category, /**< Category has no definition in the dictionary */
unknown_item, /**< The item is not defined to be part of the category */
incorrect_item_validator, /**< Incorrectly specified validator for item */
missing_mandatory_items, /**< Missing mandatory items */
missing_key_items, /**< An index could not be constructed due to missing key items */
item_not_allowed_in_category, /**< Requested item allowed in category according to dictionary */
empty_file, /**< The file contains no datablocks */
empty_datablock, /**< The datablock contains no categories */
empty_category, /**< The category is empty */
not_valid_pdbx, /**< The file is not a valid PDBx file */
};
/**
* @brief The implementation for @ref validation_category error messages
*
*/
class validation_error : public std::exception
class validation_category_impl : public std::error_category
{
public:
/// @brief Constructor
validation_error(const std::string &msg);
/**
* @brief User friendly name
*
* @return const char*
*/
/// @brief Constructor
validation_error(const std::string &cat, const std::string &item,
const std::string &msg);
const char *name() const noexcept override
{
return "cif::validation";
}
/// @brief The description of the error
const char *what() const noexcept { return m_msg.c_str(); }
/**
* @brief Provide the error message as a string for the error code @a ev
*
* @param ev The error code
* @return std::string
*/
/// @cond
std::string m_msg;
/// @endcond
std::string message(int ev) const override
{
switch (static_cast<validation_error>(ev))
{
case validation_error::value_does_not_match_rx:
return "Value in item does not match regular expression";
case validation_error::value_is_not_in_enumeration_list:
return "Value is not in the enumerated list of valid values";
case validation_error::not_a_known_primitive_type:
return "The type is not a known primitive type";
case validation_error::undefined_category:
return "Category has no definition in the dictionary";
case validation_error::unknown_item:
return "Item is not defined to be part of the category";
case validation_error::incorrect_item_validator:
return "Incorrectly specified validator for item";
case validation_error::missing_mandatory_items:
return "Missing mandatory items";
case validation_error::missing_key_items:
return "An index could not be constructed due to missing key items";
case validation_error::item_not_allowed_in_category:
return "Requested item not allowed in category according to dictionary";
case validation_error::empty_file:
return "The file contains no datablocks";
case validation_error::empty_datablock:
return "The datablock contains no categories";
case validation_error::empty_category:
return "The category is empty";
case validation_error::not_valid_pdbx:
return "The file is not a valid PDBx file";
default:
assert(false);
return "unknown error code";
}
}
/**
* @brief Return whether two error codes are equivalent, always false in this case
*
*/
bool equivalent(const std::error_code & /*code*/, int /*condition*/) const noexcept override
{
return false;
}
};
/**
* @brief Return the implementation for the validation_category
*
* @return std::error_category&
*/
inline std::error_category &validation_category()
{
static validation_category_impl instance;
return instance;
}
inline std::error_code make_error_code(validation_error e)
{
return std::error_code(static_cast<int>(e), validation_category());
}
inline std::error_condition make_error_condition(validation_error e)
{
return std::error_condition(static_cast<int>(e), validation_category());
}
// --------------------------------------------------------------------
class validation_exception : public std::runtime_error
{
public:
validation_exception(validation_error err)
: validation_exception(make_error_code(err))
{
}
validation_exception(validation_error err, std::string_view category)
: validation_exception(make_error_code(err), category)
{
}
validation_exception(validation_error err, std::string_view category, std::string_view item)
: validation_exception(make_error_code(err), category, item)
{
}
validation_exception(std::error_code ec);
validation_exception(std::error_code ec, std::string_view category);
validation_exception(std::error_code ec, std::string_view category, std::string_view item);
};
// --------------------------------------------------------------------
@@ -85,6 +208,9 @@ enum class DDL_PrimitiveType
/// @brief Return the DDL_PrimitiveType encoded in @a s
DDL_PrimitiveType map_to_primitive_type(std::string_view s);
/// @brief Return the DDL_PrimitiveType encoded in @a s, error reporting variant
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept;
struct regex_impl;
/**
@@ -146,6 +272,26 @@ struct type_validator
int compare(std::string_view a, std::string_view b) const;
};
/** @brief Item alias, items can be renamed over time
*/
struct item_alias
{
item_alias(const std::string &alias_name, const std::string &dictionary, const std::string &version)
: m_name(alias_name)
, m_dict(dictionary)
, m_vers(version)
{
}
item_alias(const item_alias &) = default;
item_alias &operator=(const item_alias &) = default;
std::string m_name; ///< The alias_name
std::string m_dict; ///< The dictionary in which it was known
std::string m_vers; ///< The version of the dictionary
};
/**
* @brief An item_validator binds a type_validator to an item in
* a category along with other information found in the dictionary.
@@ -157,28 +303,32 @@ struct type_validator
*/
struct item_validator
{
std::string m_tag; ///< The item name
std::string m_item_name; ///< The item name
bool m_mandatory; ///< Flag indicating this item is mandatory
const type_validator *m_type; ///< The type for this item
cif::iset m_enums; ///< If filled, the set of allowed values
std::string m_default; ///< If filled, a default value for this item
category_validator *m_category = nullptr; ///< The category_validator this item_validator belongs to
std::vector<item_alias> m_aliases; ///< The aliases for this item
/// @brief Compare based on the name
bool operator<(const item_validator &rhs) const
{
return icompare(m_tag, rhs.m_tag) < 0;
return icompare(m_item_name, rhs.m_item_name) < 0;
}
/// @brief Compare based on the name
bool operator==(const item_validator &rhs) const
{
return iequals(m_tag, rhs.m_tag);
return iequals(m_item_name, rhs.m_item_name);
}
/// @brief Validate the value in @a value for this item
/// Will throw a validation_error exception if it fails
/// Will throw a std::system_error exception if it fails
void operator()(std::string_view value) const;
/// @brief A more gentle version of value validation
bool validate_value(std::string_view value, std::error_code &ec) const noexcept;
};
/**
@@ -191,8 +341,8 @@ struct category_validator
{
std::string m_name; ///< The name of the category
std::vector<std::string> m_keys; ///< The list of items that make up the key
cif::iset m_groups; ///< The category groups this category belongs to
cif::iset m_mandatory_fields; ///< The mandatory fields for this category
cif::iset m_groups; ///< The category groups this category belongs to
cif::iset m_mandatory_items; ///< The mandatory items for this category
std::set<item_validator> m_item_validators; ///< The item validators for the items in this category
/// @brief return true if this category sorts before @a rhs
@@ -202,10 +352,13 @@ struct category_validator
}
/// @brief Add item_validator @a v to the list of item validators
void addItemValidator(item_validator &&v);
void add_item_validator(item_validator &&v);
/// @brief Return the item_validator for item @a tag, may return nullptr
const item_validator *get_validator_for_item(std::string_view tag) const;
/// @brief Return the item_validator for item @a item_name, may return nullptr
const item_validator *get_validator_for_item(std::string_view item_name) const;
/// @brief Return the item_validator for an item that has as alias name @a item_name, may return nullptr
const item_validator *get_validator_for_aliased_item(std::string_view item_name) const;
};
/**
@@ -284,7 +437,24 @@ class validator
std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
/// @brief Bottleneck function to report an error in validation
void report_error(const std::string &msg, bool fatal) const;
void report_error(validation_error err, bool fatal = true) const
{
report_error(make_error_code(err), fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, bool fatal = true) const;
/// @brief Bottleneck function to report an error in validation
void report_error(validation_error err, std::string_view category,
std::string_view item, bool fatal = true) const
{
report_error(make_error_code(err), category, item, fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, std::string_view category,
std::string_view item, bool fatal = true) const;
const std::string &name() const { return m_name; } ///< Get the name of this validator
void set_name(const std::string &name) { m_name = name; } ///< Set the name of this validator

View File

@@ -2712,4 +2712,96 @@ _pdbx_chem_comp_audit.processing_site
HIS "Create component" 1999-07-08 EBI
HIS "Modify descriptor" 2011-06-04 RCSB
#
data_HOH
#
_chem_comp.id HOH
_chem_comp.name WATER
_chem_comp.type NON-POLYMER
_chem_comp.pdbx_type HETAS
_chem_comp.formula "H2 O"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2011-06-04
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces MTO
_chem_comp.formula_weight 18.015
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code HOH
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details ?
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 1NHE
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
HOH O O O 0 1 N N N -23.107 18.401 -21.626 -0.064 0.000 0.000 O HOH 1
HOH H1 1H H 0 1 N N N -22.157 18.401 -21.626 0.512 0.000 -0.776 H1 HOH 2
HOH H2 2H H 0 1 N N N -23.424 18.401 -20.730 0.512 0.000 0.776 H2 HOH 3
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
HOH O H1 SING N N 1
HOH O H2 SING N N 2
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
HOH SMILES ACDLabs 10.04 O
HOH SMILES_CANONICAL CACTVS 3.341 O
HOH SMILES CACTVS 3.341 O
HOH SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 O
HOH SMILES "OpenEye OEToolkits" 1.5.0 O
HOH InChI InChI 1.03 InChI=1S/H2O/h1H2
HOH InChIKey InChI 1.03 XLYOFNOQVPJJNP-UHFFFAOYSA-N
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
HOH "SYSTEMATIC NAME" ACDLabs 10.04 water
HOH "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 oxidane
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
HOH "Create component" 1999-07-08 RCSB
HOH "Modify descriptor" 2011-06-04 RCSB
##

File diff suppressed because it is too large Load Diff

View File

@@ -136,14 +136,17 @@ compound::compound(cif::datablock &db)
if (chemComp.size() != 1)
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");
std::string one_letter_code;
cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
if (one_letter_code.length() == 1)
m_one_letter_code = one_letter_code.front();
// The name should not contain newline characters since that triggers validation errors later on
cif::replace_all(m_name, "\n", "");
m_group = "non-polymer";
auto &chemCompAtom = db["chem_comp_atom"];
for (auto row : chemCompAtom)
{
@@ -153,6 +156,9 @@ compound::compound(cif::datablock &db)
row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
atom.type_symbol = atom_type_traits(type_symbol).type();
if (stereo_config.empty())
atom.stereo_config = stereo_config_type::N;
else
atom.stereo_config = parse_stereo_config_from_string(stereo_config);
m_atoms.push_back(std::move(atom));
}
@@ -163,17 +169,28 @@ compound::compound(cif::datablock &db)
compound_bond bond;
std::string valueOrder;
cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
if (valueOrder.empty())
bond.type = bond_type::sing;
else
bond.type = parse_bond_type_from_string(valueOrder);
m_bonds.push_back(std::move(bond));
}
}
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
: m_id(id)
, m_name(name)
, m_type(type)
, m_group(group)
compound::compound(cif::datablock &db, int)
{
auto &chemComp = db["chem_comp"];
if (chemComp.size() != 1)
throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
cif::tie(m_id, m_name) =
chemComp.front().get("id", "name");
cif::trim(m_name);
m_type = "NON-POLYMER";
auto &chemCompAtom = db["chem_comp_atom"];
for (auto row : chemCompAtom)
{
@@ -184,7 +201,6 @@ compound::compound(cif::datablock &db, const std::string &id, const std::string
atom.type_symbol = atom_type_traits(type_symbol).type();
m_formal_charge += atom.charge;
m_formula_weight += atom_type_traits(atom.type_symbol).weight();
m_atoms.push_back(std::move(atom));
}
@@ -209,11 +225,39 @@ compound::compound(cif::datablock &db, const std::string &id, const std::string
else
{
if (cif::VERBOSE > 0)
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << '\n';
std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << db.name() << '\n';
bond.type = bond_type::sing;
}
m_bonds.push_back(std::move(bond));
}
// reconstruct a formula and weight
m_formula_weight = 0;
std::map<atom_type, int> f;
for (auto &atom : m_atoms)
f[atom.type_symbol] += 1;
if (f.count(atom_type::C))
{
atom_type_traits att(atom_type::C);
m_formula += att.symbol() + std::to_string(f[atom_type::C]) + ' ';
m_formula_weight += att.weight() * f[atom_type::C];
}
for (const auto &[type, count] : f)
{
if (type == atom_type::C)
continue;
atom_type_traits att(type);
m_formula += att.symbol() + std::to_string(count) + ' ';
m_formula_weight += att.weight() * count;
}
if (not m_formula.empty())
m_formula.pop_back();
}
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
@@ -260,12 +304,23 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
auto a = get_atom_by_atom_id(atomId_1);
auto b = get_atom_by_atom_id(atomId_2);
result = distance(point{a.x, a.y, a.z}, point{b.x, b.y, b.z});
result = distance(point{ a.x, a.y, a.z }, point{ b.x, b.y, b.z });
}
return result;
}
// --------------------------------------------------------------------
bool compound::is_peptide() const
{
return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
}
bool compound::is_base() const
{
return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
}
// --------------------------------------------------------------------
// known amino acids and bases
@@ -316,7 +371,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
compound_factory_impl();
compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);
~compound_factory_impl()
virtual ~compound_factory_impl()
{
for (auto c : m_compounds)
delete c;
@@ -373,13 +428,15 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
os << "CCD components.cif resource\n";
else
os << "CCD components file: " << std::quoted(m_file.string()) << '\n';
if (m_next)
m_next->describe(os);
}
private:
compound *create(const std::string &id);
protected:
compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
virtual compound *create(const std::string &id);
std::shared_timed_mutex mMutex;
@@ -395,12 +452,17 @@ compound_factory_impl::compound_factory_impl()
{
}
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
: m_file(file)
, m_next(next)
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
: m_next(next)
{
}
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
{
m_file = file;
}
compound *compound_factory_impl::create(const std::string &id)
{
compound *result = nullptr;
@@ -476,6 +538,165 @@ compound *compound_factory_impl::create(const std::string &id)
// --------------------------------------------------------------------
class local_compound_factory_impl : public compound_factory_impl
{
public:
local_compound_factory_impl(const cif::file &file, std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
, m_local_file(file)
{
const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);
for (const auto &[id, name, threeLetterCode, group] :
file["comp_list"]["chem_comp"].rows<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group"))
{
auto &rdb = m_local_file["comp_" + id];
if (rdb.empty())
{
std::cerr << "Missing data in restraint file for id " + id + '\n';
continue;
}
construct_compound(rdb, id, name, threeLetterCode, group);
}
}
compound *create(const std::string &id) override;
private:
compound *construct_compound(const datablock &db, const std::string &id, const std::string &name, const std::string &three_letter_code, const std::string &group);
cif::file m_local_file;
};
compound *local_compound_factory_impl::create(const std::string &id)
{
compound *result = nullptr;
for (auto &db : m_local_file)
{
if (db.name() == "comp_" + id)
{
auto chem_comp = db.get("chem_comp");
if (not chem_comp)
break;
try
{
const auto &[id, name, threeLetterCode, group] =
chem_comp->front().get<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group");
result = construct_compound(db, id, name, threeLetterCode, group);
}
catch (const std::exception &ex)
{
std::throw_with_nested(std::runtime_error("Error loading compound " + id));
}
break;
}
}
return result;
}
compound *local_compound_factory_impl::construct_compound(const datablock &rdb, const std::string &id,
const std::string &name, const std::string &three_letter_code, const std::string &group)
{
cif::datablock db(id);
float formula_weight = 0;
int formal_charge = 0;
std::map<std::string,size_t> formula_data;
for (size_t ord = 1; const auto &[atom_id, type_symbol, type, charge, x, y, z] :
rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int, float, float, float>(
"atom_id", "type_symbol", "type", "charge", "x", "y", "z"))
{
auto atom = cif::atom_type_traits(type_symbol);
formula_weight += atom.weight();
formula_data[type_symbol] += 1;
db["chem_comp_atom"].emplace({
{ "comp_id", id },
{ "atom_id", atom_id },
{ "type_symbol", type_symbol },
{ "charge", charge },
{ "model_Cartn_x", x, 3 },
{ "model_Cartn_y", y, 3 },
{ "model_Cartn_z", z, 3 },
{ "pdbx_ordinal", ord++ }
});
formal_charge += charge;
}
for (size_t ord = 1; const auto &[atom_id_1, atom_id_2, type, aromatic] :
rdb["chem_comp_bond"].rows<std::string, std::string, std::string, bool>("atom_id_1", "atom_id_2", "type", "aromatic"))
{
std::string value_order("SING");
if (cif::iequals(type, "single") or cif::iequals(type, "sing"))
value_order = "SING";
else if (cif::iequals(type, "double") or cif::iequals(type, "doub"))
value_order = "DOUB";
else if (cif::iequals(type, "triple") or cif::iequals(type, "trip"))
value_order = "TRIP";
db["chem_comp_bond"].emplace({
{ "comp_id", id },
{ "atom_id_1", atom_id_1 },
{ "atom_id_2", atom_id_2 },
{ "value_order", value_order },
{ "pdbx_aromatic_flag", aromatic },
// TODO: fetch stereo_config info from chem_comp_chir
{ "pdbx_ordinal", ord++ }
});
}
db.emplace_back(rdb["pdbx_chem_comp_descriptor"]);
std::string formula;
for (bool first = true; const auto &[symbol, count]: formula_data)
{
if (std::exchange(first, false))
formula += ' ';
formula += symbol;
if (count > 1)
formula += std::to_string(count);
}
std::string type;
if (cif::iequals(group, "peptide") or cif::iequals(group, "l-peptide") or cif::iequals(group, "l-peptide linking"))
type = "L-PEPTIDE LINKING";
else if (cif::iequals(group, "dna"))
type = "DNA LINKING";
else if (cif::iequals(group, "rna"))
type = "RNA LINKING";
else
type = "NON-POLYMER";
db["chem_comp"].emplace({
{ "id", id },
{ "name", name },
{ "type", type },
{ "formula", formula },
{ "pdbx_formal_charge", formal_charge },
{ "formula_weight", formula_weight },
{ "three_letter_code", three_letter_code }
});
std::shared_lock lock(mMutex);
auto result = new compound(db);
m_compounds.push_back(result);
return result;
}
// --------------------------------------------------------------------
std::unique_ptr<compound_factory> compound_factory::s_instance;
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
bool compound_factory::s_use_thread_local_instance;
@@ -553,56 +774,111 @@ void compound_factory::push_dictionary(const fs::path &inDictFile)
}
}
void compound_factory::push_dictionary(const cif::file &inDictFile)
{
try
{
m_impl.reset(new local_compound_factory_impl(inDictFile, m_impl));
}
catch (const std::exception &)
{
std::throw_with_nested(std::runtime_error("Error loading dictionary from local mmCIF file"));
}
}
void compound_factory::pop_dictionary()
{
if (m_impl)
m_impl = m_impl->next();
}
const compound *compound_factory::create(std::string id)
const compound *compound_factory::create(std::string_view id)
{
auto result = m_impl ? m_impl->get(id) : nullptr;
auto result = m_impl ? m_impl->get(std::string{ id }) : nullptr;
if (not result)
report_missing_compound(id);
return result;
}
bool compound_factory::is_known_peptide(const std::string &resName) const
bool compound_factory::is_known_peptide(const std::string &res_name) const
{
return kAAMap.count(resName) > 0;
return kAAMap.count(res_name) > 0;
}
bool compound_factory::is_known_base(const std::string &resName) const
bool compound_factory::is_known_base(const std::string &res_name) const
{
return kBaseMap.count(resName) > 0;
return kBaseMap.count(res_name) > 0;
}
void compound_factory::report_missing_compound(const std::string &compound_id)
/// Return whether @a res_name is a peptide
bool compound_factory::is_peptide(std::string_view res_name) const
{
bool result = is_std_peptide(res_name);
if (not result and m_impl)
{
auto compound = const_cast<compound_factory&>(*this).create(res_name);
result = compound != nullptr and compound->is_peptide();
}
return result;
}
/// Return whether @a res_name is a base
bool compound_factory::is_base(std::string_view res_name) const
{
bool result = is_std_base(res_name);
if (not result and m_impl)
{
auto compound = const_cast<compound_factory&>(*this).create(res_name);
result = compound != nullptr and compound->is_base();
}
return result;
}
/// Return whether @a res_name is one of the standard peptides
bool compound_factory::is_std_peptide(std::string_view res_name) const
{
return kAAMap.count(std::string{ res_name }) > 0;
}
/// Return whether @a res_name is one of the standard bases
bool compound_factory::is_std_base(std::string_view res_name) const
{
return kBaseMap.count(std::string{ res_name }) > 0;
}
/// Return whether @a res_name is a monomer (either base or peptide)
bool compound_factory::is_monomer(std::string_view res_name) const
{
return is_peptide(res_name) or is_base(res_name);
}
void compound_factory::report_missing_compound(std::string_view compound_id)
{
static bool s_reported = false;
if (std::exchange(s_reported, true) == false)
{
using namespace cif::colour;
std::clog << "\n" << cif::coloured("Configuration error:", white, red) << "\n\n"
std::clog << "\n"
<< cif::coloured("Configuration error:", white, red) << "\n\n"
<< "The attempt to retrieve compound information for " << std::quoted(compound_id) << " failed.\n\n"
<< "This information is searched for in a CCD file called components.cif or\n"
<< "components.cif.gz which should be located in one of the following directories:\n\n";
cif::list_data_directories(std::clog);
std::clog << "\n(Note that you can add a directory to the search paths by setting the \n"
<< "LIBCIFPP_DATA_DIR environmental variable)\n\n";
#if defined(CACHE_DIR)
#if defined(CACHE_DIR)
std::clog << "On Linux an optional cron script might have been installed that automatically updates\n"
<< "components.cif and mmCIF dictionary files. This script only works when the file\n"
<< "libcifpp.conf contains an uncommented line with the text:\n\n"
<< "update=true\n\n"
<< "If you do not have a working cron script, you can manually update the files\n"
<< "in /var/cache/libcifpp using the following commands:\n\n"
<< "curl -o " << CACHE_DIR << "/components.cif https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
<< "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif\n"
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic\n"
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
#endif
@@ -613,9 +889,9 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
}
else
std::clog << "No compound factory objects are created since none of the data sources is found.\n";
cif::list_file_resources(std::clog);
std::clog.flush();
}
}

View File

@@ -30,17 +30,17 @@
namespace cif
{
iset get_category_fields(const category &cat)
iset get_category_items(const category &cat)
{
return cat.key_fields();
return cat.key_items();
}
uint16_t get_column_ix(const category &cat, std::string_view col)
uint16_t get_item_ix(const category &cat, std::string_view col)
{
return cat.get_column_ix(col);
return cat.get_item_ix(col);
}
bool is_column_type_uchar(const category &cat, std::string_view col)
bool is_item_type_uchar(const category &cat, std::string_view col)
{
bool result = false;
@@ -63,14 +63,14 @@ namespace detail
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
m_item_ix = c.get_column_ix(m_item_tag);
m_icase = is_column_type_uchar(c, m_item_tag);
m_item_ix = c.get_item_ix(m_item_name);
m_icase = is_item_type_uchar(c, m_item_name);
if (c.get_cat_validator() != nullptr and
c.key_field_indices().contains(m_item_ix) and
c.key_field_indices().size() == 1)
c.key_item_indices().contains(m_item_ix) and
c.key_item_indices().size() == 1)
{
m_single_hit = c[{ { m_item_tag, m_value } }];
m_single_hit = c[{ { m_item_name, m_value } }];
}
return this;

View File

@@ -38,21 +38,6 @@ datablock::datablock(const datablock &db)
cat.update_links(*this);
}
datablock &datablock::operator=(const datablock &db)
{
if (this != &db)
{
std::list<category>::operator=(db);
m_name = db.m_name;
m_validator = db.m_validator;
for (auto &cat : *this)
cat.update_links(*this);
}
return *this;
}
void datablock::set_validator(const validator *v)
{
m_validator = v;
@@ -85,13 +70,47 @@ bool datablock::is_valid() const
return result;
}
bool datablock::is_valid()
{
if (m_validator == nullptr)
throw std::runtime_error("Validator not specified");
bool result = true;
for (auto &cat : *this)
result = cat.is_valid() and result;
// Add or remove the audit_conform block here.
if (result)
{
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (m_validator->get_validator_for_category("audit_conform") != nullptr)
{
auto &audit_conform = operator[]("audit_conform");
audit_conform.clear();
audit_conform.emplace({
// clang-format off
{ "dict_name", m_validator->name() },
{ "dict_version", m_validator->version() }
// clang-format on
});
}
}
else
erase(std::find_if(begin(), end(), [](category &cat) { return cat.name() == "audit_conform"; }), end());
return result;
}
bool datablock::validate_links() const
{
bool result = true;
for (auto &cat : *this)
result = cat.validate_links() and result;
return result;
}
@@ -143,13 +162,6 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
if (iequals(name, i->name()))
{
is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break;
}
@@ -158,30 +170,32 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
if (is_new)
{
auto &c = emplace_front(name);
c.set_validator(m_validator, *this);
i = insert(end(), {name});
i->set_validator(m_validator, *this);
}
return std::make_tuple(begin(), is_new);
assert(i != end());
return std::make_tuple(i, is_new);
}
std::vector<std::string> datablock::get_tag_order() const
std::vector<std::string> datablock::get_item_order() const
{
std::vector<std::string> result;
// for entry and audit_conform on top
auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
auto ci = find_if(begin(), end(), [](const category &cat)
{ return cat.name() == "entry"; });
if (ci != end())
{
auto cto = ci->get_tag_order();
auto cto = ci->get_item_order();
result.insert(result.end(), cto.begin(), cto.end());
}
ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
ci = find_if(begin(), end(), [](const category &cat)
{ return cat.name() == "audit_conform"; });
if (ci != end())
{
auto cto = ci->get_tag_order();
auto cto = ci->get_item_order();
result.insert(result.end(), cto.begin(), cto.end());
}
@@ -189,62 +203,131 @@ std::vector<std::string> datablock::get_tag_order() const
{
if (cat.name() == "entry" or cat.name() == "audit_conform")
continue;
auto cto = cat.get_tag_order();
auto cto = cat.get_item_order();
result.insert(result.end(), cto.begin(), cto.end());
}
return result;
}
namespace
{
using elem_t = std::tuple<std::string, int, bool>;
using cat_order_t = std::vector<elem_t>;
using iter_t = cat_order_t::iterator;
inline int get_count(iter_t i)
{
return std::get<1>(*i);
}
inline bool is_on_stack(iter_t i)
{
return std::get<2>(*i);
}
void calculate_cat_order(cat_order_t &cat_order, iter_t i, const validator &validator)
{
if (i == cat_order.end() or get_count(i) >= 0)
return;
auto &&[cat, count, on_stack] = *i;
on_stack = true;
int parent_count = 0;
for (auto link : validator.get_links_for_child(cat))
{
auto ei = std::find_if(cat_order.begin(), cat_order.end(), [parent = link->m_parent_category](elem_t &a)
{ return std::get<0>(a) == parent; });
if (ei == cat_order.end())
continue;
if (not is_on_stack(ei))
calculate_cat_order(cat_order, ei, validator);
parent_count += get_count(ei);
}
count = parent_count + 1;
}
} // namespace
void datablock::write(std::ostream &os) const
{
os << "data_" << m_name << '\n'
<< "# \n";
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
if (m_validator and size() > 0)
{
if (cat.name() != "entry")
continue;
// base order on parent child relationships, parents first
cat.write(os);
cat_order_t cat_order;
break;
for (auto &cat : *this)
{
if (cat.name() == "entry" or cat.name() == "audit_conform")
continue;
cat_order.emplace_back(cat.name(), -1, false);
}
for (auto i = cat_order.begin(); i != cat_order.end(); ++i)
calculate_cat_order(cat_order, i, *m_validator);
std::sort(cat_order.begin(), cat_order.end(), [](const elem_t &a, const elem_t &b)
{
const auto &[cat_a, count_a, on_stack_a] = a;
const auto &[cat_b, count_b, on_stack_b] = b;
int d = std::get<1>(a) - std::get<1>(b);
if (d == 0)
d = cat_b.compare(cat_a);
return d < 0; });
if (auto entry = get("entry"); entry != nullptr)
entry->write(os);
if (auto audit_conform = get("audit_conform"); audit_conform != nullptr)
audit_conform->write(os);
for (auto &&[cat, count, on_stack] : cat_order)
get(cat)->write(os);
}
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (get("audit_conform"))
get("audit_conform")->write(os);
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
else
{
category auditConform("audit_conform");
auditConform.emplace({
{"dict_name", m_validator->name()},
{"dict_version", m_validator->version()}});
auditConform.write(os);
}
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
if (auto entry = get("entry"); entry != nullptr)
entry->write(os);
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (auto audit_conform = get("audit_conform"); audit_conform != nullptr)
audit_conform->write(os);
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
}
}
}
void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order)
void datablock::write(std::ostream &os, const std::vector<std::string> &item_name_order)
{
os << "data_" << m_name << '\n'
<< "# \n";
std::vector<std::string> cat_order;
for (auto &o : tag_order)
std::vector<std::string> cat_order{ "entry", "audit_conform" };
for (auto &o : item_name_order)
{
std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(o);
std::tie(cat_name, item_name) = split_item_name(o);
if (find_if(cat_order.rbegin(), cat_order.rend(), [cat_name](const std::string &s) -> bool
{ return iequals(cat_name, s); }) == cat_order.rend())
cat_order.push_back(cat_name);
@@ -257,10 +340,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde
continue;
std::vector<std::string> items;
for (auto &o : tag_order)
for (auto &o : item_name_order)
{
std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(o);
std::tie(cat_name, item_name) = split_item_name(o);
if (cat_name == c)
items.push_back(item_name);
@@ -282,6 +365,10 @@ void datablock::write(std::ostream &os, const std::vector<std::string> &tag_orde
bool datablock::operator==(const datablock &rhs) const
{
// shortcut
if (this == &rhs)
return true;
auto &dbA = *this;
auto &dbB = rhs;
@@ -337,7 +424,7 @@ bool datablock::operator==(const datablock &rhs) const
++catA_i;
else
{
if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
if (not(*dbA.get(*catA_i) == *dbB.get(*catB_i)))
return false;
++catA_i;
++catB_i;
@@ -347,4 +434,4 @@ bool datablock::operator==(const datablock &rhs) const
return true;
}
} // namespace cif::cif
} // namespace cif

View File

@@ -50,7 +50,7 @@ class dictionary_parser : public parser
try
{
while (m_lookahead != CIFToken::Eof)
while (m_lookahead != CIFToken::END_OF_FILE)
{
switch (m_lookahead)
{
@@ -87,7 +87,7 @@ class dictionary_parser : public parser
error("Undefined category '" + iv.first);
for (auto &v : iv.second)
const_cast<category_validator *>(cv)->addItemValidator(std::move(v));
const_cast<category_validator *>(cv)->add_item_validator(std::move(v));
}
// check all item validators for having a typeValidator
@@ -128,7 +128,7 @@ class dictionary_parser : public parser
datablock::iterator cat = dict.end();
match(CIFToken::SAVE_NAME);
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::ITEM_NAME)
{
if (m_lookahead == CIFToken::LOOP)
{
@@ -136,30 +136,30 @@ class dictionary_parser : public parser
match(CIFToken::LOOP);
std::vector<std::string> tags;
while (m_lookahead == CIFToken::Tag)
std::vector<std::string> item_names;
while (m_lookahead == CIFToken::ITEM_NAME)
{
std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value);
std::tie(catName, item_name) = split_item_name(m_token_value);
if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_");
tags.push_back(item_name);
match(CIFToken::Tag);
item_names.push_back(item_name);
match(CIFToken::ITEM_NAME);
}
while (m_lookahead == CIFToken::Value)
while (m_lookahead == CIFToken::VALUE)
{
cat->emplace({});
auto row = cat->back();
for (auto tag : tags)
for (auto item_name : item_names)
{
row[tag] = m_token_value;
match(CIFToken::Value);
row[item_name] = m_token_value;
match(CIFToken::VALUE);
}
}
@@ -168,18 +168,18 @@ class dictionary_parser : public parser
else
{
std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value);
std::tie(catName, item_name) = split_item_name(m_token_value);
if (cat == dict.end() or not iequals(cat->name(), catName))
std::tie(cat, std::ignore) = dict.emplace(catName);
match(CIFToken::Tag);
match(CIFToken::ITEM_NAME);
if (cat->empty())
cat->emplace({});
cat->back()[item_name] = m_token_value;
match(CIFToken::Value);
match(CIFToken::VALUE);
}
}
@@ -191,7 +191,7 @@ class dictionary_parser : public parser
std::vector<std::string> keys;
for (auto k : dict["category_key"])
keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>())));
keys.push_back(std::get<1>(split_item_name(k["name"].as<std::string>())));
iset groups;
for (auto g : dict["category_group"])
@@ -224,20 +224,27 @@ class dictionary_parser : public parser
// }
// }
std::vector<item_alias> aliases;
for (const auto &[alias_name, dictionary, version] :
dict["item_aliases"].rows<std::string,std::string,std::string>("alias_name", "dictionary", "version"))
{
aliases.emplace_back(alias_name, dictionary, version);
}
// collect the dict from our dataBlock and construct validators
for (auto i : dict["item"])
{
std::string tagName, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string item, category, mandatory;
cif::tie(item, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(tagName);
std::tie(cat_name, item_name) = split_item_name(item);
if (cat_name.empty() or item_name.empty())
error("Invalid tag name in _item.name " + tagName);
error("Invalid item name in _item.name " + item);
if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
error("specified category id does match the implicit category name for item '" + item + '\'');
else
category = cat_name;
@@ -245,7 +252,7 @@ class dictionary_parser : public parser
auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
if (vi == ivs.end())
ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue /*, defaultIsNull*/ });
ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, nullptr, std::move(aliases) });
else
{
// need to update the itemValidator?
@@ -253,22 +260,22 @@ class dictionary_parser : public parser
{
if (VERBOSE > 2)
{
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary\n";
std::cerr << "inconsistent mandatory value for " << item << " in dictionary\n";
if (iequals(tagName, saveFrameName))
if (iequals(item, saveFrameName))
std::cerr << "choosing " << mandatory << '\n';
else
std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << '\n';
}
if (iequals(tagName, saveFrameName))
if (iequals(item, saveFrameName))
vi->m_mandatory = (iequals(mandatory, "yes"));
}
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
{
if (VERBOSE > 1)
std::cerr << "inconsistent type for " << tagName << " in dictionary\n";
std::cerr << "inconsistent type for " << item << " in dictionary\n";
}
// vi->mMandatory = (iequals(mandatory, "yes"));
@@ -351,7 +358,7 @@ class dictionary_parser : public parser
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag);
addLink(ix, piv->m_item_name, civ->m_item_name);
}
// Only process inline linked items if the linked group list is absent
@@ -379,7 +386,7 @@ class dictionary_parser : public parser
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag);
addLink(ix, piv->m_item_name, civ->m_item_name);
}
}
@@ -410,7 +417,7 @@ class dictionary_parser : public parser
for (auto &iv : cv.m_item_validators)
{
if (iv.m_type == nullptr and cif::VERBOSE >= 0)
std::cerr << "Missing item_type for " << iv.m_tag << '\n';
std::cerr << "Missing item_type for " << iv.m_item_name << '\n';
}
}
}

View File

@@ -158,13 +158,6 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
if (iequals(name, i->name()))
{
is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break;
}
@@ -173,11 +166,12 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
if (is_new)
{
auto &db = emplace_front(name);
db.set_validator(m_validator);
i = insert(end(), { name });
i->set_validator(m_validator);
}
return std::make_tuple(begin(), is_new);
assert(i != end());
return std::make_tuple(i, is_new);
}
void file::load(const std::filesystem::path &p)

View File

@@ -35,7 +35,7 @@ const item_handle item_handle::s_null_item;
row_handle s_null_row_handle;
item_handle::item_handle()
: m_column(std::numeric_limits<uint16_t>::max())
: m_item_ix(std::numeric_limits<uint16_t>::max())
, m_row_handle(s_null_row_handle)
{
}
@@ -44,7 +44,7 @@ std::string_view item_handle::text() const
{
if (not m_row_handle.empty())
{
auto iv = m_row_handle.m_row->get(m_column);
auto iv = m_row_handle.m_row->get(m_item_ix);
if (iv != nullptr)
return iv->text();
}
@@ -52,17 +52,17 @@ std::string_view item_handle::text() const
return {};
}
void item_handle::assign_value(const item &v)
void item_handle::assign_value(std::string_view value)
{
assert(not m_row_handle.empty());
m_row_handle.assign(m_column, v.value(), true);
m_row_handle.assign(m_item_ix, value, true);
}
void item_handle::swap(item_handle &b)
{
assert(m_column == b.m_column);
assert(m_item_ix == b.m_item_ix);
// assert(&m_row_handle.m_category == &b.m_row_handle.m_category);
m_row_handle.swap(m_column, b.m_row_handle);
m_row_handle.swap(m_item_ix, b.m_row_handle);
}
}

View File

@@ -163,9 +163,9 @@ int atom::atom_impl::get_charge() const
// const std::string atom::atom_impl::get_property(const std::string_view name) const
// {
// for (auto &&[tag, ref] : mCachedRefs)
// for (auto &&[item_name, ref] : mCachedRefs)
// {
// if (tag == name)
// if (item_name == name)
// return ref.as<std::string>();
// }
@@ -175,9 +175,9 @@ int atom::atom_impl::get_charge() const
// void atom::atom_impl::set_property(const std::string_view name, const std::string &value)
// {
// for (auto &&[tag, ref] : mCachedRefs)
// for (auto &&[item_name, ref] : mCachedRefs)
// {
// if (tag != name)
// if (item_name != name)
// continue;
// ref = value;
@@ -1783,7 +1783,7 @@ atom &structure::emplace_atom(atom &&atom)
std::string symbol = atom.get_property("type_symbol");
using namespace cif::literals;
if (not atom_type.exists("symbol"_key == symbol))
if (not atom_type.contains("symbol"_key == symbol))
atom_type.emplace({ { "symbol", symbol } });
return m_atoms.emplace_back(std::move(atom));
@@ -1969,7 +1969,7 @@ void structure::change_residue(residue &res, const std::string &newCompound,
// create rest
auto &chemComp = m_db["chem_comp"];
if (not chemComp.exists(key("id") == newCompound))
if (not chemComp.contains(key("id") == newCompound))
{
chemComp.emplace({{"id", newCompound},
{"name", compound->name()},
@@ -2006,7 +2006,10 @@ void structure::change_residue(residue &res, const std::string &newCompound,
continue;
if (a2.empty() or a2 == ".")
{
i->set_property("label_comp_id", newCompound);
remove_atom(*i);
}
else if (a1 != a2)
{
auto ra = r.front();
@@ -2699,7 +2702,7 @@ void structure::cleanup_empty_categories()
for (auto chemComp : chem_comp)
{
std::string compID = chemComp["id"].as<std::string>();
if (atomSite.exists("label_comp_id"_key == compID or "auth_comp_id"_key == compID))
if (atomSite.contains("label_comp_id"_key == compID or "auth_comp_id"_key == compID))
continue;
obsoleteChemComps.push_back(chemComp);
@@ -2716,7 +2719,7 @@ void structure::cleanup_empty_categories()
for (auto entity : entities)
{
std::string entityID = entity["id"].as<std::string>();
if (atomSite.exists("label_entity_id"_key == entityID))
if (atomSite.contains("label_entity_id"_key == entityID))
continue;
obsoleteEntities.push_back(entity);
@@ -2833,15 +2836,4 @@ void structure::validate_atoms() const
assert(atoms.empty());
}
// --------------------------------------------------------------------
void reconstruct_pdbx(datablock &db)
{
if (db.get("atom_site") == nullptr)
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
assert(false);
throw std::runtime_error("not implemented yet");
}
} // namespace pdbx

View File

@@ -269,7 +269,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
const auto kEOF = std::char_traits<char>::eof();
CIFToken result = CIFToken::Unknown;
CIFToken result = CIFToken::UNKNOWN;
int quoteChar = 0;
State state = State::Start;
m_bol = false;
@@ -279,7 +279,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
reserved_words_automaton dag;
while (result == CIFToken::Unknown)
while (result == CIFToken::UNKNOWN)
{
auto ch = get_next_char();
@@ -287,7 +287,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
case State::Start:
if (ch == kEOF)
result = CIFToken::Eof;
result = CIFToken::END_OF_FILE;
else if (ch == '\n')
{
m_bol = true;
@@ -298,9 +298,9 @@ sac_parser::CIFToken sac_parser::get_next_token()
else if (ch == '#')
state = State::Comment;
else if (ch == '_')
state = State::Tag;
state = State::ItemName;
else if (ch == ';' and m_bol)
state = State::TextField;
state = State::TextItem;
else if (ch == '?')
state = State::QuestionMark;
else if (ch == '\'' or ch == '"')
@@ -316,7 +316,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
case State::White:
if (ch == kEOF)
result = CIFToken::Eof;
result = CIFToken::END_OF_FILE;
else if (not is_space(ch))
{
state = State::Start;
@@ -335,7 +335,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
m_token_buffer.clear();
}
else if (ch == kEOF)
result = CIFToken::Eof;
result = CIFToken::END_OF_FILE;
else if (not is_any_print(ch))
error("invalid character in comment");
break;
@@ -344,29 +344,29 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Value;
result = CIFToken::VALUE;
}
else
state = State::Value;
break;
case State::TextField:
case State::TextItem:
if (ch == '\n')
state = State::TextFieldNL;
state = State::TextItemNL;
else if (ch == kEOF)
error("unterminated textfield");
else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break;
case State::TextFieldNL:
case State::TextItemNL:
if (is_text_lead(ch) or ch == ' ' or ch == '\t')
state = State::TextField;
state = State::TextItem;
else if (ch == ';')
{
assert(m_token_buffer.size() >= 2);
m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3);
result = CIFToken::Value;
result = CIFToken::VALUE;
}
else if (ch == kEOF)
error("unterminated textfield");
@@ -387,7 +387,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (is_white(ch))
{
retract();
result = CIFToken::Value;
result = CIFToken::VALUE;
if (m_token_buffer.size() < 2)
error("Invalid quoted string token");
@@ -403,11 +403,11 @@ sac_parser::CIFToken sac_parser::get_next_token()
error("invalid character in quoted string");
break;
case State::Tag:
case State::ItemName:
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Tag;
result = CIFToken::ITEM_NAME;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
}
break;
@@ -422,7 +422,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Value;
result = CIFToken::VALUE;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
}
else
@@ -467,7 +467,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Value;
result = CIFToken::VALUE;
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
break;
}
@@ -483,7 +483,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
if (VERBOSE >= 5)
{
std::cerr << get_token_name(result);
if (result != CIFToken::Eof)
if (result != CIFToken::END_OF_FILE)
std::cerr << " " << std::quoted(m_token_value);
std::cerr << '\n';
}
@@ -710,7 +710,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock, const data
void sac_parser::parse_file()
{
while (m_lookahead != CIFToken::Eof)
while (m_lookahead != CIFToken::END_OF_FILE)
{
switch (m_lookahead)
{
@@ -735,10 +735,10 @@ void sac_parser::parse_file()
void sac_parser::parse_global()
{
match(CIFToken::GLOBAL);
while (m_lookahead == CIFToken::Tag)
while (m_lookahead == CIFToken::ITEM_NAME)
{
match(CIFToken::Tag);
match(CIFToken::Value);
match(CIFToken::ITEM_NAME);
match(CIFToken::VALUE);
}
}
@@ -747,7 +747,7 @@ void sac_parser::parse_datablock()
static const std::string kUnitializedCategory("<invalid>");
std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE_NAME)
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::ITEM_NAME or m_lookahead == CIFToken::SAVE_NAME)
{
switch (m_lookahead)
{
@@ -757,12 +757,12 @@ void sac_parser::parse_datablock()
match(CIFToken::LOOP);
std::vector<std::string> tags;
std::vector<std::string> item_names;
while (m_lookahead == CIFToken::Tag)
while (m_lookahead == CIFToken::ITEM_NAME)
{
std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value);
std::tie(catName, itemName) = split_item_name(m_token_value);
if (cat == kUnitializedCategory)
{
@@ -772,19 +772,19 @@ void sac_parser::parse_datablock()
else if (not iequals(cat, catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
item_names.push_back(itemName);
match(CIFToken::Tag);
match(CIFToken::ITEM_NAME);
}
while (m_lookahead == CIFToken::Value)
while (m_lookahead == CIFToken::VALUE)
{
produce_row();
for (auto tag : tags)
for (auto item_name : item_names)
{
produce_item(cat, tag, m_token_value);
match(CIFToken::Value);
produce_item(cat, item_name, m_token_value);
match(CIFToken::VALUE);
}
}
@@ -792,10 +792,10 @@ void sac_parser::parse_datablock()
break;
}
case CIFToken::Tag:
case CIFToken::ITEM_NAME:
{
std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value);
std::tie(catName, itemName) = split_item_name(m_token_value);
if (not iequals(cat, catName))
{
@@ -804,11 +804,11 @@ void sac_parser::parse_datablock()
produce_row();
}
match(CIFToken::Tag);
match(CIFToken::ITEM_NAME);
produce_item(cat, itemName, m_token_value);
match(CIFToken::Value);
match(CIFToken::VALUE);
break;
}

File diff suppressed because it is too large Load Diff

View File

@@ -1493,8 +1493,8 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
auto r1 = cat1.front();
auto r2 = cat2.front();
for (auto column : cat1.key_fields())
r2[column] = r1[column].text();
for (auto item : cat1.key_items())
r2[item] = r1[item].text();
}
}
else

1349
src/pdb/reconstruct.cpp Normal file

File diff suppressed because it is too large Load Diff

336
src/pdb/validate-pdbx.cpp Normal file
View File

@@ -0,0 +1,336 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++.hpp"
namespace cif::pdb
{
condition get_parents_condition(const validator &validator, row_handle rh, const category &parentCat)
{
condition result;
auto &childCat = rh.get_category();
auto childName = childCat.name();
auto parentName = parentCat.name();
auto links = validator.get_links_for_child(childName);
links.erase(remove_if(links.begin(), links.end(), [n = parentName](auto &l)
{ return l->m_parent_category != n; }),
links.end());
if (not links.empty())
{
for (auto &link : links)
{
condition cond;
for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
{
auto childValue = rh[link->m_child_keys[ix]];
if (childValue.empty())
continue;
cond = std::move(cond) and key(link->m_parent_keys[ix]) == childValue.text();
}
result = std::move(result) or std::move(cond);
}
}
else if (cif::VERBOSE > 0)
std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';
return result;
}
bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
{
std::error_code ec;
bool result = is_valid_pdbx_file(file, dictionary, ec);
return result and ec == std::errc();
}
bool is_valid_pdbx_file(const file &file, std::error_code &ec)
{
bool result = false;
if (file.empty())
ec = make_error_code(validation_error::empty_file);
else
{
std::string dictionary = "mmcif_pdbx";
for (auto &db : file)
{
auto audit_conform = db.get("audit_conform");
if (audit_conform == nullptr)
continue;
if (not audit_conform->empty())
{
auto specified_dict = audit_conform->front()["dict_name"];
if (not specified_dict.empty())
dictionary = specified_dict.as<std::string>();
}
break;
}
result = is_valid_pdbx_file(file, dictionary, ec);
}
return result;
}
bool is_valid_pdbx_file(const file &file, std::string_view dictionary, std::error_code &ec)
{
using namespace cif::literals;
bool result = true;
try
{
auto &cf = cif::compound_factory::instance();
auto &validator = cif::validator_factory::instance().operator[](dictionary);
if (file.empty())
throw std::runtime_error("Empty file");
auto &db = file.front();
if (db.empty())
throw std::runtime_error("Empty datablock");
auto &atom_site = db["atom_site"];
if (atom_site.empty())
throw std::runtime_error("Empty or missing atom_site category");
auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
std::string last_asym_id;
int last_seq_id = -1;
for (auto r : atom_site)
{
auto seq_id = r.get<std::optional<int>>("label_seq_id");
if (not seq_id.has_value()) // not a residue in a polymer
continue;
if (*seq_id == last_seq_id)
continue;
last_seq_id = *seq_id;
auto comp_id = r.get<std::string>("label_comp_id");
if (not cf.is_monomer(comp_id))
continue;
auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
if (p.size() != 1)
{
if (cif::VERBOSE > 0)
std::clog << "In atom_site record: " << r["id"].text() << '\n';
throw std::runtime_error("For each monomer in atom_site there should be exactly one pdbx_poly_seq_scheme record");
}
}
auto &entity = db["entity"];
if (entity.empty())
throw std::runtime_error("Entity category is missing or empty");
auto &entity_poly = db["entity_poly"];
if (entity_poly.empty())
throw std::runtime_error("Entity_poly category is missing or empty");
auto &entity_poly_seq = db["entity_poly_seq"];
if (entity_poly_seq.empty())
throw std::runtime_error("Entity_poly_seq category is missing or empty");
auto &struct_asym = db["struct_asym"];
if (struct_asym.empty())
throw std::runtime_error("struct_asym category is missing or empty");
for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
{
if (entity_poly.count("entity_id"_key == entity_id) != 1)
throw std::runtime_error("There should be exactly one entity_poly record per polymer entity");
const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
std::map<int,std::set<std::string>> mon_per_seq_id;
for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
{
mon_per_seq_id[num].emplace(mon_id);
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
{
if (pdbx_poly_seq_scheme.count(
"asym_id"_key == asym_id and
"mon_id"_key == mon_id and
"seq_id"_key == num and
"hetero"_key == hetero) != 1)
{
throw std::runtime_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
}
}
}
for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
{
if (entity_poly_seq.count(
"mon_id"_key == mon_id and
"num"_key == seq_id and
"hetero"_key == hetero) != 1)
{
throw std::runtime_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
}
if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
throw std::runtime_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
}
for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
{
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
{
condition cond;
for (auto mon_id : mon_ids)
cond = std::move(cond) or "label_comp_id"_key == mon_id;
cond = "label_entity_id"_key == entity_id and
"label_asym_id"_key == asym_id and
"label_seq_id"_key == seq_id and not std::move(cond);
if (atom_site.contains(std::move(cond)))
throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
}
}
auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
std::string::const_iterator si, sci, se, sce;
auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
{
for (const auto &[seq_id, comp_ids] : mon_per_seq_id)
{
if (si == se)
return false;
bool match = false;
for (auto comp_id : comp_ids)
{
std::string letter;
if (can)
{
if (compound_factory::kBaseMap.contains(comp_id))
letter = compound_factory::kBaseMap.at(comp_id);
else
{
auto c = cf.create(comp_id);
if (c and c->one_letter_code())
letter = c->one_letter_code();
else
letter = "X";
}
}
else
{
if (compound_factory::kAAMap.contains(comp_id))
letter = compound_factory::kAAMap.at(comp_id);
else if (comp_id.length() == 1 and compound_factory::kBaseMap.contains(comp_id))
letter = compound_factory::kBaseMap.at(comp_id);
else
letter = '(' + comp_id + ')';
}
if (iequals(std::string{si, si + letter.length()}, letter))
{
match = true;
si += letter.length();
break;
}
else
return false;
}
if (not match)
break;
}
return si == se;
};
if (not seq.has_value())
{
if (cif::VERBOSE > 0)
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
}
else
{
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
if (not seq_match(false, seq->begin(), seq->end()))
throw std::runtime_error("Sequences do not match for entity " + entity_id);
}
if (not seq_can.has_value())
{
if (cif::VERBOSE > 0)
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
}
else
{
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
if (not seq_match(true, seq_can->begin(), seq_can->end()))
throw std::runtime_error("Canonical sequences do not match for entity " + entity_id);
}
}
result = true;
}
catch (const std::exception &ex)
{
result = false;
if (cif::VERBOSE > 0)
std::clog << ex.what() << '\n';
ec = make_error_code(validation_error::not_valid_pdbx);
}
if (not result and ec == std::errc())
ec = make_error_code(validation_error::not_valid_pdbx);
return result;
}
} // namespace cif::pdb

View File

@@ -29,44 +29,44 @@
namespace cif
{
void row_handle::assign(uint16_t column, std::string_view value, bool updateLinked, bool validate)
void row_handle::assign(uint16_t item, std::string_view value, bool updateLinked, bool validate)
{
if (not m_category)
throw std::runtime_error("uninitialized row");
m_category->update_value(m_row, column, value, updateLinked, validate);
m_category->update_value(m_row, item, value, updateLinked, validate);
}
uint16_t row_handle::get_column_ix(std::string_view name) const
uint16_t row_handle::get_item_ix(std::string_view name) const
{
if (not m_category)
throw std::runtime_error("uninitialized row");
return m_category->get_column_ix(name);
return m_category->get_item_ix(name);
}
std::string_view row_handle::get_column_name(uint16_t ix) const
std::string_view row_handle::get_item_name(uint16_t ix) const
{
if (not m_category)
throw std::runtime_error("uninitialized row");
return m_category->get_column_name(ix);
return m_category->get_item_name(ix);
}
uint16_t row_handle::add_column(std::string_view name)
uint16_t row_handle::add_item(std::string_view name)
{
if (not m_category)
throw std::runtime_error("uninitialized row");
return m_category->add_column(name);
return m_category->add_item(name);
}
void row_handle::swap(uint16_t column, row_handle &b)
void row_handle::swap(uint16_t item, row_handle &b)
{
if (not m_category)
throw std::runtime_error("uninitialized row");
m_category->swap_item(column, *this, b);
m_category->swap_item(item, *this, b);
}
// --------------------------------------------------------------------
@@ -86,7 +86,7 @@ row_initializer::row_initializer(row_handle rh)
auto &i = r->operator[](ix);
if (not i)
continue;
emplace_back(cat.get_column_name(ix), i.text());
emplace_back(cat.get_item_name(ix), i.text());
}
}

View File

@@ -35,24 +35,24 @@ namespace cif
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
const uint8_t kCharToLowerMap[256] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
const uint8_t kCharToLowerMap[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff
};
// --------------------------------------------------------------------
@@ -171,7 +171,7 @@ std::string trim_right_copy(std::string_view s)
e = pe;
}
return {s.begin(), e};
return { s.begin(), e };
}
std::string trim_left_copy(std::string_view s)
@@ -185,27 +185,46 @@ std::string trim_left_copy(std::string_view s)
b = std::next(b);
}
return {b, s.end()};
return { b, s.end() };
}
void trim_left(std::string &s)
{
auto b = s.begin();
while (b != s.end())
auto in = s.begin(), out = s.begin();
while (in != s.end() and std::isspace(*in))
++in;
if (in == s.end())
s.clear();
else if (in != out)
{
if (not std::isspace(*b))
break;
b = std::next(b);
while (in != s.end())
*out++ = *in++;
s.erase(out, s.end());
}
s.erase(s.begin(), b);
}
void trim(std::string &s)
{
trim_right(s);
trim_left(s);
auto in = s.begin(), out = s.begin(), end = s.end();
while (end != s.begin() and std::isspace(*(end - 1)))
--end;
while (in != end and std::isspace(*in))
++in;
if (in == end)
s.clear();
else if (in != out)
{
while (in != end)
*out++ = *in++;
s.erase(out, s.end());
}
else if (end != s.end())
s.erase(end, s.end());
}
std::string trim_copy(std::string_view s)
@@ -215,19 +234,19 @@ std::string trim_copy(std::string_view s)
// --------------------------------------------------------------------
std::tuple<std::string, std::string> split_tag_name(std::string_view tag)
std::tuple<std::string, std::string> split_item_name(std::string_view item_name)
{
if (tag.empty())
throw std::runtime_error("empty tag");
if (tag[0] != '_')
throw std::runtime_error("tag '" + std::string { tag } + "' does not start with underscore");
if (item_name.empty())
throw std::runtime_error("empty item_name");
if (item_name[0] != '_')
throw std::runtime_error("item_name '" + std::string{ item_name } + "' does not start with underscore");
auto s = tag.find('.');
auto s = item_name.find('.');
if (s == std::string::npos)
// throw std::runtime_error("tag does not contain dot (" + std::string{ tag } + ')');
return std::tuple<std::string, std::string>{ "", tag.substr(1) };
// throw std::runtime_error("item_name does not contain dot (" + std::string{ item_name } + ')');
return std::tuple<std::string, std::string>{ "", item_name.substr(1) };
else
return std::tuple<std::string, std::string>{tag.substr(1, s - 1), tag.substr(s + 1)};
return std::tuple<std::string, std::string>{ item_name.substr(1, s - 1), item_name.substr(s + 1) };
}
// --------------------------------------------------------------------
@@ -242,8 +261,7 @@ std::string cif_id_for_number(int number)
result += static_cast<char>('A' + r);
number = (number - r) / 26 - 1;
}
while (number >= 0);
} while (number >= 0);
std::reverse(result.begin(), result.end());
@@ -298,29 +316,29 @@ enum LineBreakClass
kLBC_Unknown
};
const LineBreakClass kASCII_LBTable[128] =
{
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak, kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic, kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,
kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric,
const LineBreakClass kASCII_LBTable[128] = {
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak, kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic, kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,
kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric,
// comma treated differently here, it is not a numeric separator in PDB
kLBC_SymbolAllowingBreakAfter /* kLBC_InfixNumericSeparator */,
// comma treated differently here, it is not a numeric separator in PDB
kLBC_SymbolAllowingBreakAfter /* kLBC_InfixNumericSeparator */,
kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,
kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark};
kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,
kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark
};
std::string::const_iterator nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
{
@@ -338,33 +356,33 @@ std::string::const_iterator nextLineBreak(std::string::const_iterator text, std:
const breakAction brkTable[27][27] = {
// OP CL CP QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT
/* OP */ {PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK},
/* CL */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* CP */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* QU */ {PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* GL */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* NS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* EX */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* SY */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* IS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* PR */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* PO */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* NU */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* AL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* ID */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* IN */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* HY */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* BA */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* BB */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* B2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* ZW */ {DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK},
/* CM */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* WJ */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* H2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
/* H3 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
/* JL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK},
/* JV */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
/* JT */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
/* OP */ { PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK },
/* CL */ { DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* CP */ { DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* QU */ { PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK },
/* GL */ { IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK },
/* NS */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* EX */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* SY */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* IS */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* PR */ { IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK },
/* PO */ { IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* NU */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* AL */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* ID */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* IN */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* HY */ { DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* BA */ { DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* BB */ { IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK },
/* B2 */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* ZW */ { DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK },
/* CM */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK },
/* WJ */ { IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK },
/* H2 */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK },
/* H3 */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK },
/* JL */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK },
/* JV */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK },
/* JT */ { DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK },
};
uint8_t ch = static_cast<uint8_t>(*text);
@@ -418,7 +436,7 @@ std::string::const_iterator nextLineBreak(std::string::const_iterator text, std:
std::vector<std::string> wrapLine(const std::string &text, size_t width)
{
std::vector<std::string> result;
std::vector<size_t> offsets = {0};
std::vector<size_t> offsets = { 0 };
auto b = text.begin();
while (b != text.end())

View File

@@ -39,16 +39,33 @@
// the code will use boost::regex instead.
#if USE_BOOST_REGEX
#include <boost/regex.hpp>
# include <boost/regex.hpp>
using boost::regex;
#else
#include <regex>
# include <regex>
using std::regex;
#endif
namespace cif
{
validation_exception::validation_exception(std::error_code ec)
: runtime_error(ec.message())
{
}
validation_exception::validation_exception(std::error_code ec, std::string_view category)
: runtime_error((ec.message() + "; category: ").append(category))
{
}
validation_exception::validation_exception(std::error_code ec, std::string_view category, std::string_view item)
: runtime_error((ec.message() + "; category: ").append(category).append("; item: ").append(item))
{
}
// --------------------------------------------------------------------
struct regex_impl : public regex
{
regex_impl(std::string_view rx)
@@ -57,21 +74,12 @@ struct regex_impl : public regex
}
};
validation_error::validation_error(const std::string &msg)
: m_msg(msg)
{
}
validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
: m_msg("When validating _" + cat + '.' + item + ": " + msg)
{
}
// --------------------------------------------------------------------
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept
{
DDL_PrimitiveType result;
ec = {};
DDL_PrimitiveType result = DDL_PrimitiveType::Char;
if (iequals(s, "char"))
result = DDL_PrimitiveType::Char;
else if (iequals(s, "uchar"))
@@ -79,7 +87,16 @@ DDL_PrimitiveType map_to_primitive_type(std::string_view s)
else if (iequals(s, "numb"))
result = DDL_PrimitiveType::Numb;
else
throw validation_error("Not a known primitive type");
ec = make_error_code(validation_error::not_a_known_primitive_type);
return result;
}
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
{
std::error_code ec;
auto result = map_to_primitive_type(s, ec);
if (ec)
throw std::system_error(ec, std::string{ s });
return result;
}
@@ -196,63 +213,72 @@ int type_validator::compare(std::string_view a, std::string_view b) const
// --------------------------------------------------------------------
// void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
//{
//// if (mParent != nullptr and VERBOSE)
//// cerr << "replacing parent in " << mCategory->m_name << " from " << mParent->mCategory->m_name << " to " << parent->mCategory->m_name << endl;
//// mParent = parent;
//
// if (m_type == nullptr and parent != nullptr)
// m_type = parent->m_type;
//
// if (parent != nullptr)
// {
// mLinked.push_back({parent, parentItem, childItem});
//
// parent->mChildren.insert(this);
////
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
//// parent->mForeignKeys.insert(this);
// }
//}
void item_validator::operator()(std::string_view value) const
{
std::error_code ec;
if (not validate_value(value, ec))
throw std::system_error(ec, std::string{ value } + " does not match rx for " + m_item_name);
}
bool item_validator::validate_value(std::string_view value, std::error_code &ec) const noexcept
{
ec = {};
if (not value.empty() and value != "?" and value != ".")
{
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name);
if (not m_enums.empty())
{
if (m_enums.count(std::string{ value }) == 0)
throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
}
ec = make_error_code(validation_error::value_does_not_match_rx);
else if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
ec = make_error_code(validation_error::value_is_not_in_enumeration_list);
}
return ec == std::errc();
}
// --------------------------------------------------------------------
void category_validator::addItemValidator(item_validator &&v)
void category_validator::add_item_validator(item_validator &&v)
{
if (v.m_mandatory)
m_mandatory_fields.insert(v.m_tag);
m_mandatory_items.insert(v.m_item_name);
v.m_category = this;
auto r = m_item_validators.insert(std::move(v));
if (not r.second and VERBOSE >= 4)
std::cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << '\n';
std::cout << "Could not add validator for item " << v.m_item_name << " to category " << m_name << '\n';
}
const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
const item_validator *category_validator::get_validator_for_item(std::string_view item_name) const
{
const item_validator *result = nullptr;
auto i = m_item_validators.find(item_validator{ std::string(tag) });
auto i = m_item_validators.find(item_validator{ std::string(item_name) });
if (i != m_item_validators.end())
result = &*i;
else if (VERBOSE > 4)
std::cout << "No validator for tag " << tag << '\n';
std::cout << "No validator for item " << item_name << '\n';
return result;
}
const item_validator *category_validator::get_validator_for_aliased_item(std::string_view item_name) const
{
const item_validator *result = nullptr;
for (auto &iv : m_item_validators)
{
for (auto &ai : iv.m_aliases)
{
const auto &[cat, name] = split_item_name(ai.m_name);
if (iequals(name, item_name) and iequals(cat, m_name))
{
result = &iv;
break;
}
}
if (result)
break;
}
return result;
}
@@ -295,19 +321,19 @@ const category_validator *validator::get_validator_for_category(std::string_view
return result;
}
item_validator *validator::get_validator_for_item(std::string_view tag) const
item_validator *validator::get_validator_for_item(std::string_view item_name) const
{
item_validator *result = nullptr;
std::string cat, item;
std::tie(cat, item) = split_tag_name(tag);
std::tie(cat, item) = split_item_name(item_name);
auto *cv = get_validator_for_category(cat);
if (cv != nullptr)
result = const_cast<item_validator *>(cv->get_validator_for_item(item));
if (result == nullptr and VERBOSE > 4)
std::cout << "No validator for item " << tag << '\n';
std::cout << "No validator for item " << item_name << '\n';
return result;
}
@@ -332,11 +358,11 @@ void validator::add_link_validator(link_validator &&v)
auto piv = pcv->get_validator_for_item(v.m_parent_keys[i]);
if (piv == nullptr)
throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + v.m_parent_keys[i]);
throw std::runtime_error("unknown parent item _" + v.m_parent_category + '.' + v.m_parent_keys[i]);
auto civ = ccv->get_validator_for_item(v.m_child_keys[i]);
if (civ == nullptr)
throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + v.m_child_keys[i]);
throw std::runtime_error("unknown child item _" + v.m_child_category + '.' + v.m_child_keys[i]);
if (civ->m_type == nullptr and piv->m_type != nullptr)
const_cast<item_validator *>(civ)->m_type = piv->m_type;
@@ -351,7 +377,7 @@ std::vector<const link_validator *> validator::get_links_for_parent(std::string_
for (auto &l : m_link_validators)
{
if (l.m_parent_category == category)
if (iequals(l.m_parent_category, category))
result.push_back(&l);
}
@@ -364,19 +390,32 @@ std::vector<const link_validator *> validator::get_links_for_child(std::string_v
for (auto &l : m_link_validators)
{
if (l.m_child_category == category)
if (iequals(l.m_child_category, category))
result.push_back(&l);
}
return result;
}
void validator::report_error(const std::string &msg, bool fatal) const
void validator::report_error(std::error_code ec, bool fatal) const
{
if (m_strict or fatal)
throw validation_error(msg);
else if (VERBOSE > 0)
std::cerr << msg << '\n';
throw validation_exception(ec);
else
std::cerr << ec.message() << '\n';
}
void validator::report_error(std::error_code ec, std::string_view category,
std::string_view item, bool fatal) const
{
auto ex = item.empty() ?
validation_exception(ec, category) :
validation_exception(ec, category, item);
if (m_strict or fatal)
throw ex;
else
std::cerr << ex.what() << '\n';
}
// --------------------------------------------------------------------
@@ -438,12 +477,12 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
if (not std::filesystem::exists(p, ec) or ec)
{
for (const char *dir : {
#if defined(CACHE_DIR)
# if defined(CACHE_DIR)
CACHE_DIR,
#endif
#if defined(DATA_DIR)
# endif
# if defined(DATA_DIR)
DATA_DIR
#endif
# endif
})
{
auto p2 = std::filesystem::path(dir) / p;

Binary file not shown.

72
test/CMakeLists.txt Normal file
View File

@@ -0,0 +1,72 @@
# We're using the older version 2 of Catch2
find_package(Catch2 QUIET)
if(NOT Catch2_FOUND)
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v2.13.9)
FetchContent_MakeAvailable(Catch2)
set(Catch2_VERSION "2.13.9")
endif()
list(
APPEND
CIFPP_tests
unit-v2
unit-3d
format
model
rename-compound
sugar
spinner
reconstruction
validate-pdbx)
add_library(test-main OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/test-main.cpp")
target_link_libraries(test-main cifpp::cifpp Catch2::Catch2)
if(${Catch2_VERSION} VERSION_GREATER_EQUAL 3.0.0)
target_compile_definitions(test-main PUBLIC CATCH22=0)
else()
target_compile_definitions(test-main PUBLIC CATCH22=1)
endif()
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test")
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/${CIFPP_TEST}.cpp")
add_executable(
${CIFPP_TEST} ${CIFPP_TEST_SOURCE} $<TARGET_OBJECTS:test-main>)
if(${Catch2_VERSION} VERSION_GREATER_EQUAL 3.0.0)
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=0)
else()
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=1)
endif()
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp
Catch2::Catch2)
target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
if(MSVC)
# Specify unwind semantics so that MSVC knowns how to handle exceptions
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
endif()
add_custom_target(
"run-${CIFPP_TEST}"
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR})
add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR})
endforeach()

View File

@@ -24,7 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <catch2/catch.hpp>
#include "test-main.hpp"
#include <stdexcept>

View File

@@ -26,8 +26,6 @@
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <stdexcept>
#include <cif++.hpp>

BIN
test/pdb1cbs.ent.gz Normal file

Binary file not shown.

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

10486
test/reconstruct/phenix.cif Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,62 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <cif++.hpp>
#include <iostream>
#include <fstream>
TEST_CASE("reconstruct")
{
cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
for (std::filesystem::directory_iterator i(gTestDir / "reconstruct"); i != std::filesystem::directory_iterator{}; ++i)
{
std::cout << i->path() << '\n';
if (i->path().extension() == ".pdb")
{
cif::file f = cif::pdb::read(i->path());
std::error_code ec;
if (not cif::pdb::is_valid_pdbx_file(f, ec))
CHECK(cif::pdb::reconstruct_pdbx(f));
}
else
{
cif::file f(i->path());
std::error_code ec;
CHECK_FALSE(cif::pdb::is_valid_pdbx_file(f, ec));
CHECK(ec != std::errc{});
CHECK(cif::pdb::reconstruct_pdbx(f));
}
}
}

View File

@@ -28,8 +28,6 @@
#include <cif++.hpp>
#include <catch2/catch.hpp>
#include <iostream>
#include <fstream>
@@ -37,8 +35,8 @@ TEST_CASE("rename")
{
cif::VERBOSE = 3;
if (std::filesystem::exists(gTestDir / ".." / "data" / "ccd-subset.cif"))
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "ccd-subset.cif"))
cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic"))
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");

View File

@@ -1,6 +1,32 @@
#include "cif++/utilities.hpp"
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <catch2/catch.hpp>
#include "test-main.hpp"
#include "cif++/utilities.hpp"
#include <random>
#include <thread>

View File

@@ -26,8 +26,6 @@
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <stdexcept>
#include <cif++.hpp>

View File

@@ -1,10 +1,9 @@
#define CATCH_CONFIG_RUNNER 1
#include "test-main.hpp"
#include <cif++.hpp>
#define CATCH_CONFIG_RUNNER
#include <catch2/catch.hpp>
std::filesystem::path gTestDir = std::filesystem::current_path();
int main(int argc, char *argv[])
@@ -12,11 +11,18 @@ int main(int argc, char *argv[])
Catch::Session session; // There must be exactly one instance
// Build a new parser on top of Catch2's
#if CATCH22
using namespace Catch::clara;
auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
["-D"]["--data-dir"] // the option names it will respond to
("The directory containing the data files"); // description string for the help output
#else
// Build a new parser on top of Catch2's
using namespace Catch::Clara;
#endif
auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
["-D"]["--data-dir"] // the option names it will respond to
("The directory containing the data files") // description string for the help output
| Opt(cif::VERBOSE, "verbose")["-v"]["--cif-verbose"]("Flag for cif::VERBOSE");
// Now pass the new composite back to Catch2 so it uses that
session.cli(cli);
@@ -30,10 +36,9 @@ int main(int argc, char *argv[])
cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
// initialize CCD location
cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");
return session.run();
}

View File

@@ -1,3 +1,37 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#if CATCH22
#include <catch2/catch.hpp>
#else
#include <catch2/catch_all.hpp>
#endif
#include <filesystem>
extern std::filesystem::path gTestDir;

View File

@@ -26,8 +26,6 @@
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <stdexcept>
#include <cif++.hpp>

View File

@@ -26,8 +26,6 @@
#include "test-main.hpp"
#include <catch2/catch.hpp>
#include <cif++.hpp>
#include "cif++/dictionary_parser.hpp"
@@ -568,7 +566,7 @@ _test.value
auto &test = db["test"];
REQUIRE(test.size() == 5);
REQUIRE(test.exists("value"_key == cif::null));
REQUIRE(test.contains("value"_key == cif::null));
REQUIRE(test.find("value"_key == cif::null).size() == 2);
}
@@ -746,27 +744,51 @@ _cat_2.desc
std::istream is_data(&data_buffer);
f.load(is_data);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
SECTION("one")
{
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
REQUIRE(cat1.size() == 3);
REQUIRE(cat2.size() == 3);
REQUIRE(cat1.size() == 3);
REQUIRE(cat2.size() == 3);
cat1.erase(cif::key("id") == 1);
cat1.erase(cif::key("id") == 1);
REQUIRE(cat1.size() == 2);
REQUIRE(cat2.size() == 1);
REQUIRE(cat1.size() == 2);
REQUIRE(cat2.size() == 1);
// REQUIRE_THROWS_AS(cat2.emplace({
// { "id", 4 },
// { "parent_id", 4 },
// { "desc", "moet fout gaan" }
// }), std::exception);
// REQUIRE_THROWS_AS(cat2.emplace({
// { "id", 4 },
// { "parent_id", 4 },
// { "desc", "moet fout gaan" }
// }), std::exception);
REQUIRE_THROWS_AS(cat2.emplace({ { "id", "vijf" }, // <- invalid value
{ "parent_id", 2 },
{ "desc", "moet fout gaan" } }),
std::exception);
REQUIRE_THROWS_AS(cat2.emplace({ { "id", "vijf" }, // <- invalid value
{ "parent_id", 2 },
{ "desc", "moet fout gaan" } }),
std::exception);
}
// SECTION("two")
// {
// auto &cat1 = f.front()["cat_1"];
// auto &cat2 = f.front()["cat_2"];
// cat1.update_value(cif::all(), "id", [](std::string_view v) -> std::string
// {
// int vi;
// auto [ec, ptr] = std::from_chars(v.data(), v.data() + v.length(), vi);
// return std::to_string(vi + 1);
// });
// REQUIRE(cat1.find1<std::string>(cif::key("id") == 2, "name") == "Aap");
// REQUIRE(cat1.find1<std::string>(cif::key("id") == 3, "name") == "Noot");
// REQUIRE(cat1.find1<std::string>(cif::key("id") == 4, "name") == "Mies");
// REQUIRE(cat2.find1<int>(cif::key("id") == 1, "parent_id") == 2);
// REQUIRE(cat2.find1<int>(cif::key("id") == 2, "parent_id") == 2);
// REQUIRE(cat2.find1<int>(cif::key("id") == 3, "parent_id") == 3);
// }
}
// --------------------------------------------------------------------
@@ -3468,3 +3490,29 @@ TEST_CASE("compound_not_found_test_1")
auto cmp = cif::compound_factory::instance().create("&&&");
REQUIRE(cmp == nullptr);
}
// --------------------------------------------------------------------
// PDB2CIF tests
TEST_CASE("pdb2cif_formula_weight")
{
cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");
auto fw = a.front()["entity"].find1<float>(cif::key("id") == 1, "formula_weight");
CHECK(std::abs(fw - 15581.802f) < 0.1f);
fw = a.front()["entity"].find1<float>(cif::key("id") == 2, "formula_weight");
CHECK(fw == 300.435f);
fw = a.front()["entity"].find1<float>(cif::key("id") == 3, "formula_weight");
CHECK(fw == 18.015f);
}
// --------------------------------------------------------------------
TEST_CASE("update_values_with_provider")
{
}

284
test/validate-pdbx-test.cpp Normal file
View File

@@ -0,0 +1,284 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <cif++.hpp>
#include <stdexcept>
// --------------------------------------------------------------------
cif::file operator""_cf(const char *text, size_t length)
{
struct membuf : public std::streambuf
{
membuf(char *text, size_t length)
{
this->setg(text, text, text + length);
}
} buffer(const_cast<char *>(text), length);
std::istream is(&buffer);
return cif::file(is);
}
// --------------------------------------------------------------------
TEST_CASE("test-1")
{
auto f = R"(data_1CBS
#
_entry.id 1CBS
#
_entity.id 1
_entity.type polymer
#
_entity_poly.entity_id 1
_entity_poly.type 'polypeptide(L)'
_entity_poly.nstd_linkage no
_entity_poly.nstd_monomer no
_entity_poly.pdbx_seq_one_letter_code
;PNFSG
;
_entity_poly.pdbx_seq_one_letter_code_can
;PNFSG
;
_entity_poly.pdbx_strand_id A
_entity_poly.pdbx_target_identifier ?
#
loop_
_entity_poly_seq.entity_id
_entity_poly_seq.num
_entity_poly_seq.mon_id
_entity_poly_seq.hetero
1 1 PRO n
1 2 ASN n
1 3 PHE n
1 4 SER n
1 5 GLY n
#
loop_
_struct_asym.id
_struct_asym.pdbx_blank_PDB_chainid_flag
_struct_asym.pdbx_modified
_struct_asym.entity_id
_struct_asym.details
A N N 1 ?
#
loop_
_atom_type.symbol
C
N
O
S
#
loop_
_atom_site.group_PDB
_atom_site.id
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.label_comp_id
_atom_site.label_asym_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_asym_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
ATOM 2 C CA . PRO A 1 1 ? 18.150 13.525 43.680 1.00 28.82 ? 1 PRO A CA 1
ATOM 9 C CA . ASN A 1 2 ? 20.576 16.457 43.578 1.00 20.79 ? 2 ASN A CA 1
ATOM 17 C CA . PHE A 1 3 ? 21.144 17.838 40.087 1.00 12.62 ? 3 PHE A CA 1
ATOM 28 C CA . SER A 1 4 ? 23.170 20.780 41.464 1.00 11.30 ? 4 SER A CA 1
ATOM 34 C CA . GLY A 1 5 ? 26.628 21.486 40.103 1.00 10.86 ? 5 GLY A CA 1
#
loop_
_pdbx_poly_seq_scheme.asym_id
_pdbx_poly_seq_scheme.entity_id
_pdbx_poly_seq_scheme.seq_id
_pdbx_poly_seq_scheme.mon_id
_pdbx_poly_seq_scheme.ndb_seq_num
_pdbx_poly_seq_scheme.pdb_seq_num
_pdbx_poly_seq_scheme.auth_seq_num
_pdbx_poly_seq_scheme.pdb_mon_id
_pdbx_poly_seq_scheme.auth_mon_id
_pdbx_poly_seq_scheme.pdb_strand_id
_pdbx_poly_seq_scheme.pdb_ins_code
_pdbx_poly_seq_scheme.hetero
A 1 1 PRO 1 1 1 PRO PRO A . n
A 1 2 ASN 2 2 2 ASN ASN A . n
A 1 3 PHE 3 3 3 PHE PHE A . n
A 1 4 SER 4 4 4 SER SER A . n
A 1 5 GLY 5 5 5 GLY GLY A . n
#
)"_cf;
SECTION("Plain file")
{
REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Delete one atom_site")
{
auto &db = f.front();
auto n = db["atom_site"].erase(cif::key("id") == 2);
REQUIRE(n == 1);
REQUIRE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Delete a pdbx_poly_seq_scheme record")
{
auto &db = f.front();
auto n = db["pdbx_poly_seq_scheme"].erase(cif::key("seq_id") == 2);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Delete an entity_poly_seq record")
{
auto &db = f.front();
auto n = db["entity_poly_seq"].erase(cif::key("num") == 2);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Delete an entity_poly record")
{
auto &db = f.front();
auto n = db["entity_poly"].erase(cif::key("entity_id") == 1);
REQUIRE(n == 1);
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Mutate an atom_site record")
{
auto &db = f.front();
auto r = db["atom_site"].find1(cif::key("id") == 9);
r.assign({
{ "label_comp_id", "ALA" },
{ "auth_comp_id", "ALA" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Hetero consistency")
{
auto &db = f.front();
db["entity_poly_seq"].emplace({ //
{ "entity_id", 1 },
{ "num", 1 },
{ "mon_id", "ALA" },
{ "hetero", "n" }
});
db["pdbx_poly_seq_scheme"].emplace({ //
{ "asym_id", "A" },
{ "entity_id", "1" },
{ "seq_id", "1" },
{ "mon_id", "ALA" },
{ "ndb_seq_num", "1" },
{ "pdb_seq_num", "1" },
{ "auth_seq_num", "1" },
{ "pdb_mon_id", "ALA" },
{ "auth_mon_id", "ALA" },
{ "pdb_strand_id", "A" },
{ "pdb_ins_code", "." },
{ "hetero", "n" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Missing hetero for record in atom_site")
{
auto &db = f.front();
auto r1 = db["atom_site"].front();
cif::row_initializer cr(r1);
cr.set_value("id", "3");
cr.set_value("label_comp_id", "ALA");
db["atom_site"].emplace(std::move(cr));
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Missing letter in entity_poly.pdbx_seq_one_letter_code")
{
auto &db = f.front();
auto &entity_poly = db["entity_poly"];
entity_poly.front().assign({
{ "pdbx_seq_one_letter_code", "PNSG" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Too many letters in entity_poly.pdbx_seq_one_letter_code")
{
auto &db = f.front();
auto &entity_poly = db["entity_poly"];
entity_poly.front().assign({
{ "pdbx_seq_one_letter_code", "PNFSGX" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
SECTION("Mismatch in entity_poly.pdbx_seq_one_letter_code")
{
auto &db = f.front();
auto &entity_poly = db["entity_poly"];
entity_poly.front().assign({
{ "pdbx_seq_one_letter_code", "PNASG" }
});
REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
}
}

View File

@@ -10,7 +10,7 @@ euid=${EUID:-$(id -u)}
if [ "${euid}" -ne 0 ]; then
echo "Please run as root"
exit
exit 1
fi
if [ -f "@CIFPP_ETC_DIR@/libcifpp.conf" ]; then
@@ -19,12 +19,13 @@ fi
# check to see if we're supposed to run at all
if [ "$update" != "true" ]; then
exit
exit 0
fi
# if cache directory doesn't exist, exit.
if ! [ -d "@CIFPP_CACHE_DIR@" ]; then
exit
echo "Cache directory '@CIFPP_CACHE_DIR@' does not exist"
exit 1
fi
# Create a temp file in the right directory and
@@ -60,12 +61,16 @@ update_dictionary() {
# Update the dictionaries
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"
# notify subscribers, will fail on FreeBSD
# notify subscribers, using find instead of run-parts to make it work on FreeBSD as well
if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ] && [ -x /bin/run-parts ]; then
run-parts --arg "@CIFPP_CACHE_DIR@" -- "@CIFPP_ETC_DIR@/libcifpp/cache-update.d"
if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ]; then
find "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" \
-exec test -x {} \; -and -not -exec test -d {} \; \
-exec {} "@CIFPP_CACHE_DIR@" \;
fi
exit 0