Compare commits

..

163 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
56aab89176 Refactored validator_factory interface 2026-01-05 08:30:30 +01:00
Maarten L. Hekkelman
e3e7648c5c change test 2025-12-30 15:36:07 +01:00
Maarten L. Hekkelman
83ee1984d8 null remains a problem, conceptually...
Added drop_empty_items
2025-12-30 15:28:51 +01:00
Maarten L. Hekkelman
f9741a27cd Keep track of modification 2025-12-30 10:39:42 +01:00
Maarten L. Hekkelman
0b002afb9f Add exec methods to connection 2025-12-30 10:27:15 +01:00
Maarten L. Hekkelman
a58e5a1bfc Rename function 2025-12-29 17:59:49 +01:00
Maarten L. Hekkelman
1229652444 Remove recursion 2025-12-29 17:19:03 +01:00
Maarten L. Hekkelman
05197a85c6 Fixed transactions by patching sqlite3 2025-12-29 17:11:17 +01:00
Maarten L. Hekkelman
71cd4958bd check complete sql 2025-12-29 15:52:02 +01:00
Maarten L. Hekkelman
61965c2391 More output options 2025-12-29 14:41:35 +01:00
Maarten L. Hekkelman
0f60f42f9e dirty marking, mcql work, load_dictionary 2025-12-29 12:45:56 +01:00
Maarten L. Hekkelman
2e61b330c4 Merge branch 'with-sqlite' of https://github.com/PDB-REDO/libcifpp into with-sqlite 2025-12-27 16:46:11 +01:00
Maarten L. Hekkelman
964e7620eb format output 2025-12-27 16:46:04 +01:00
Maarten L. Hekkelman
8a329e7c2d foreign key test 2025-12-24 14:07:14 +01:00
Maarten L. Hekkelman
8485747377 Merge branch 'trunk' into with-sqlite 2025-12-22 11:25:56 +01:00
Maarten L. Hekkelman
34af410d5e Use FetchContent to find sqlite3, if needed 2025-12-22 11:25:00 +01:00
UENO, M.
8f5b9eb631 Use find_package for FastFloat prior to FetchContent_Declare (#73)
* Use `find_package` for FastFloat prior to `FetchContent_Declare`

* Convert space to tab
2025-12-22 07:26:41 +01:00
Maarten L. Hekkelman
388eae211e Merge branch 'trunk' into with-sqlite 2025-12-20 08:53:23 +01:00
Maarten L. Hekkelman
73f18a4da2 PCRE2 is not thread safe, the way it is used in libcifpp type validator 2025-12-20 08:38:59 +01:00
Maarten L. Hekkelman
8e2494532e order and supress empty output 2025-12-19 15:47:43 +01:00
Maarten L. Hekkelman
2f7f62bdce Implemented drop table 2025-12-18 14:58:22 +01:00
Maarten L. Hekkelman
bf9551a994 Better error reporting in cql 2025-12-18 14:36:44 +01:00
Maarten L. Hekkelman
30ff5bea36 Better validation message 2025-12-18 14:36:27 +01:00
Maarten L. Hekkelman
b6568664ea Execute multiple statements and more 2025-12-17 16:02:53 +01:00
Maarten L. Hekkelman
863f010a7c Renaming cql files 2025-12-17 13:45:00 +01:00
Maarten L. Hekkelman
da76bbae7c With transactions 2025-12-17 12:29:31 +01:00
Maarten L. Hekkelman
e2454a2e79 Insert, Delete and Update 2025-12-17 11:05:15 +01:00
Maarten L. Hekkelman
9503a7e9b4 Transaction stream working 2025-12-16 19:33:31 +01:00
Maarten L. Hekkelman
20784bdaf5 Working around NULL problem, for now 2025-12-16 16:14:10 +01:00
Maarten L. Hekkelman
9558085105 Working, but NULL is a problem 2025-12-16 15:40:50 +01:00
Maarten L. Hekkelman
429c31ae42 First working version 2025-12-16 12:56:29 +01:00
Maarten L. Hekkelman
58ac1ce033 backup 2025-12-15 18:43:02 +01:00
Maarten L. Hekkelman
23e575858c we can create tables now 2025-12-15 16:39:39 +01:00
Maarten L. Hekkelman
4a31878975 first steps with sqlite 2025-12-09 16:47:11 +01:00
Maarten L. Hekkelman
9f6e1e245b start value expression 2025-12-09 13:42:46 +01:00
Maarten L. Hekkelman
8a8ca9599d several simple select statements added 2025-12-09 11:28:39 +01:00
Maarten L. Hekkelman
00b0473438 backup 2025-12-03 16:22:44 +01:00
Maarten L. Hekkelman
7a9d94bc57 private linking to fast_float 2025-11-27 15:34:20 +01:00
Maarten L. Hekkelman
a3ba760ab5 Merge branch 'develop' into trunk 2025-11-27 15:29:46 +01:00
Maarten L. Hekkelman
510e336306 exclude from all for fast_float 2025-11-27 09:06:22 +01:00
Maarten L. Hekkelman
f15a76e29b exclude from all for fast_float 2025-11-27 09:05:51 +01:00
Maarten L. Hekkelman
915a147449 backup 2025-11-26 16:35:30 +01:00
Maarten L. Hekkelman
edf24ca9ff work 2025-11-26 13:46:55 +01:00
Maarten L. Hekkelman
ffff2479d2 revert version-string code 2025-11-26 13:17:14 +01:00
Maarten L. Hekkelman
46a9318aa5 revert version string generator 2025-11-26 13:11:54 +01:00
Maarten L. Hekkelman
4a7f48eed8 some initial work 2025-11-19 16:36:44 +01:00
Maarten L. Hekkelman
42e66afd92 Merge branch 'develop' into cql 2025-11-19 13:29:35 +01:00
Maarten L. Hekkelman
b550e9b027 re-enable tests 2025-11-19 13:28:47 +01:00
Maarten L. Hekkelman
452bb83ce7 Remove revision.hpp file when making clean 2025-11-19 11:39:27 +01:00
Maarten L. Hekkelman
6eda9aaf36 better center_and_radius for residue 2025-11-18 16:44:53 +01:00
Maarten L. Hekkelman
251fb55d6a fixing smallest sphere 2025-11-05 13:18:58 +01:00
Maarten L. Hekkelman
f94e9aece9 create_non_poly, another 2025-11-05 11:03:56 +01:00
Maarten L. Hekkelman
c565bb96be Do not run the spinner test 2025-10-30 09:19:50 +01:00
Maarten L. Hekkelman
e51f31dc4c Remove libfmt, fix instantiating templates for fast_float usage 2025-10-30 09:08:44 +01:00
Maarten L. Hekkelman
4e128885d6 Added missing include 2025-10-29 18:21:47 +01:00
Maarten L. Hekkelman
b37054228d Added smalles sphere function 2025-10-29 17:09:13 +01:00
Maarten L. Hekkelman
815b33fee0 Matrix determinant for 4x4 2025-10-28 15:55:05 +01:00
Maarten L. Hekkelman
97f55c1639 Version bump 2025-10-22 10:05:55 +02:00
Maarten L. Hekkelman
89de73eb6f Added exists to compound_factory 2025-10-21 13:06:09 +02:00
Maarten L. Hekkelman
75f2ec3792 Remove warning 2025-10-13 14:22:57 +02:00
Maarten L. Hekkelman
f4d29e8da9 re-enable test to see if fast_float is required 2025-10-01 17:09:06 +02:00
Maarten L. Hekkelman
b97b2638b8 More supported float types 2025-10-01 17:08:33 +02:00
Maarten L. Hekkelman
ea8dea8cbd Merge branch 'develop' into cql 2025-10-01 16:46:27 +02:00
Maarten L. Hekkelman
bc0222dc0e attempt two 2025-10-01 16:42:05 +02:00
Maarten L. Hekkelman
10a6b5649b Using fast float instead of home baked version 2025-10-01 16:14:07 +02:00
Maarten L. Hekkelman
ff2a233156 stap 1, een test 2025-10-01 15:48:16 +02:00
Maarten L. Hekkelman
743e2800f8 update changelog 2025-09-30 11:21:03 +02:00
Maarten L. Hekkelman
32ac884127 Do not stop on empty audit_conform fields 2025-09-29 10:34:29 +02:00
Maarten L. Hekkelman
bec69f7d07 Fix reconstruction when entity ID's are missing 2025-09-29 09:59:04 +02:00
Maarten L. Hekkelman
a99215ad6a version bump 2025-09-24 16:45:05 +02:00
Maarten L. Hekkelman
e3d2cbd044 Lower required catch2 version 2025-09-24 16:42:45 +02:00
Maarten L. Hekkelman
5fc965789d messages updated 2025-09-24 15:11:33 +02:00
Maarten L. Hekkelman
b4596902aa Add compile features for Catch2, required on Windows 2025-09-24 14:15:19 +02:00
Maarten L. Hekkelman
cbf8b52f62 Update catch2 usage 2025-09-24 13:25:56 +02:00
Maarten L. Hekkelman
4e0fa1c916 No complete jthread on macOS/CLang 2025-09-24 13:00:50 +02:00
Maarten L. Hekkelman
95b007d38f Merge branch 'trunk' into develop 2025-09-24 11:38:01 +02:00
Maarten L. Hekkelman
b66f7a30ce Progress bar using WriteConsole on Windows 2025-09-24 11:36:37 +02:00
Maarten L. Hekkelman
ec7287c503 remove warning 2025-09-24 11:32:12 +02:00
Maarten L. Hekkelman
a41c591f0c Restore order of imports, avoid reordering by clang-format 2025-09-24 10:51:53 +02:00
Maarten L. Hekkelman
3a6527cdd5 yet another update on progress bar 2025-09-24 10:23:58 +02:00
Maarten L. Hekkelman
5f21a094c0 added flush to progress bar 2025-09-24 10:14:03 +02:00
Maarten L. Hekkelman
2203a1855d improved progress bar 2025-09-24 09:49:28 +02:00
Maarten L. Hekkelman
7edd2ecc18 new progress bar 2025-09-23 15:53:55 +02:00
Maarten L. Hekkelman
1d2953c850 Fix reconstruction, version bump 2025-09-22 13:51:18 +02:00
Maarten L. Hekkelman
dbf59ce622 reconstruct better when entity ID's are missing 2025-09-22 12:59:16 +02:00
Maarten L. Hekkelman
1596db8499 Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2025-09-16 13:29:57 +02:00
Maarten L. Hekkelman
bd1fb5c5cd Added model::create_link 2025-09-16 13:29:51 +02:00
Maarten L. Hekkelman
da500025c3 swap atoms should swap type_symbol as well 2025-09-10 17:16:22 +02:00
Maarten L. Hekkelman
60eeea9a93 more resilient loading of dictionary data 2025-09-10 15:06:06 +02:00
Maarten L. Hekkelman
1220f01f1e change location of mmcif_ma.dic 2025-09-10 14:22:52 +02:00
Maarten L. Hekkelman
ad0a34fe98 Update changelog 2025-09-10 12:49:43 +02:00
Maarten L. Hekkelman
a7425ff1a0 Optimise validation code 2025-09-10 12:40:01 +02:00
Maarten L. Hekkelman
1ce25f86ae better check anisotrop atoms 2025-09-10 12:19:56 +02:00
Maarten L. Hekkelman
cd93f72b96 Merge branch 'develop' into better-create-entity-ids 2025-09-10 09:28:50 +02:00
Maarten L. Hekkelman
23500bd303 Fix reconstruction of really bare files 2025-09-10 09:25:49 +02:00
Maarten L. Hekkelman
14b4753b4f test for null 2025-09-09 19:55:13 +02:00
Maarten L. Hekkelman
4c37d5db5f use rowhandles 2025-09-09 19:52:39 +02:00
Maarten L. Hekkelman
fc2c4b4172 fix map::at in reconstruct sequences 2025-09-09 10:51:31 +02:00
Maarten L. Hekkelman
3ac64de16b Version bump, update changelog 2025-09-03 14:55:20 +02:00
Maarten L. Hekkelman
45eecd72b0 using pkg-config, when available 2025-09-03 14:24:29 +02:00
Maarten L. Hekkelman
d1dd558cda as object lib 2025-09-03 14:00:22 +02:00
Maarten L. Hekkelman
d19e2c2196 Update pcre2s 2025-09-03 13:11:45 +02:00
Maarten L. Hekkelman
72c7aca074 revert to catch2 version 2, due to linker errors on Windows? 2025-09-03 13:04:13 +02:00
Maarten L. Hekkelman
683a1087d0 Install catch2 before testing 2025-09-03 12:28:06 +02:00
Maarten L. Hekkelman
35bc139deb Update pcre2s 2025-09-03 11:39:00 +02:00
Maarten L. Hekkelman
45ece2fa0d pcre2 is no longer a depends on Windows 2025-09-03 11:18:04 +02:00
Maarten L. Hekkelman
11c98f553f Update Catch2 to version 3
Updated pcre2s
2025-09-03 11:15:31 +02:00
Maarten L. Hekkelman
28aa9b1036 Using pcre2s 2025-09-03 11:07:02 +02:00
Maarten L. Hekkelman
d7b5c0a748 remove message 2025-09-02 15:21:04 +02:00
Maarten L. Hekkelman
065e7f5f18 added clang format file 2025-09-02 14:59:28 +02:00
Maarten L. Hekkelman
4b1623cfdc pcre2 again 2025-09-02 14:57:07 +02:00
Maarten L. Hekkelman
1de973ddcb Fix findpcre2 cmake file 2025-09-02 13:12:20 +02:00
Maarten L. Hekkelman
eecc801203 last remaining warning 2025-09-02 12:57:08 +02:00
Maarten L. Hekkelman
5c50154ea4 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-09-02 12:54:03 +02:00
Maarten L. Hekkelman
0fa3d6aa94 Removing warning using MSVC 2025-09-02 12:54:07 +02:00
Maarten L. Hekkelman
01f5242bfb Revert "Add formatting file"
This reverts commit af6d8d4f71.
2025-09-02 11:57:21 +02:00
Maarten L. Hekkelman
af6d8d4f71 Add formatting file 2025-09-02 11:47:50 +02:00
Maarten L. Hekkelman
fa8285fc0f use std::format anyway, even if __cpp_lib_format is not defined. 2025-09-02 10:23:00 +02:00
Maarten L. Hekkelman
2e7f6b8337 cross platform check for lib format 2025-09-02 10:03:36 +02:00
Maarten L. Hekkelman
a6a55020eb Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-09-01 10:54:09 +02:00
Maarten L. Hekkelman
0e84ea454d fmt fix 2025-09-01 10:53:29 +02:00
Maarten L. Hekkelman
f3bf211d45 pdb formatting 2025-09-01 09:34:07 +02:00
Maarten L. Hekkelman
f5ef44836c pdb formatting 2025-09-01 09:31:22 +02:00
Maarten L. Hekkelman
070124b6e1 fix cif2pdb 2025-08-29 14:00:27 +02:00
Maarten L. Hekkelman
c8a46fcdd9 Stop when element is missing in reading PDB input 2025-08-29 13:17:17 +02:00
Maarten L. Hekkelman
5306b59fd8 Do not write zero seq ID's in PDB files 2025-08-29 10:08:56 +02:00
Maarten L. Hekkelman
90c5df832a Check only first datablock 2025-08-29 09:49:27 +02:00
Maarten L. Hekkelman
2aa439d51f test for fmt 2025-08-28 08:48:20 +02:00
Maarten L. Hekkelman
ac2b68517c conditional fmt 2025-08-27 16:08:51 +02:00
Maarten L. Hekkelman
e56b568c42 use cif::format... sigh 2025-08-27 15:46:41 +02:00
Maarten L. Hekkelman
63c49b2e04 Fix writing pdb files 2025-08-27 15:20:20 +02:00
Maarten L. Hekkelman
559fd18a20 Fix std::format usage 2025-08-27 08:57:40 +02:00
Maarten L. Hekkelman
beb7585261 fix std::format usage 2025-08-27 08:24:07 +02:00
Maarten L. Hekkelman
8b0f92aa9a Merge remote-tracking branch 'github/using-fmt' into develop 2025-08-27 07:43:22 +02:00
Maarten L. Hekkelman
0d8beeae5b No longer needed 2025-08-26 16:04:31 +02:00
Maarten L. Hekkelman
e3da654e67 Speed up build when eigen3 is not available 2025-08-26 15:54:55 +02:00
Maarten L. Hekkelman
dc9e151d89 remove warning 2025-08-26 15:37:22 +02:00
Maarten L. Hekkelman
7cfaf051ba should now work on windows 2025-08-26 15:16:01 +02:00
Maarten L. Hekkelman
7920491309 hope to find pcre2.h 2025-08-26 14:07:47 +02:00
Maarten L. Hekkelman
0ee493a3fb implib? 2025-08-26 13:55:25 +02:00
Maarten L. Hekkelman
7e23bc0c0b finding pcre2 on windows 2025-08-26 13:46:03 +02:00
Maarten L. Hekkelman
579f859562 no find_packge for pcre2 2025-08-26 13:42:14 +02:00
Maarten L. Hekkelman
752938ca44 using find_package? 2025-08-26 13:38:37 +02:00
Maarten L. Hekkelman
fce58c02fe no pcre2grep please, no tests either 2025-08-26 13:34:43 +02:00
Maarten L. Hekkelman
924f7c1505 finding pcre2, yet another attempt 2025-08-26 13:31:12 +02:00
Maarten L. Hekkelman
8944906fd2 fix warning, pcre2 2025-08-26 12:41:08 +02:00
Maarten L. Hekkelman
cb02969604 Using std::format 2025-08-25 16:31:00 +02:00
Maarten L. Hekkelman
31090c6ec5 attempt 2 2025-08-25 11:25:10 +02:00
Maarten L. Hekkelman
9e30d2bc1a finding pcre2 2025-08-25 10:39:48 +02:00
Maarten L. Hekkelman
93d703f7a1 Do not buld pcre tests 2025-08-20 20:40:08 +02:00
Maarten L. Hekkelman
3c241048a5 do not install pcre 2025-08-20 17:02:40 +02:00
Maarten L. Hekkelman
2788536799 this should work 2025-08-20 16:58:52 +02:00
Maarten L. Hekkelman
314d435a18 Another way of importing pcre 2025-08-20 16:49:12 +02:00
Maarten L. Hekkelman
37edcd8666 Finding and optionally building pcre 2025-08-20 15:47:49 +02:00
Maarten L. Hekkelman
10e290fbdf pcre2 in github actions? 2025-08-20 14:40:50 +02:00
Maarten L. Hekkelman
58cda1241e cleanup 2025-08-20 13:41:45 +02:00
Maarten L. Hekkelman
3659aaabff remove unneeded allocations 2025-08-20 13:35:15 +02:00
Maarten L. Hekkelman
727a39cc54 Finishing up replacing boost with pcre 2025-08-20 13:28:24 +02:00
Maarten L. Hekkelman
fd9ccdfff9 Using pcre instead of boost::regex 2025-08-19 16:16:43 +02:00
Maarten L. Hekkelman
aabee270b3 update .gitignore 2025-08-19 14:28:51 +02:00
Maarten L. Hekkelman
647c58f8ec allow code to be built with older compilers... 2025-08-19 12:44:41 +02:00
Maarten L. Hekkelman
0b8024d19c Optimise query processing 2025-08-19 12:24:33 +02:00
Maarten L. Hekkelman
d59b0bf27f Remove wrong warnings 2025-08-13 11:30:53 +02:00
68 changed files with 288105 additions and 2368 deletions

22
.clang-format Normal file
View File

@@ -0,0 +1,22 @@
BasedOnStyle: LLVM
UseTab: AlignWithSpaces
IndentWidth: 4
TabWidth: 4
BreakBeforeBraces: Allman
ColumnLimit: 0
NamespaceIndentation: Inner
FixNamespaceComments: true
AccessModifierOffset: -2
AllowShortCaseLabelsOnASingleLine: true
IndentCaseLabels: true
BreakConstructorInitializers: BeforeComma
BraceWrapping:
BeforeLambdaBody: false
AlignAfterOpenBracket: DontAlign
Cpp11BracedListStyle: false
IncludeBlocks: Regroup
LambdaBodyIndentation: Signature
AllowShortLambdasOnASingleLine: Inline
EmptyLineBeforeAccessModifier: LogicalBlock
IndentPPDirectives: AfterHash
PPIndentWidth: 1

View File

@@ -33,13 +33,18 @@ jobs:
- name: Install dependencies Ubuntu
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get update && sudo apt-get install mrc
run: sudo apt-get update && sudo apt-get install mrc catch2
- name: Install dependencies Windows
if: matrix.os == 'windows-latest'
run: ./tools/depends.cmd
shell: cmd
- name: Install Catch2 macOS
if: matrix.os == 'macos-latest'
run: >
brew install catch2
- name: Configure CMake
run: >
cmake -B ${{ steps.strings.outputs.build-output-dir }}

3
.gitignore vendored
View File

@@ -13,3 +13,6 @@ docs/api
docs/conf.py
build_ci/
data/components.cif
perf.data*
.cache/

View File

@@ -24,47 +24,41 @@
cmake_minimum_required(VERSION 3.23)
cmake_policy(SET CMP0135 NEW)
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
# set the project name
project(
libcifpp
VERSION 9.0.0
LANGUAGES CXX)
libcifpp
VERSION 10.0.0
LANGUAGES CXX C)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(FindAtomic)
include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(CheckCXXSourceCompiles)
include(GenerateExportHeader)
include(CTest)
include(FetchContent)
include(ExternalProject)
# FindBoost, take care of it now.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
cmake_policy(SET CMP0167 NEW)
endif()
include(FetchContent)
include(VersionString)
# When building with ninja-multiconfig, build both debug and release by default
if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
set(CMAKE_CROSS_CONFIGS "Debug;Release")
set(CMAKE_DEFAULT_CONFIGS "Debug;Release")
set(CMAKE_CROSS_CONFIGS "Debug;Release")
set(CMAKE_DEFAULT_CONFIGS "Debug;Release")
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
)
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
)
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
# Build documentation?
@@ -73,165 +67,147 @@ set(BUILD_DOCUMENTATION OFF CACHE BOOL "Build the documentation")
# Optionally build a version to be installed inside CCP4
set(BUILD_FOR_CCP4 OFF CACHE BOOL "Build a version to be installed in CCP4")
# Optionally use libcurl to fetch compound files
set(USE_CURL_FOR_CCD ON CACHE BOOL "Use curl to fetch missing CCD files")
# Create the cql/sqlite interface
set(BUILD_SQLITE_INTERFACE ON CACHE BOOL "Build the sqlite interface")
# Building shared libraries?
if(NOT(BUILD_FOR_CCP4 AND WIN32))
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
if(NOT (BUILD_FOR_CCP4 AND WIN32))
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
endif()
if(PROJECT_IS_TOP_LEVEL AND NOT BUILD_FOR_CCP4)
# Lots of code depends on the availability of the components.cif file
set(CIFPP_DOWNLOAD_CCD ON CACHE BOOL "Download the CCD file components.cif during installation")
# Lots of code depends on the availability of the components.cif file
set(CIFPP_DOWNLOAD_CCD ON CACHE BOOL "Download the CCD file components.cif during installation")
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
set(CIFPP_INSTALL_UPDATE_SCRIPT ON CACHE BOOL "Install the script to update CCD and dictionary files")
endif()
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
set(CIFPP_INSTALL_UPDATE_SCRIPT ON CACHE BOOL "Install the script to update CCD and dictionary files")
endif()
else()
unset(CIFPP_DOWNLOAD_CCD)
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
unset(CIFPP_DOWNLOAD_CCD)
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
endif()
# When CCP4 is sourced in the environment, we can recreate the symmetry
# operations table
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
set(CIFPP_RECREATE_SYMOP_DATA ON CACHE BOOL "Recreate SymOp data table in case it is out of date")
set(CIFPP_RECREATE_SYMOP_DATA ON CACHE BOOL "Recreate SymOp data table in case it is out of date")
endif()
# CCP4 build
if(BUILD_FOR_CCP4)
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
message(FATAL_ERROR "cifpp: A CCP4 built was requested but CCP4 was not sourced")
else()
list(PREPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
message(FATAL_ERROR "cifpp: A CCP4 built was requested but CCP4 was not sourced")
else()
list(PREPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
if(WIN32)
set(BUILD_SHARED_LIBS ON)
endif()
endif()
if(WIN32)
set(BUILD_SHARED_LIBS ON)
endif()
endif()
endif()
# Now include the GNUInstallDirs module
include(GNUInstallDirs)
if(WIN32)
if(${CMAKE_SYSTEM_VERSION} GREATER_EQUAL 10) # Windows 10
add_definitions(-D _WIN32_WINNT=0x0A00)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.3) # Windows 8.1
add_definitions(-D _WIN32_WINNT=0x0603)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.2) # Windows 8
add_definitions(-D _WIN32_WINNT=0x0602)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.1) # Windows 7
add_definitions(-D _WIN32_WINNT=0x0601)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.0) # Windows Vista
add_definitions(-D _WIN32_WINNT=0x0600)
else() # Windows XP (5.1)
add_definitions(-D _WIN32_WINNT=0x0501)
endif()
if(${CMAKE_SYSTEM_VERSION} GREATER_EQUAL 10) # Windows 10
add_definitions(-D _WIN32_WINNT=0x0A00)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.3) # Windows 8.1
add_definitions(-D _WIN32_WINNT=0x0603)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.2) # Windows 8
add_definitions(-D _WIN32_WINNT=0x0602)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.1) # Windows 7
add_definitions(-D _WIN32_WINNT=0x0601)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.0) # Windows Vista
add_definitions(-D _WIN32_WINNT=0x0600)
else() # Windows XP (5.1)
add_definitions(-D _WIN32_WINNT=0x0501)
endif()
# We do not want to write an export file for all our symbols...
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
# We do not want to write an export file for all our symbols...
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
if(MSVC)
# make msvc standards compliant...
add_compile_options(/permissive- /bigobj)
add_link_options(/NODEFAULTLIB:library)
# make msvc standards compliant...
add_compile_options(/permissive- /bigobj)
add_link_options(/NODEFAULTLIB:library)
# This is dubious...
if(BUILD_SHARED_LIBS)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
# This is dubious...
if(BUILD_SHARED_LIBS)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
endif()
# Libraries
# Start by finding out if std::regex is usable. Note that the current
# implementation in GCC is not acceptable, it crashes on long lines. The
# implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles(
"
#include <iostream>
#ifndef __GLIBCXX__
#error
#endif
int main(int argc, char *argv[]) { return 0; }"
GXX_LIBSTDCPP)
if(GXX_LIBSTDCPP)
message(
STATUS "cifpp: Testing for known regex bug, since you're using GNU libstdc++")
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
${CMAKE_CURRENT_BINARY_DIR}/test
${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-rx.cpp)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(
STATUS
"cifpp: You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
)
find_package(Boost 1.80 QUIET COMPONENTS regex)
if(NOT Boost_FOUND)
set(BOOST_REGEX_STANDALONE ON)
FetchContent_Declare(
boost-rx
GIT_REPOSITORY https://github.com/boostorg/regex
GIT_TAG boost-1.83.0)
FetchContent_MakeAvailable(boost-rx)
endif()
set(BOOST_REGEX ON)
endif()
endif()
if(MSVC)
# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
# set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
endif()
# Avoid linking the shared library of zlib. Search ZLIB_ROOT first if it is
# set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
endif()
# Normal search.
set(_ZLIB_x86 "(x86)")
set(_ZLIB_SEARCH_NORMAL
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
unset(_ZLIB_x86)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
# Normal search.
set(_ZLIB_x86 "(x86)")
set(_ZLIB_SEARCH_NORMAL
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
unset(_ZLIB_x86)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
if(BUILD_FOR_CCP4)
list(PREPEND _ZLIB_SEARCHES "$ENV{CCP4}/lib")
endif()
if(BUILD_FOR_CCP4)
list(PREPEND _ZLIB_SEARCHES "$ENV{CCP4}/lib")
endif()
foreach(search ${_ZLIB_SEARCHES})
find_library(
ZLIB_LIBRARY
NAMES zlibstatic NAMES_PER_DIR ${${search}}
PATH_SUFFIXES lib)
endforeach()
foreach(search ${_ZLIB_SEARCHES})
find_library(
ZLIB_LIBRARY
NAMES zlibstatic NAMES_PER_DIR ${${search}}
PATH_SUFFIXES lib)
endforeach()
endif()
if(USE_CURL_FOR_CCD)
find_package(CURL REQUIRED)
# Using fast_float for float parsing, but only if needed
try_compile(STD_CHARCONV_COMPILING
SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-charconv.cpp
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON)
if(NOT STD_CHARCONV_COMPILING)
message(NOTICE "libcifpp: Using fast_float for std::from_chars")
find_package(FastFloat 8.0 QUIET CONFIG)
if(NOT FastFloat_FOUND)
message(STATUS "FastFloat not found in system, fetching from GitHub")
FetchContent_Declare(fastfloat
GIT_REPOSITORY "https://github.com/fastfloat/fast_float"
GIT_TAG v8.0.2
EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(fastfloat)
endif()
endif()
find_package(ZLIB QUIET)
find_package(Threads)
find_package(ZLIB QUIET)
if(NOT ZLIB_FOUND)
message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
endif()
include(FindPkgConfig)
if(PKG_CONFIG_FOUND)
pkg_check_modules(PCRE2 IMPORTED_TARGET libpcre2-8)
endif()
if(NOT PCRE2_FOUND)
add_subdirectory(pcre2-simple)
endif()
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
@@ -240,360 +216,362 @@ endif()
find_package(Eigen3 3.4 QUIET)
if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
INTERFACE_INCLUDE_DIRECTORIES)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
INTERFACE_INCLUDE_DIRECTORIES)
else()
# Use ExternalProject since FetchContent always tries to install the result...
ExternalProject_Add(my-eigen3
GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
GIT_TAG 3.4.0
INSTALL_COMMAND "")
# Use ExternalProject since FetchContent always tries to install the result...
ExternalProject_Add(my-eigen3
URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
DOWNLOAD_EXTRACT_TIMESTAMP TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND "")
ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
endif()
message(STATUS "cifpp: Eigen include dir is ${EIGEN_INCLUDE_DIR}")
# Create a revision file, containing the current git version info
include(VersionString)
write_version_header(${CMAKE_CURRENT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
# The tool to create the table
add_executable(symop-map-generator
"${CMAKE_CURRENT_SOURCE_DIR}/src/symop-map-generator.cpp")
# The tool to create the table
add_executable(symop-map-generator
"${CMAKE_CURRENT_SOURCE_DIR}/src/symop-map-generator.cpp")
target_compile_features(symop-map-generator PUBLIC cxx_std_20)
target_compile_features(symop-map-generator PUBLIC cxx_std_20)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
COMMAND
$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
$ENV{CLIBD}/symop.lib ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp)
add_custom_command(
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
COMMAND
$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
$ENV{CLIBD}/symop.lib ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp)
add_custom_target(
OUTPUT
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
"$ENV{CLIBD}/symop.lib")
add_custom_target(
OUTPUT
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
"$ENV{CLIBD}/symop.lib")
endif()
# Create a revision file, containing the current git version info
write_version_header("${CMAKE_CURRENT_SOURCE_DIR}/src/" LIB_NAME "LibCIFPP")
# Sources
set(project_sources
${CMAKE_CURRENT_SOURCE_DIR}/src/category.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/condition.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/datablock.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dictionary_parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/file.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/item.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/row.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/text.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/utilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/atom_type.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/compound.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/point.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/symmetry.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/model.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
list(APPEND project_sources
${CMAKE_CURRENT_SOURCE_DIR}/src/category.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/condition.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/datablock.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dictionary_parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/file.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/item.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/row.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/text.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/utilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/atom_type.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/compound.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/point.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/symmetry.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/model.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/cql.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
)
set(project_headers
include/cif++.hpp
include/cif++/atom_type.hpp
include/cif++/category.hpp
include/cif++/compound.hpp
include/cif++/condition.hpp
include/cif++/datablock.hpp
include/cif++/dictionary_parser.hpp
include/cif++/exports.hpp
include/cif++/file.hpp
include/cif++/format.hpp
include/cif++/forward_decl.hpp
include/cif++/gzio.hpp
include/cif++/item.hpp
include/cif++/iterator.hpp
include/cif++/matrix.hpp
include/cif++/model.hpp
include/cif++/parser.hpp
include/cif++/pdb/cif2pdb.hpp
include/cif++/pdb.hpp
include/cif++/pdb/io.hpp
include/cif++/pdb/pdb2cif.hpp
include/cif++/pdb/tls.hpp
include/cif++/point.hpp
include/cif++/row.hpp
include/cif++/symmetry.hpp
include/cif++/text.hpp
include/cif++/utilities.hpp
include/cif++/validate.hpp
if(BUILD_SQLITE_INTERFACE)
list(APPEND project_sources
${CMAKE_CURRENT_SOURCE_DIR}/src/sqlite3/sqlite3.c)
endif()
list(APPEND project_headers
include/cif++.hpp
include/cif++/atom_type.hpp
include/cif++/category.hpp
include/cif++/compound.hpp
include/cif++/condition.hpp
include/cif++/datablock.hpp
include/cif++/dictionary_parser.hpp
include/cif++/exports.hpp
include/cif++/file.hpp
include/cif++/format.hpp
include/cif++/forward_decl.hpp
include/cif++/gzio.hpp
include/cif++/item.hpp
include/cif++/iterator.hpp
include/cif++/matrix.hpp
include/cif++/model.hpp
include/cif++/parser.hpp
include/cif++/pdb/cif2pdb.hpp
include/cif++/pdb.hpp
include/cif++/pdb/io.hpp
include/cif++/pdb/pdb2cif.hpp
include/cif++/pdb/tls.hpp
include/cif++/point.hpp
include/cif++/row.hpp
include/cif++/symmetry.hpp
include/cif++/text.hpp
include/cif++/cql.hpp
include/cif++/utilities.hpp
include/cif++/validate.hpp
)
add_library(cifpp)
add_library(cifpp::cifpp ALIAS cifpp)
if(TARGET my-eigen3)
add_dependencies(cifpp my-eigen3)
add_dependencies(cifpp my-eigen3)
endif()
target_sources(cifpp
PRIVATE ${project_sources}
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
PUBLIC
FILE_SET cifpp_headers TYPE HEADERS
BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
FILES ${project_headers}
PRIVATE ${project_sources}
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
PUBLIC
FILE_SET cifpp_headers TYPE HEADERS
BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
FILES ${project_headers}
)
# The code now really requires C++20
target_compile_features(cifpp PUBLIC cxx_std_20)
set(CMAKE_DEBUG_POSTFIX d)
set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
generate_export_header(cifpp EXPORT_FILE_NAME
${CMAKE_CURRENT_SOURCE_DIR}/include/cif++/exports.hpp)
if(BOOST_REGEX)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
INTERFACE_INCLUDE_DIRECTORIES)
endif()
${CMAKE_CURRENT_SOURCE_DIR}/include/cif++/exports.hpp)
if(MSVC)
target_compile_definitions(cifpp PUBLIC NOMINMAX=1)
target_compile_definitions(cifpp PUBLIC NOMINMAX=1)
endif()
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(
cifpp
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
cifpp
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${EIGEN_INCLUDE_DIR}")
target_link_libraries(cifpp
PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>
$<IF:$<BOOL:${USE_CURL_FOR_CCD}>,CURL::libcurl,>)
PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
if (USE_CURL_FOR_CCD)
target_compile_definitions(cifpp PUBLIC HAVE_CURL)
if(PCRE2_FOUND)
target_include_directories(cifpp PRIVATE ${PCRE2_INCLUDE_DIRS})
target_link_libraries(cifpp PRIVATE ${PCRE2_LINK_LIBRARIES})
else()
target_link_libraries(cifpp PRIVATE $<BUILD_INTERFACE:pcre2s>)
endif()
if(NOT STD_CHARCONV_COMPILING)
target_link_libraries(cifpp PRIVATE FastFloat::fast_float)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
if(CIFPP_DOWNLOAD_CCD)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/components.cif)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/components.cif)
if(EXISTS ${COMPONENTS_CIF})
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
if(EXISTS ${COMPONENTS_CIF})
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
if(CCD_FILE_SIZE EQUAL 0)
message(STATUS "cifpp: Removing empty ${COMPONENTS_CIF} file")
file(REMOVE "${COMPONENTS_CIF}")
endif()
endif()
if(CCD_FILE_SIZE EQUAL 0)
message(STATUS "cifpp: Removing empty ${COMPONENTS_CIF} file")
file(REMOVE "${COMPONENTS_CIF}")
endif()
endif()
if(NOT EXISTS ${COMPONENTS_CIF})
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
# to download the gzipped version and decompress it ourselves.
find_program(GUNZIP gunzip)
if(NOT EXISTS ${COMPONENTS_CIF})
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
# to download the gzipped version and decompress it ourselves.
find_program(GUNZIP gunzip)
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
file(
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
${COMPONENTS_CIF}
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
else()
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
file(
DOWNLOAD
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
${COMPONENTS_CIF}.gz
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
endif()
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
file(
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
${COMPONENTS_CIF}
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
else()
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
file(
DOWNLOAD
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
${COMPONENTS_CIF}.gz
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
endif()
add_custom_command(
OUTPUT ${COMPONENTS_CIF}
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/)
add_custom_command(
OUTPUT ${COMPONENTS_CIF}
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/)
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Do not continue if downloading went wrong
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
# Do not continue if downloading went wrong
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
if(CCD_FETCH_STATUS_CODE)
message(
FATAL_ERROR "cifpp: Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
if(CCD_FETCH_STATUS_CODE)
message(
FATAL_ERROR "cifpp: Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
endif()
# Installation directories
if(BUILD_FOR_CCP4)
set(CIFPP_DATA_DIR
"$ENV{CCP4}/share/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR
"$ENV{CCP4}/share/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
else()
set(CIFPP_DATA_DIR
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
set(CIFPP_DATA_DIR
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
endif()
if(CIFPP_DATA_DIR)
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
set_target_properties(cifpp PROPERTIES CIFPP_DATA_DIR ${CIFPP_DATA_DIR})
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
set_target_properties(cifpp PROPERTIES CIFPP_DATA_DIR ${CIFPP_DATA_DIR})
endif()
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${CIFPP_DATA_DIR} PARENT_SCOPE)
set(CIFPP_SHARE_DIR ${CIFPP_DATA_DIR} PARENT_SCOPE)
endif()
if(UNIX AND NOT BUILD_FOR_CCP4)
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CACHE_DIR
"/var/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
else()
set(CIFPP_CACHE_DIR
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
endif()
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CACHE_DIR
"/var/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
else()
set(CIFPP_CACHE_DIR
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
endif()
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
set(CIFPP_ETC_DIR
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
CACHE PATH "The directory where the update configuration file is stored")
set(CIFPP_ETC_DIR
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
CACHE PATH "The directory where the update configuration file is stored")
else()
unset(CIFPP_CACHE_DIR)
unset(CIFPP_CACHE_DIR)
endif()
# Install rules
install(TARGETS cifpp
EXPORT cifpp
FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
EXPORT cifpp
FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
if(MSVC AND BUILD_SHARED_LIBS)
install(
FILES $<TARGET_PDB_FILE:cifpp>
DESTINATION ${CMAKE_INSTALL_LIBDIR}
OPTIONAL)
install(
FILES $<TARGET_PDB_FILE:cifpp>
DESTINATION ${CMAKE_INSTALL_LIBDIR}
OPTIONAL)
endif()
# Clean up old config files (with old names)
file(GLOB OLD_CONFIG_FILES
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppConfig*.cmake
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppConfig*.cmake
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
if(OLD_CONFIG_FILES)
message(
STATUS "cifpp: Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
message(
STATUS "cifpp: Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
endif()
install(EXPORT cifpp
NAMESPACE cifpp::
FILE "cifpp-targets.cmake"
DESTINATION lib/cmake/cifpp)
NAMESPACE cifpp::
FILE "cifpp-targets.cmake"
DESTINATION lib/cmake/cifpp)
install(
FILES ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ma.dic
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
FILES ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ma.dic
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
install(FILES ${COMPONENTS_CIF}
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
install(FILES ${COMPONENTS_CIF}
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
endif()
set(CONFIG_TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
configure_package_config_file(
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION lib/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR)
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION lib/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
DESTINATION lib/cmake/cifpp)
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
DESTINATION lib/cmake/cifpp)
set_target_properties(
cifpp
PROPERTIES VERSION ${PROJECT_VERSION}
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
cifpp
PROPERTIES VERSION ${PROJECT_VERSION}
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
set_property(
TARGET cifpp
APPEND
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
TARGET cifpp
APPEND
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion)
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion)
if(BUILD_TESTING AND PROJECT_IS_TOP_LEVEL)
add_subdirectory(test)
add_subdirectory(test)
endif()
# Optionally install the update scripts for CCD and dictionary files
if(CIFPP_INSTALL_UPDATE_SCRIPT)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/tools/update-libcifpp-data.in
update-libcifpp-data @ONLY)
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/tools/update-libcifpp-data.in
update-libcifpp-data @ONLY)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR
${CMAKE_SYSTEM_NAME} STREQUAL "GNU" OR
${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/cron.weekly
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
else()
message(FATAL_ERROR "cifpp: Don't know where to install the update script")
endif()
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR
${CMAKE_SYSTEM_NAME} STREQUAL "GNU" OR
${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/cron.weekly
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
else()
message(FATAL_ERROR "cifpp: Don't know where to install the update script")
endif()
# a config file, to make it complete
# install(DIRECTORY DESTINATION "${CMAKE_INSTALL_LOCALSTATEDIR}/libcifpp")
if(NOT EXISTS "${CMAKE_INSTALL_SYSCONFDIR}/libcifpp.conf")
file(
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
[[# Uncomment the next line to enable automatic updates
# a config file, to make it complete
# install(DIRECTORY DESTINATION "${CMAKE_INSTALL_LOCALSTATEDIR}/libcifpp")
if(NOT EXISTS "${CMAKE_INSTALL_SYSCONFDIR}/libcifpp.conf")
file(
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
[[# Uncomment the next line to enable automatic updates
# update=true
]])
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR})
install(
CODE "message(\"cifpp: A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR})
install(
CODE "message(\"cifpp: A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
install(DIRECTORY DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/libcifpp/cache-update.d)
endif()
install(DIRECTORY DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/libcifpp/cache-update.d)
endif()
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif()
if(BUILD_DOCUMENTATION)
add_subdirectory(docs)
add_subdirectory(docs)
endif()

View File

@@ -117,12 +117,8 @@ Other libraries you might want to install beforehand are:
`libeigen3-dev`
- [zlib](https://github.com/madler/zlib), the development version of this
library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [boost](https://www.boost.org), in Debian/Ubuntu this is `libboost-dev`.
The Boost libraries are only needed in case you are using GCC due to a long
standing bug in GNU's implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
- [pcre2](https://www.pcre.org/), the Perl Compatible Regular Expression
library. On Debian/Ubuntu this is the package `libpcre2-dev`.
### Building

View File

@@ -1,3 +1,26 @@
Version 10.0.0
- Added a SQLite interface.
Version 9.0.5
- Added exists to compound_factory
- Added sub_matrix, fix and extend determinant calculation
- Added yet another structure::create_non_poly
- Remove revision.hpp file in make clean (new VersionString.cmake)
Version 9.0.4
- Fix various stopping and reconstruction errors
Version 9.0.3
- Reconstruction fixed when some entity ids are missing
Version 9.0.2
- Fix code that reconstructs sequences, could throw a map::at
- Many optimisations in validation and reconstruction code.
Version 9.0.1
- Use pcre2 from pkg-config if available, if not
build a version from the original code.
Version 9.0.0
- Rename fields of cif::mm::polymer to match the naming
in mmcif_pdbx.dic. Also, related, fix building mm::structure

12
cmake/FindPCRE2.cmake Normal file
View File

@@ -0,0 +1,12 @@
# The problem is, find_package(PCRE2) does not work
# and using pkg-config results in linking to a shared library
# causing all kinds of trouble later on
find_path(PCRE2_INCLUDEDIR NAMES pcre2.h HINTS "C:/Program Files (x86)/PCRE2/include" REQUIRED)
find_library(PCRE2_LIBRARY NAMES pcre2-8-static libpcre2-8.a HINTS "C:/Program Files (x86)/PCRE2/lib" REQUIRED)
add_library(pcre2-8 IMPORTED STATIC)
target_include_directories(pcre2-8 INTERFACE ${PCRE2_INCLUDEDIR})
target_compile_definitions(pcre2-8 INTERFACE PCRE2_STATIC)
set_target_properties(pcre2-8 PROPERTIES IMPORTED_LOCATION ${PCRE2_LIBRARY})
set_target_properties(pcre2-8 PROPERTIES IMPORTED_IMPLIB ${PCRE2_LIBRARY})

View File

@@ -238,7 +238,7 @@ function(write_version_header dir)
if(res EQUAL 0)
set(REVISION_STRING "${out}")
else()
message(STATUS "Git hash not found, does this project has a 'build' tag?")
message(STATUS "Git hash not found, does this project have a 'build' tag?")
endif()
else()
message(STATUS "Git hash not found")

View File

@@ -8,6 +8,5 @@ include(CMakeFindDependencyMacro)
find_dependency(Threads)
find_dependency(ZLIB REQUIRED)
find_dependency(CURL REQUIRED)
check_required_components(cifpp)

17
cmake/test-charconv.cpp Normal file
View File

@@ -0,0 +1,17 @@
#include <charconv>
#include <cassert>
#include <cstring>
// Probe program for the build system: it only compiles when std::from_chars
// accepts a float, and when executed it checks that "1.0" is parsed completely
// and yields exactly 1.0f.
int main()
{
	float v{};
	char s[] = "1.0";
	const char *const end = s + std::strlen(s);

	auto r = std::from_chars(s, end, v);

	// No error may be reported ...
	assert(r.ec == std::errc{});
	// ... the whole input must be consumed (this used to be `=`, an assignment
	// that always evaluated to true and overwrote r.ptr) ...
	assert(r.ptr == end);
	// ... and the parsed value must be exact.
	assert(v == 1.0f);

	return 0;
}

View File

@@ -1,18 +0,0 @@
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
#include <iostream>
#include <regex>
// Probe program: runs a lazy, anchored std::regex over a string of 100,000
// '*' characters and prints the first ten characters of both the input and
// the first capture group.
// NOTE(review): presumably this exists to detect the std::regex problem from
// the GCC bug report referenced at the top of this file; confirm against the
// build scripts that use it.
int main()
{
// Long, uniform input; the size is part of the probe.
std::string s(100'000, '*');
std::smatch m;
// Non-greedy capture of the whole line.
std::regex r("^(.*?)$");
std::regex_search(s, m, r);
std::cout << s.substr(0, 10) << '\n';
std::cout << m.str(1).substr(0, 10) << '\n';
return 0;
}

Binary file not shown.

View File

@@ -26,15 +26,12 @@
#pragma once
#include "cif++/forward_decl.hpp"
#include "cif++/condition.hpp"
#include "cif++/forward_decl.hpp"
#include "cif++/iterator.hpp"
#include "cif++/row.hpp"
#include "cif++/text.hpp"
#include <array>
/** \file category.hpp
* Documentation for the cif::category class
*
@@ -157,7 +154,7 @@ class category
emplace(std::forward<row_initializer>(rows));
}
category(const category &rhs); ///< Copy constructor
category(const category &rhs); ///< Copy constructor
category(category &&rhs) noexcept ///< Move constructor
{
@@ -181,6 +178,27 @@ class category
const std::string &name() const { return m_name; } ///< Returns the name of the category
/// \brief Rename category to @a new_name
///
/// As a side effect the category is flagged as modified, even when
/// @a new_name is equal to the current name.
/// \param new_name The new name for this category
void name(std::string_view new_name)
{
m_name = new_name;
m_dirty = true; // unconditional: the previous name is not compared
}
/// \brief Return true if the category has been modified since last open/save
constexpr bool is_dirty() const
{
return m_dirty;
}
/// \brief Set the modification flag of this category to @a dirty
/// \param dirty Pass true to mark the category as modified, false to mark it as unmodified
void set_dirty(bool dirty)
{
m_dirty = dirty;
}
// --------------------------------------------------------------------
[[deprecated("use key_items instead")]] iset key_fields() const; ///< Returns the cif::iset of key item names. Retrieved from the @ref category_validator for this category
iset key_items() const; ///< Returns the cif::iset of key item names. Retrieved from the @ref category_validator for this category
@@ -332,8 +350,16 @@ class category
// --------------------------------------------------------------------
// A category can have a key, as defined by the validator/dictionary
/// @brief One element of a key_type: an item name, the value to look for and
/// a flag telling whether an empty value is acceptable as well
struct key_element_type
{
	/// Name of the item
	std::string name;

	/// Value to be found
	std::string value;

	/// If true, value should be same or empty
	bool may_be_null{ false };
};
/// @brief The key type
using key_type = row_initializer;
using key_type = std::vector<key_element_type>;
/// @brief Return a row_handle for the row specified by \a key
/// @param key The value for the key, items specified in the dictionary should have a value
@@ -1047,60 +1073,6 @@ class category
{ return value; });
}
// --------------------------------------------------------------------
// Naming used to be very inconsistent. For backward compatibility,
// the old function names are here as deprecated variants.
/// \brief Return the index number for \a column_name
[[deprecated("Use get_item_ix instead")]] uint16_t get_column_ix(std::string_view column_name) const
{
return get_item_ix(column_name);
}
/// @brief Return the name for column with index @a ix
/// @param ix The index number
/// @return The name of the column
[[deprecated("use get_item_name instead")]] std::string_view get_column_name(uint16_t ix) const
{
return get_item_name(ix);
}
/// @brief Make sure a item with name @a item_name is known and return its index number
/// @param item_name The name of the item
/// @return The index number of the item
[[deprecated("use add_item instead")]] uint16_t add_column(std::string_view item_name)
{
return add_item(item_name);
}
/** @brief Remove the column named @a column_name
* @param column_name The column to be removed
* @deprecated Forwards to remove_item; use that instead.
*/
[[deprecated("use remove_item instead")]] void remove_column(std::string_view column_name)
{
remove_item(column_name);
}
/** @brief Rename column @a from_name to @a to_name */
[[deprecated("use rename_item instead")]] void rename_column(std::string_view from_name, std::string_view to_name)
{
rename_item(from_name, to_name);
}
/// @brief Return whether a column with name @a name exists in this category
/// @param name The name of the column
/// @return True if the column exists
[[deprecated("use has_item instead")]] bool has_column(std::string_view name) const
{
return has_item(name);
}
/// @brief Return the cif::iset of columns in this category
[[deprecated("use get_items instead")]] iset get_columns() const
{
return get_items();
}
// --------------------------------------------------------------------
/// \brief Return the index number for \a item_name
@@ -1127,6 +1099,9 @@ class category
*/
void remove_item(std::string_view item_name);
/// \brief Drop items in this category that contain empty values in all rows.
void drop_empty_items();
/** @brief Rename item @a from_name to @a to_name */
void rename_item(std::string_view from_name, std::string_view to_name);
@@ -1138,8 +1113,14 @@ class category
return get_item_ix(name) < m_items.size();
}
/// @brief Return the cif::iset of items in this category
iset get_items() const;
/// @brief Return the items in this category
std::vector<std::string> get_items() const;
/// @brief Return the number of items (columns) in this category
size_t get_item_count() const noexcept
{
return m_items.size();
}
// --------------------------------------------------------------------
@@ -1169,16 +1150,48 @@ class category
/// Write the contents of the category to the std::ostream @a os
void write(std::ostream &os) const;
/// \brief Various supported output formats
///
/// Used as the format selector of the write overload that takes an output_format.
enum class output_format
{
cif, // Output in mmCIF format
csv, // comma separated values
tsv, // tab separated values
list, // values delimited by a '|' character
column, // output in columns
markdown, // Markdown output. NOTE(review): presumably a Markdown table, confirm in the writer
table, // ascii art table
box, // table with unicode line characters
};
/// @brief Write the contents of the category to the std::ostream @a os in the
/// format @a fmt, using @a order as the order of the items. If @a addMissingItems
/// is false, items that do not contain any value will be suppressed.
/// @param os The std::ostream to write to
/// @param fmt The format to use
/// @param order The order in which the items should appear
/// @param addMissingItems When false, empty items are suppressed from the output
void write(std::ostream &os, output_format fmt,
const std::vector<std::string> &order, bool addMissingItems = true);
/// @brief Write the contents of the category to the std::ostream @a os and
/// use @a order as the order of the items. If @a addMissingItems is
/// false, items that do not contain any value will be suppressed
/// @param os The std::ostream to write to
/// @param order The order in which the items should appear
/// @param addMissingItems When false, empty items are suppressed from the output
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingItems = true);
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingItems = true)
{
write(os, output_format::cif, order, addMissingItems);
}
private:
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const;
void write_cif(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const;
void write_delimited(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems,
std::string_view delimiter, bool aligned, bool header) const;
void write_markdown(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const;
void write_table(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems, bool ascii) const;
public:
/// friend function to make it possible to do:
@@ -1249,7 +1262,7 @@ class category
{
}
// TODO: NEED TO FIX THIS!
// TODO: NEED TO FIX THIS!
category *linked;
const link_validator *v;
};
@@ -1278,6 +1291,8 @@ class category
uint32_t m_last_unique_num = 0;
class category_index *m_index = nullptr;
row *m_head = nullptr, *m_tail = nullptr;
bool m_dirty = false; // Keep track of modifications
};
} // namespace cif

View File

@@ -179,8 +179,8 @@ class compound
friend class compound_factory_impl;
friend class local_compound_factory_impl;
compound(cif::datablock &db);
compound(datablock &db);
std::string m_id;
std::string m_name;
std::string m_type;
@@ -196,23 +196,6 @@ class compound
// --------------------------------------------------------------------
// Factory class for compound and Link objects
/// @brief Settings that can be passed when initialising a compound factory
struct compound_factory_options
{
	/// Set this to true in a multithreaded application that needs a
	/// different set of compounds in each thread (e.g. a web service
	/// processing user requests with different sets of compounds).
	bool use_thread_local_instance_only{ false };

#if HAVE_CURL
	/// Remote location of the chem_comp data files. Known locations are:
	/// - ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp
	/// - https://files.rcsb.org/pub/pdb/refdata/chem_comp/
	std::string remote_chem_comp_url{ "ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp" };
#endif
};
/// Use the compound_factory singleton instance to create compound objects
class compound_factory
@@ -225,12 +208,8 @@ class compound_factory
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
/// flag to true.
[[deprecated("Use version with compound_factory_options instead")]]
static void init(bool useThreadLocalInstanceOnly);
/// \brief Initialise a singleton instance.
static void init(compound_factory_options options = {});
/// Return the singleton instance. If initialized with local threads, this is the
/// instance for the current thread.
static compound_factory &instance();
@@ -260,8 +239,6 @@ class compound_factory
void push_dictionary(const file &file);
/// Remove the last pushed dictionary
// TODO: check if the popped dict is the correct one
void pop_dictionary();
/// Return whether @a res_name is a valid and known peptide
@@ -293,11 +270,15 @@ class compound_factory
return is_std_base(res_name) or is_std_peptide(res_name);
}
/// Return whether @a res_name is water
bool is_water(std::string_view res_name) const
{
return res_name == "HOH" or res_name == "H2O" or res_name == "WAT";
}
/// Return whether @a res_name already exists, without creating it.
bool exists(std::string_view res_name) const;
/// \brief Create the compound object for \a id
///
/// This will create the compound instance for \a id if it doesn't exist already.
@@ -313,6 +294,13 @@ class compound_factory
void report_missing_compound(std::string_view compound_id);
bool get_report_missing() const { return m_report_missing; }
void set_report_missing(bool report)
{
m_report_missing = report;
}
private:
compound_factory();
@@ -321,9 +309,10 @@ class compound_factory
static std::unique_ptr<compound_factory> s_instance;
static thread_local std::unique_ptr<compound_factory> tl_instance;
static compound_factory_options s_options;
static bool s_use_thread_local_instance;
std::shared_ptr<compound_factory_impl> m_impl;
bool m_report_missing = true;
};
// --------------------------------------------------------------------
@@ -343,20 +332,17 @@ class compound_factory
* @endcode
*/
// TODO: check if pushed and popped dicts are the same!
class compound_source
{
public:
compound_source(const cif::file &file)
compound_source(const file &file)
{
cif::compound_factory::instance().push_dictionary(file);
compound_factory::instance().push_dictionary(file);
}
~compound_source()
{
cif::compound_factory::instance().pop_dictionary();
compound_factory::instance().pop_dictionary();
}
};

View File

@@ -26,7 +26,9 @@
#pragma once
#include "cif++/text.hpp"
#include "cif++/row.hpp"
#include "cif++/format.hpp"
#include <cassert>
#include <concepts>
@@ -49,49 +51,49 @@
* @code {.cpp}
* cif::condition c = cif::key("id") == 1;
* @endcode
*
*
* That will find rows where the ID item contains the number 1. If
* using cif::key is a bit too much typing, you can also write:
*
*
* @code{.cpp}
* using namespace cif::literals;
*
*
* cif::condition c2 = "id"_key == 1;
* @endcode
*
*
* Now if you want both ID = 1 and ID = 2 in the result:
*
*
* @code{.cpp}
* auto c3 = "id"_key == 1 or "id"_key == 2;
* @endcode
*
*
* There are some special values you can use. To find rows with item that
* do not have a value:
*
*
* @code{.cpp}
* auto c4 = "type"_key == cif::null;
* @endcode
*
* @endcode
*
* Or if it should not be NULL:
*
*
* @code{.cpp}
* auto c5 = "type"_key != cif::null;
* @endcode
*
* @endcode
*
* There's even a way to find all records:
*
*
* @code{.cpp}
* auto c6 = cif::all;
* @endcode
*
*
* And when you want to search for any item containing the value 'foo':
*
*
* @code{.cpp}
* auto c7 = cif::any == "foo";
* @endcode
*
* @endcode
*
* All these conditions can be chained together again:
*
*
* @code{.cpp}
* auto c8 = std::move(c3) and std::move(c5);
* @endcode
@@ -106,7 +108,7 @@ namespace cif
/**
* @brief Get the items that can be used as key in conditions for a category
*
*
* @param cat The category whose items to return
* @return iset The set of key item names
*/
@@ -115,7 +117,7 @@ iset get_category_fields(const category &cat);
/**
* @brief Get the items that can be used as key in conditions for a category
*
*
* @param cat The category whose items to return
* @return iset The set of key field names
*/
@@ -123,7 +125,7 @@ iset get_category_items(const category &cat);
/**
* @brief Get the item index for item @a col in category @a cat
*
*
* @param cat The category
* @param col The name of the item
* @return uint16_t The index
@@ -132,7 +134,7 @@ uint16_t get_item_ix(const category &cat, std::string_view col);
/**
* @brief Return whether the item @a col in category @a cat has a primitive type of *uchar*
*
*
* @param cat The category
* @param col The item name
* @return true If the primitive type is of type *uchar*
@@ -175,14 +177,13 @@ namespace detail
class condition
{
public:
/** @cond */
using condition_impl = detail::condition_impl;
/** @endcond */
/**
* @brief Construct a new, empty condition object
*
*
*/
condition()
: m_impl(nullptr)
@@ -191,7 +192,7 @@ class condition
/**
* @brief Construct a new condition object with implementation @a impl
*
*
* @param impl The implementation to use
*/
explicit condition(condition_impl *impl)
@@ -230,15 +231,15 @@ class condition
/**
* @brief Prepare the condition to be used on category @a c. This will
* take care of setting the correct indices for items e.g.
*
*
* @param c The category this query should act upon
*/
void prepare(const category &c);
/**
* @brief This operator returns true if the row referenced by @a r is
* @brief This operator returns true if the row referenced by @a r is
* a match for this condition.
*
*
* @param r The reference to a row.
* @return true If there is a match
* @return false If there is no match
@@ -263,7 +264,7 @@ class condition
/**
* @brief If the prepare step found out there is only one hit
* this single hit can be returned by this method.
*
*
* @return std::optional<row_handle> The result will contain
* a row reference if there is a single hit, it will be empty otherwise
*/
@@ -292,7 +293,7 @@ class condition
/**
* @brief Operator to use to write out a condition to @a os, for debugging purposes
*
*
* @param os The std::ostream to write to
* @param cond The condition to write
* @return std::ostream& The same as @a os
@@ -752,28 +753,9 @@ namespace detail
delete sub;
}
condition_impl *prepare(const category &c) override
{
for (auto &sub : m_sub)
sub = sub->prepare(c);
return this;
}
condition_impl *prepare(const category &c) override;
bool test(row_handle r) const override
{
bool result = true;
for (auto sub : m_sub)
{
if (sub->test(r))
continue;
result = false;
break;
}
return result;
}
bool test(row_handle r) const override;
void str(std::ostream &os) const override
{
@@ -820,6 +802,7 @@ namespace detail
static condition_impl *combine_equal(std::vector<and_condition_impl *> &subs, or_condition_impl *oc);
std::vector<condition_impl *> m_sub;
std::optional<row_handle> m_single; // Potential result of index lookup
};
struct or_condition_impl : public condition_impl
@@ -977,9 +960,9 @@ inline condition operator or(condition &&a, condition &&b)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_or_empty_condition_impl(ci));
}
if (typeid(*b.m_impl) == typeid(detail::key_equals_condition_impl) and
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
{
auto ci = static_cast<detail::key_equals_condition_impl *>(b.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -997,9 +980,9 @@ inline condition operator or(condition &&a, condition &&b)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_number_or_empty_condition_impl(ci));
}
if (typeid(*b.m_impl) == typeid(detail::key_equals_number_condition_impl) and
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
{
auto ci = static_cast<detail::key_equals_number_condition_impl *>(b.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -1019,7 +1002,7 @@ inline condition operator or(condition &&a, condition &&b)
/**
* @brief A helper class to make it possible to search for empty items (NULL)
*
*
* @code{.cpp}
* "id"_key == cif::empty_type();
* @endcode
@@ -1031,7 +1014,7 @@ struct empty_type
/**
* @brief A helper to make it possible to have conditions like
*
*
* @code{.cpp}
* "id"_key == cif::null;
* @endcode
@@ -1041,14 +1024,14 @@ inline constexpr empty_type null = empty_type();
/**
* @brief Class to use in creating conditions, creates a reference to an item
*
*
*/
struct key
{
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(const std::string &item_name)
: m_item_name(item_name)
@@ -1057,8 +1040,8 @@ struct key
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(const char *item_name)
: m_item_name(item_name)
@@ -1067,8 +1050,8 @@ struct key
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(std::string_view item_name)
: m_item_name(item_name)
@@ -1090,7 +1073,18 @@ concept Numeric = ((std::is_floating_point_v<T> or std::is_integral_v<T>) and no
template <Numeric T>
condition operator==(const key &key, const T &v)
{
return condition(new detail::key_equals_number_condition_impl(key.m_item_name, v));
// TODO: change key_equals_etc... to use std::variant<double,int64_t> or something
return condition(new detail::key_equals_number_condition_impl(key.m_item_name, static_cast<double>(v)));
}
/**
 * @brief Operator to create a not-equals condition based on a key @a key and a numeric value @a v
 *
 * The condition is built by wrapping the equals condition for the same
 * key and value in a detail::not_condition_impl.
 */
template <Numeric T>
condition operator!=(const key &key, const T &v)
{
	// TODO: change key_equals_etc... to use std::variant<double,int64_t> or something
	auto *negated = new detail::not_condition_impl(key == v);
	return condition(negated);
}
/**
@@ -1137,13 +1131,10 @@ inline condition operator!=(const key &key, std::string_view value)
template <Numeric T>
condition operator>(const key &key, const T &v)
{
std::ostringstream s;
s << " > " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) > 0; },
s.str()));
cif::format(" > {}", v)));
}
/**
@@ -1152,13 +1143,10 @@ condition operator>(const key &key, const T &v)
template <Numeric T>
condition operator>=(const key &key, const T &v)
{
std::ostringstream s;
s << " >= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) >= 0; },
s.str()));
cif::format(" >= {}", v)));
}
/**
@@ -1167,13 +1155,10 @@ condition operator>=(const key &key, const T &v)
template <Numeric T>
condition operator<(const key &key, const T &v)
{
std::ostringstream s;
s << " < " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) < 0; },
s.str()));
cif::format(" < {}", v)));
}
/**
@@ -1182,13 +1167,10 @@ condition operator<(const key &key, const T &v)
template <Numeric T>
condition operator<=(const key &key, const T &v)
{
std::ostringstream s;
s << " <= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) <= 0; },
s.str()));
cif::format(" <= {}", v)));
}
/**
@@ -1196,13 +1178,10 @@ condition operator<=(const key &key, const T &v)
*/
inline condition operator>(const key &key, std::string_view v)
{
std::ostringstream s;
s << " > " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) > 0; },
s.str()));
cif::format(" > {}", v)));
}
/**
@@ -1210,13 +1189,10 @@ inline condition operator>(const key &key, std::string_view v)
*/
inline condition operator>=(const key &key, std::string_view v)
{
std::ostringstream s;
s << " >= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) >= 0; },
s.str()));
cif::format(" >= {}", v)));
}
/**
@@ -1224,13 +1200,10 @@ inline condition operator>=(const key &key, std::string_view v)
*/
inline condition operator<(const key &key, std::string_view v)
{
std::ostringstream s;
s << " < " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) < 0; },
s.str()));
cif::format(" < {}", v)));
}
/**
@@ -1238,13 +1211,10 @@ inline condition operator<(const key &key, std::string_view v)
*/
inline condition operator<=(const key &key, std::string_view v)
{
std::ostringstream s;
s << " <= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) <= 0; },
s.str()));
cif::format(" <= {}", v)));
}
/**
@@ -1345,7 +1315,7 @@ namespace literals
{
/**
* @brief Return a cif::key for the item name @a text
*
*
* @param text The name of the item
* @param length The length of @a text
* @return key The cif::key created

444
include/cif++/cql.hpp Normal file
View File

@@ -0,0 +1,444 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/category.hpp"
#include "cif++/datablock.hpp"
#include "cif++/item.hpp"
#include "cif++/iterator.hpp"
#include "cif++/row.hpp"
#include "cif++/validate.hpp"
#include <iterator>
#include <memory>
#include <stdexcept>
#include <string>
// --------------------------------------------------------------------
namespace cif::cql
{
// Forward declarations for the types declared in this header.
class result;
// NOTE(review): no class named row is defined in this header (rows are
// exposed as row_ref); confirm this declaration is still needed.
class row;
class transaction;
class connection;
// Only declared here, never defined in this header. A result and the
// row_ref / field_ref objects obtained from it each hold a
// std::shared_ptr to it.
struct result_impl;
// --------------------------------------------------------------------
/// \brief Reference to a single field (one column of one row) in a query result.
///
/// A field_ref stores the row_handle of the row, the index of the column and
/// a shared pointer to the result implementation it was created with.
class field_ref final
{
  public:
	/// \brief Construct a reference to column \a col in row \a rh,
	/// sharing ownership of \a result_impl
	field_ref(row_handle rh, int col, std::shared_ptr<result_impl> result_impl)
		: m_row(rh)
		, m_index(col)
		, m_result_impl(result_impl)
	{
	}

	field_ref(const field_ref &) = default;
	field_ref(field_ref &&) = default;
	field_ref &operator=(const field_ref &) = default;
	field_ref &operator=(field_ref &&) = default;

	/// \brief The item name for this column, as reported by the category of the row
	std::string_view name() const &
	{
		return m_row.get_category().get_item_name(m_index);
	}

	/// \brief The index of this column
	constexpr size_t num() const noexcept
	{
		return static_cast<size_t>(m_index);
	}

	/// \brief The textual content of this field
	std::string_view text() const &
	{
		return m_row[m_index].text();
	}

	/// \brief Return the contents of this item converted to type \a T
	/// (std::string when no type is specified)
	template <typename T = std::string>
	T as() const
	{
		return m_row[m_index].as<T>();
	}

	/// \brief Return the contents of this item or, if not set,
	/// use \a dv as the default value.
	template <typename T>
	auto value_or(const T &dv) const
	{
		return m_row[m_index].value_or(dv);
	}

  private:
	row_handle m_row;
	int m_index;
	std::shared_ptr<result_impl> m_result_impl;
};
// --------------------------------------------------------------------
/// \brief Reference to a single row in a query result.
///
/// A row_ref can be iterated as a sequence of field_ref objects and fields
/// can be accessed by index or by name. It stores the row_handle and a shared
/// pointer to the result implementation it was created with.
class row_ref final
{
  public:
	/// \brief Forward iterator over the fields of a row.
	///
	/// The iterator keeps a field_ref for the current column so that
	/// operator* and operator-> can return a reference / pointer to it.
	class const_field_iterator
	{
	  public:
		friend class result;

		using iterator_category = std::forward_iterator_tag;
		using value_type = const field_ref;
		using difference_type = std::ptrdiff_t;
		using pointer = value_type *;
		using reference = value_type &;

		const_field_iterator(const const_field_iterator &) = default;
		const_field_iterator(const_field_iterator &&) = default;
		const_field_iterator &operator=(const const_field_iterator &) = default;
		const_field_iterator &operator=(const_field_iterator &&) = default;

		// Dereferencing does not modify the iterator, so both operators are
		// const. This makes them usable on const iterators and in generic
		// code that dereferences through a const reference.
		reference operator*() const
		{
			return m_current;
		}

		pointer operator->() const
		{
			return &m_current;
		}

		/// \brief Advance to the next column; does nothing when the row handle is empty
		const_field_iterator &operator++()
		{
			if (m_row)
			{
				++m_col;
				m_current = field_ref(m_row, m_col, m_result_impl);
			}

			return *this;
		}

		const_field_iterator operator++(int)
		{
			const_field_iterator result(*this);
			this->operator++();
			return result;
		}

		/// \brief Iterators are equal when they refer to the same row and column
		bool operator==(const const_field_iterator &rhs) const
		{
			return m_row == rhs.m_row and m_col == rhs.m_col;
		}

		bool operator!=(const const_field_iterator &rhs) const
		{
			return m_row != rhs.m_row or m_col != rhs.m_col;
		}

	  private:
		friend class row_ref;

		const_field_iterator(row_handle row, int column, std::shared_ptr<result_impl> result_impl)
			: m_row(row)
			, m_col(column)
			, m_current(m_row, m_col, result_impl)
			, m_result_impl(result_impl)
		{
		}

		row_handle m_row;
		int m_col;
		field_ref m_current;
		std::shared_ptr<result_impl> m_result_impl;
	};

	// --------------------------------------------------------------------

	row_ref() = default;

	/// \brief Construct a reference to row \a rh, sharing ownership of \a result_impl
	row_ref(row_handle rh, std::shared_ptr<result_impl> result_impl)
		: m_row(rh)
		, m_result_impl(result_impl)
	{
	}

	row_ref(const row_ref &) = default;
	row_ref &operator=(const row_ref &) = default;

	// --------------------------------------------------------------------
	// Column positions are stored as int; the conversions from size_t are
	// spelled out explicitly.

	const_field_iterator cbegin() const noexcept { return const_field_iterator(m_row, 0, m_result_impl); }
	const_field_iterator begin() const noexcept { return const_field_iterator(m_row, 0, m_result_impl); }

	const_field_iterator cend() const noexcept { return const_field_iterator(m_row, static_cast<int>(size()), m_result_impl); }
	const_field_iterator end() const noexcept { return const_field_iterator(m_row, static_cast<int>(size()), m_result_impl); }

	/// \brief Reference to the first field
	field_ref front() const noexcept { return field_ref(m_row, 0, m_result_impl); }

	/// \brief Reference to the last field; for an empty row the index wraps
	/// around to -1, so callers should check empty() first
	field_ref back() const noexcept { return field_ref(m_row, static_cast<int>(size() - 1), m_result_impl); }

	/// \brief The number of fields in this row
	size_t size() const noexcept;

	bool empty() const noexcept { return size() == 0; }

	/// \brief Reference to the field at position \a index (not range checked here)
	field_ref operator[](size_t index) const noexcept { return field_ref(m_row, static_cast<int>(index), m_result_impl); }

	/// \brief Reference to the field named \a name
	field_ref operator[](std::string_view name) const;

	// --------------------------------------------------------------------

	/// \brief Two row_refs are equal when their row handles compare equal
	bool operator==(const row_ref &rhs) const { return m_row == rhs.m_row; }
	bool operator!=(const row_ref &rhs) const { return m_row != rhs.m_row; }

  private:
	row_handle m_row;
	std::shared_ptr<result_impl> m_result_impl;
};
// --------------------------------------------------------------------
class result
{
public:
// --------------------------------------------------------------------
class iterator
{
public:
friend class view;
using iterator_category = std::forward_iterator_tag;
using value_type = const row_ref;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
// const_row_iterator() = default;
iterator(std::shared_ptr<result_impl> result_impl, category::iterator cat_iter)
: m_iter(cat_iter)
, m_current(*m_iter, result_impl)
, m_result_impl(result_impl)
{
}
iterator(const iterator &) = default;
iterator(iterator &&) = default;
// const_row_iterator &operator=(const const_row_iterator &) = default;
// const_row_iterator &operator=(const_row_iterator &&) = default;
reference operator*()
{
return m_current;
}
pointer operator->()
{
return &m_current;
}
iterator &operator++()
{
++m_iter;
m_current = { *m_iter, m_result_impl };
return *this;
}
iterator operator++(int)
{
iterator result(*this);
this->operator++();
return result;
}
bool operator==(const iterator &rhs) const
{
return m_result_impl == rhs.m_result_impl and m_iter == rhs.m_iter;
}
bool operator!=(const iterator &rhs) const
{
return m_result_impl != rhs.m_result_impl or m_iter != rhs.m_iter;
}
private:
category::iterator m_iter;
row_ref m_current;
std::shared_ptr<result_impl> m_result_impl;
};
// --------------------------------------------------------------------
result() = delete;
result(result const &rhs) noexcept = default;
result(result &&rhs) noexcept = default;
result &operator=(result const &rhs) noexcept = default;
result &operator=(result &&rhs) noexcept = default;
result(category &&data, const std::string &query = "");
~result() = default;
row_ref one_row() const
{
if (size() != 1)
throw std::runtime_error("Expected one row");
return front();
}
field_ref one_field() const
{
expect_columns(1);
if (size() != 1)
throw std::runtime_error("Expected one row");
return one_row().front();
}
// --------------------------------------------------------------------
iterator begin() const noexcept;
iterator cbegin() const noexcept;
iterator end() const noexcept;
iterator cend() const noexcept;
row_ref front() const;
row_ref back() const;
size_t size() const noexcept;
bool empty() const noexcept { return size() == 0; }
size_t column_count() const;
category &get_category() const;
result expect_columns(size_t cols) const
{
if (auto actual = column_count(); cols != actual)
throw std::runtime_error("Unexpected number of columns");
return *this;
}
// --------------------------------------------------------------------
friend std::ostream &operator<<(std::ostream &os, const result &r)
{
os << r.get_category();
return os;
}
private:
friend class transaction;
friend class SelectStatement;
std::shared_ptr<result_impl> m_impl;
};
// --------------------------------------------------------------------
/// \brief An iterator_proxy over the category of a query result that also
/// holds on to the result itself.
///
/// \tparam Ts The types the columns of each row are converted to
template <typename... Ts>
class cql_iterator_proxy : public cif::iterator_proxy<category, Ts...>
{
  public:
	/// \brief Take ownership of \a res and iterate over its category
	/// \throws std::runtime_error (from result::expect_columns) if the number
	/// of columns in \a res differs from the number of requested types
	cql_iterator_proxy(result &&res)
		// The base class is initialised first, while res is still intact.
		: cif::iterator_proxy<category, Ts...>(res.get_category())
		// res is a plain rvalue reference, not a forwarding reference,
		// so std::move is the right cast here.
		, m_result(std::move(res))
	{
		m_result.expect_columns(cif::iterator_proxy<category, Ts...>::N);
	}

  private:
	// Stored alongside the base class, which refers to this result's category;
	// presumably this keeps the category alive during iteration — confirm in result_impl.
	result m_result;
};
// --------------------------------------------------------------------
/// \brief A transaction on a connection, used to execute queries.
///
/// Transactions cannot be copied.
/// NOTE(review): what the destructor does with a transaction that was neither
/// committed nor rolled back is defined in the implementation file — confirm there.
class transaction final
{
  public:
	/// \brief Create a transaction on connection \a conn
	transaction(connection &conn);

	transaction(const transaction &) = delete;
	transaction &operator=(const transaction &) = delete;

	~transaction();

	/// \brief Execute the sql in @a query returning an iterable result
	result exec(std::string query);

	/// \brief Execute the sql in @a query returning an iterable result.
	/// Updates @a tail with what remains after the first statement in @a query
	result exec(std::string query, std::string &tail);

	/// \brief Execute the sql in @a sql and return a proxy that iterates
	/// the rows of the result with columns converted to the types @a Ts
	template <typename... Ts>
	cql_iterator_proxy<Ts...> stream(const std::string &sql)
	{
		return cql_iterator_proxy<Ts...>(exec(sql));
	}

	void commit();
	void rollback();

  private:
	connection &m_conn;
	bool m_transaction_active = false;
};
// --------------------------------------------------------------------
/// \brief A connection to a datablock on which queries can be executed.
class connection final
{
  public:
	/// \brief Create a connection for the data in @a db
	connection(datablock &db);
	~connection();

	// The class holds a raw pointer to its implementation and has a
	// user-declared destructor. An implicitly generated copy would leave two
	// objects with the same m_impl, so copying is disabled, as it is for
	// transaction.
	connection(const connection &) = delete;
	connection &operator=(const connection &) = delete;

	friend class transaction;

	/// \brief Return true if the string @a sql contains a complete statement.
	bool is_complete_statement(const std::string &sql) const;

	/// \brief Execute the sql in @a query returning an iterable result
	result exec(std::string query);

	/// \brief Execute the sql in @a query returning an iterable result.
	/// Updates @a tail with what remains after the first statement in @a query
	result exec(std::string query, std::string &tail);

	/// \brief Return true if the underlying data was modified by any query.
	bool is_modified() const;

  private:
	// Pointer to the implementation; connection_impl is not defined in this header.
	struct connection_impl *m_impl;
};
} // namespace cif::cql

View File

@@ -43,7 +43,7 @@ namespace cif
/**
* @brief A datablock is a list of category objects with some additional features
*
*
*/
class datablock : public std::list<category>
@@ -53,7 +53,7 @@ class datablock : public std::list<category>
/**
* @brief Construct a new datablock object with name @a name
*
*
* @param name The name for the new datablock
*/
datablock(std::string_view name)
@@ -80,7 +80,7 @@ class datablock : public std::list<category>
{
std::swap(a.m_name, b.m_name);
std::swap(a.m_validator, b.m_validator);
std::swap(static_cast<std::list<category>&>(a), static_cast<std::list<category>&>(b));
std::swap(static_cast<std::list<category> &>(a), static_cast<std::list<category> &>(b));
}
// --------------------------------------------------------------------
@@ -92,7 +92,7 @@ class datablock : public std::list<category>
/**
* @brief Set the name of this datablock to @a name
*
*
* @param name The new name
*/
void set_name(std::string_view name)
@@ -102,27 +102,33 @@ class datablock : public std::list<category>
/**
* @brief Attempt to load the dictionary specified in audit_conform category
*
*
*/
void load_dictionary();
/**
* @brief Attempt to load the dictionary @a dict
*
*/
void load_dictionary(std::string_view dict);
/**
* @brief Set the validator object to @a v
*
*
* @param v The new validator object, may be null
*/
void set_validator(const validator *v);
/**
* @brief Get the validator object
*
*
* @return const validator* The validator or nullptr if there is none
*/
const validator *get_validator() const;
/**
* @brief Validates the content of this datablock and all its content
*
*
* @return true If the content is valid
* @return false If the content is not valid
*/
@@ -131,7 +137,7 @@ class datablock : public std::list<category>
/**
* @brief Validates all contained data for valid links between parents and children
* as defined in the validator
*
*
* @return true If all links are valid
* @return false If one or more links are not valid
*/
@@ -140,7 +146,7 @@ class datablock : public std::list<category>
/**
* @brief Strip removes all categories and items that are invalid according
* to the assigned validator. Will also add a valid audit_conform block.
*
*
* @return true if the remaining datablock is valid
*/
bool strip();
@@ -150,7 +156,7 @@ class datablock : public std::list<category>
/**
* @brief Return the category named @a name, will create a new and empty
* category named @a name if it does not exist.
*
*
* @param name The name of the category to return
* @return category& Reference to the named category
*/
@@ -159,7 +165,7 @@ class datablock : public std::list<category>
/**
* @brief Return the const category named @a name, will return a reference
* to a static empty category if it was not found.
*
*
* @param name The name of the category to return
* @return category& Reference to the named category
*/
@@ -168,7 +174,7 @@ class datablock : public std::list<category>
/**
* @brief Return a pointer to the category named @a name or nullptr if
* it does not exist.
*
*
* @param name The name of the category
* @return category* Pointer to the category found or nullptr
*/
@@ -177,13 +183,12 @@ class datablock : public std::list<category>
/**
* @brief Return a pointer to the category named @a name or nullptr if
* it does not exist.
*
*
* @param name The name of the category
* @return category* Pointer to the category found or nullptr
*/
const category *get(std::string_view name) const;
/**
* @brief Return true if this datablock contains a non-empty category
*/
@@ -197,7 +202,7 @@ class datablock : public std::list<category>
* new one if it is not found. The result is a tuple of an iterator
* pointing to the category and a boolean indicating whether the category
* was created or not.
*
*
* @param name The name for the category
* @return std::tuple<iterator, bool> A tuple containing an iterator pointing
* at the category and a boolean indicating whether the category was newly

View File

@@ -26,138 +26,28 @@
#pragma once
#if __has_include(<format>)
#include <format>
#define USE_STD_FORMAT 1
#else
#include <fmt/format.h>
#endif
#include <string>
/** \file format.hpp
*
* File containing a basic reimplementation of boost::format
* but then a bit more simplistic. Still this allowed me to move my code
* from using boost::format to something without external dependency easily.
* Now using cif::format instead of a home grown rip off
*/
namespace cif
{
namespace detail
{
template <typename T>
struct to_varg
{
using type = T;
to_varg(const T &v)
: m_value(v)
{
}
type operator*() { return m_value; }
T m_value;
};
template <>
struct to_varg<const char *>
{
using type = const char *;
to_varg(const char *v)
: m_value(v)
{
}
type operator*() { return m_value.c_str(); }
std::string m_value;
};
template <>
struct to_varg<std::string>
{
using type = const char *;
to_varg(const std::string &v)
: m_value(v)
{
}
type operator*() { return m_value.c_str(); }
std::string m_value;
};
} // namespace
/** @cond */
template <typename... Args>
class format_plus_arg
{
public:
using args_vector_type = std::tuple<detail::to_varg<Args>...>;
using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;
format_plus_arg(const format_plus_arg &) = delete;
format_plus_arg &operator=(const format_plus_arg &) = delete;
format_plus_arg(std::string_view fmt, Args... args)
: m_fmt(fmt)
, m_args(std::forward<Args>(args)...)
{
auto ix = std::make_index_sequence<sizeof...(Args)>();
copy_vargs(ix);
}
std::string str()
{
char buffer[1024];
std::string::size_type r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), m_fmt.c_str()), m_vargs));
return { buffer, r };
}
friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f)
{
char buffer[1024];
std::string::size_type r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), f.m_fmt.c_str()), f.m_vargs));
os.write(buffer, r);
return os;
}
private:
template <std::size_t... I>
void copy_vargs(std::index_sequence<I...>)
{
((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
}
std::string m_fmt;
args_vector_type m_args;
vargs_vector_type m_vargs;
};
/** @endcond */
/**
* @brief A simplistic reimplementation of boost::format, in fact it is
* actually a way to call the C function snprintf to format the arguments
* in @a args into the format string @a fmt
*
* The string in @a fmt should thus be a C style format string.
*
* TODO: Move to C++23 style of printing.
*
* @tparam Args The types of the arguments
* @param fmt The format string
* @param args The arguments
* @return An object that can be written out to a std::ostream using operator<<
*/
template <typename... Args>
constexpr auto format(std::string_view fmt, Args... args)
{
return format_plus_arg(fmt, std::forward<Args>(args)...);
}
#if USE_STD_FORMAT
using std::format;
#else
using fmt::format;
#endif
// --------------------------------------------------------------------
/// A streambuf that fills out lines with spaces up until a specified width

View File

@@ -1,7 +1,33 @@
// Copyright Maarten L. Hekkelman, 2022
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Original code comes from libgxrio at https://github.com/mhekkel/gxrio
This is a stripped down version.
*/
#pragma once

View File

@@ -37,7 +37,6 @@
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
@@ -53,12 +52,12 @@ namespace cif
// --------------------------------------------------------------------
/** @brief item is a transient class that is used to pass data into rows
* but it also takes care of formatting data.
*
*
*
*
*
*
* The class cif::item is often used implicitly when creating a row in a category
* using the emplace function.
*
*
* @code{.cpp}
* cif::category cat("my-cat");
* cat.emplace({
@@ -68,12 +67,12 @@ namespace cif
* { "item-4", std::make_optional<int>(42) }, // <- stores an item with value 42
* { "item-5" } // <- stores an item with value .
* });
*
*
* std::cout << cat << '\n';
* @endcode
*
*
* Will result in:
*
*
* @code{.txt}
* _my-cat.item-1 1
* _my-cat.item-2 1.00
@@ -176,7 +175,7 @@ class item
/// \brief constructor for an item with name \a name and as
/// content value \a value
template<typename T, std::enable_if_t<std::is_same_v<T, std::string>, int> = 0>
template <typename T, std::enable_if_t<std::is_same_v<T, std::string>, int> = 0>
item(const std::string_view name, T &&value)
: m_name(name)
, m_value(std::move(value))
@@ -221,15 +220,15 @@ class item
item &operator=(item &&rhs) noexcept = default;
/** @endcond */
std::string_view name() const { return m_name; } ///< Return the name of the item
std::string_view value() const & { return m_value; } ///< Return the value of the item
std::string_view name() const { return m_name; } ///< Return the name of the item
std::string_view value() const & { return m_value; } ///< Return the value of the item
std::string value() const && { return std::move(m_value); } ///< Return the value of the item
/// \brief replace the content of the stored value with \a v
void value(std::string_view v) { m_value = v; }
/// \brief empty means either null or unknown
bool empty() const { return m_value.empty(); }
bool empty() const { return is_null() or is_unknown() or m_value.empty(); }
/// \brief returns true if the item contains '.'
bool is_null() const { return m_value == "."; }
@@ -250,6 +249,8 @@ class item
return value();
}
auto operator<=>(const item &rhs) const = default;
private:
std::string_view m_name;
std::string m_value;
@@ -258,8 +259,8 @@ class item
// --------------------------------------------------------------------
/// \brief the internal storage for items in a category
///
/// Internal storage, strictly forward linked list with minimal space
/// requirements. Strings of size 7 or shorter are stored internally.
/// Internal storage, with minimal space requirements. Strings of
/// size 7 or shorter are stored internally.
/// Typically, more than 99% of the strings in an mmCIF file are less
/// than 8 bytes in length.
@@ -336,7 +337,8 @@ struct item_value
/** Return the content of the item as a std::string_view */
constexpr inline std::string_view text() const
{
return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
const char *ptr = m_length >= kBufferSize ? m_data : m_local_data;
return (m_length == 1 and *ptr == '?') ? std::string_view{} : std::string_view{ ptr, m_length };
}
};
@@ -558,7 +560,9 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
auto b = txt.data();
auto e = txt.data() + txt.size();
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) ? selected_charconv<value_type>::from_chars(b + 1, e, result) : selected_charconv<value_type>::from_chars(b, e, result);
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) //
? from_chars(b + 1, e, result)
: from_chars(b, e, result);
if ((bool)r.ec or r.ptr != e)
{
@@ -593,7 +597,9 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
auto b = txt.data();
auto e = txt.data() + txt.size();
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) ? selected_charconv<value_type>::from_chars(b + 1, e, v) : selected_charconv<value_type>::from_chars(b, e, v);
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1]))
? from_chars(b + 1, e, v)
: from_chars(b, e, v);
if ((bool)r.ec or r.ptr != e)
{

View File

@@ -26,9 +26,12 @@
#pragma once
#include "cif++/condition.hpp"
#include "cif++/row.hpp"
#include <array>
#include <cstdint>
#include <numeric>
/**
* @file iterator.hpp
@@ -262,6 +265,11 @@ class iterator_impl<Category>
return m_current;
}
/// \brief Return an identifier for the row this iterator currently refers to
///
/// The value is the m_row member of m_current reinterpreted as a 64 bit
/// integer, so it is derived from the row's address rather than from any
/// data stored in the row.
int64_t row_id() const
{
return reinterpret_cast<int64_t>(m_current.m_row);
}
iterator_impl &operator++()
{
if (m_current)
@@ -489,6 +497,9 @@ class iterator_proxy
std::swap(m_item_ix, rhs.m_item_ix);
}
protected:
iterator_proxy(category_type &cat);
private:
category_type *m_category;
row_iterator m_begin, m_end;
@@ -530,6 +541,7 @@ class conditional_iterator_proxy
using pointer = value_type *;
using reference = value_type;
conditional_iterator_impl() = default;
conditional_iterator_impl(CategoryType &cat, row_iterator pos, const condition &cond, const std::array<uint16_t, N> &cix);
conditional_iterator_impl(const conditional_iterator_impl &i) = default;
conditional_iterator_impl &operator=(const conditional_iterator_impl &i) = default;
@@ -649,6 +661,15 @@ iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos,
m_item_ix[i++] = m_category->get_item_ix(item);
}
/// \brief Construct a proxy covering all rows of @a cat, from cat.begin()
/// up to cat.end().
///
/// No item names are passed to this constructor; the item index array is
/// simply filled with the consecutive values 0, 1, 2, ... so slot n refers
/// to item index n of the category.
template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat)
: m_category(&cat)
, m_begin(cat.begin())
, m_end(cat.end())
{
std::iota(m_item_ix.begin(), m_item_ix.end(), 0);
}
// --------------------------------------------------------------------
template <typename Category, typename... Ts>
@@ -661,6 +682,8 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditio
{
if (m_condition == nullptr or m_condition->empty())
m_begin = m_end;
else
m_current = *m_begin;
}
template <typename Category, typename... Ts>

View File

@@ -124,6 +124,23 @@ class matrix_expression
return os;
}
/// \brief Element-wise comparison with matrix expression @a m
///
/// Two expressions are equal when both dimensions match and every pair of
/// corresponding elements compares equal. Comparison stops at the first
/// difference found.
template <typename M2>
constexpr bool operator==(const matrix_expression<M2> &m) const
{
	// differently shaped matrices can never be equal
	if (dim_m() != m.dim_m() or dim_n() != m.dim_n())
		return false;

	for (std::size_t row = 0; row < m.dim_m(); ++row)
	{
		for (std::size_t col = 0; col < m.dim_n(); ++col)
		{
			if (not (operator()(row, col) == m(row, col)))
				return false;
		}
	}

	return true;
}
};
// --------------------------------------------------------------------
@@ -594,6 +611,35 @@ auto operator*(const matrix_expression<M1> &m1, const matrix_expression<M2> &m2)
// --------------------------------------------------------------------
/** @brief Read-only view on a matrix with one row and one column left out
 *
 * The view has one row and one column less than the matrix @a m it was
 * constructed from. Elements are fetched from @a m on access, skipping
 * row @a i and column @a j.
 *
 * @note Only a reference to the original matrix is stored, the viewed
 * matrix must therefore outlive the sub_matrix object.
 *
 * @tparam M2 The type of the matrix being viewed
 */
template <typename M2>
class sub_matrix : public matrix_expression<sub_matrix<M2>>
{
  public:
	/** @brief Create a view on @a m that skips row @a i and column @a j
	 *
	 * The indices are unsigned, just like the indices used for element
	 * access, so no signed to unsigned conversion takes place.
	 */
	constexpr sub_matrix(const M2 &m, std::size_t i, std::size_t j)
		: m_m(m)
		, m_i(i)
		, m_j(j)
	{
	}

	/// Return dimension m, one less than the viewed matrix (zero for an empty matrix)
	constexpr std::size_t dim_m() const { return m_m.dim_m() > 0 ? m_m.dim_m() - 1 : 0; }

	/// Return dimension n, one less than the viewed matrix (zero for an empty matrix)
	constexpr std::size_t dim_n() const { return m_m.dim_n() > 0 ? m_m.dim_n() - 1 : 0; }

	/** Access to the value of element [ @a i, @a j ] */
	constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		// indices at or beyond the removed row/column shift by one in the original
		return m_m(
			i >= m_i ? i + 1 : i,
			j >= m_j ? j + 1 : j);
	}

  private:
	const M2 &m_m;         ///< The matrix being viewed
	std::size_t m_i, m_j;  ///< Row and column that are left out
};
// --------------------------------------------------------------------
/** Generic routine to calculate the determinant of a matrix
*
* @note This is currently only implemented for fixed matrices of size 3x3
@@ -605,11 +651,23 @@ auto determinant(const M &m);
template <typename F = float>
auto determinant(const matrix3x3<F> &m)
{
return (m(0, 0) * (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) +
m(0, 1) * (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)) +
m(0, 2) * (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)));
return (m(0, 0) * ((m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1))) +
m(0, 1) * ((m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2))) +
m(0, 2) * ((m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0))));
}
/** Implementation of the determinant function for fixed size matrices of size 4x4 */
template <typename F = float>
F determinant(const matrix4x4<F> &m)
{
return m(0, 0) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 0))) -
m(0, 1) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 1))) +
m(0, 2) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 2))) -
m(0, 3) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 3)));
}
// --------------------------------------------------------------------
/** Generic routine to calculate the inverse of a matrix
*
* @note This is currently only implemented for fixed matrices of size 3x3

View File

@@ -29,6 +29,7 @@
#include "cif++/atom_type.hpp"
#include "cif++/datablock.hpp"
#include "cif++/point.hpp"
#include "cif++/row.hpp"
#include <memory>
#include <numeric>
@@ -134,14 +135,20 @@ class atom
row_handle row_aniso()
{
row_handle result{};
auto cat = m_db.get("atom_site_anisotrop");
return cat ? cat->operator[]({ { "id", m_id } }) : row_handle{};
if (cat)
result = cat->operator[]({ { "id", m_id } });
return result;
}
const row_handle row_aniso() const
{
row_handle result{};
auto cat = m_db.get("atom_site_anisotrop");
return cat ? cat->operator[]({ { "id", m_id } }) : row_handle{};
if (cat)
result = cat->operator[]({ { "id", m_id } });
return result;
}
const datablock &m_db;
@@ -1059,12 +1066,30 @@ class structure
/// \return The newly created asym ID
std::string create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms);
/// \brief Create a new NonPolymer struct_asym for a compound of type \a compound_id, returns asym_id.
/// This method creates new atom records filled with info from the CCD compound info.
///
/// \param compound_id The compound ID of the new nonpoly
/// \param skip_hydrogen Do not create hydrogen atoms when true
/// \return The newly created asym ID
std::string create_non_poly(const std::string &compound_id, bool skip_hydrogen);
/// \brief Create a new water with atom constructed from info in \a atom_info
/// This method creates a new atom record filled with info from the info.
///
/// \param atom The set of item data containing the data for the atoms.
void create_water(row_initializer atom);
/// \brief Create a link, a struct_conn record for two atoms.
///
/// \param a1 Atom 1
/// \param a2 Atom 2
/// \param link_type The struct_conn_type ID for the link
/// \param role The pdbx_role field value
/// \return The ID of the struct_conn record created
std::string create_link(atom a1, atom a2, const std::string &link_type, const std::string &role);
/// \brief Create a new and empty (sugar) branch
branch &create_branch();

View File

@@ -149,6 +149,23 @@ void fixup_pdbx(file &pdbx_file, const validator &v);
bool reconstruct_pdbx(file &pdbx_file, const validator &v);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* This function throws a std::system_error in case of an error
*
* \param pdbx_file The input file
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
@@ -165,8 +182,7 @@ bool reconstruct_pdbx(file &pdbx_file, const validator &v);
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file,
const validator &v = validator_factory::instance().get("mmcif_pdbx.dic"));
bool is_valid_pdbx_file(const file &pdbx_file, const validator &v);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.

View File

@@ -1,33 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file cif2pdb.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"

View File

@@ -1,32 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file pdb2cif.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"

View File

@@ -1,32 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file tls.hpp
/// \deprecated This code has been moved to libpdb-redo
#warning "This code has been moved to libpdb-redo"

View File

@@ -30,7 +30,9 @@
#include <cmath>
#include <complex>
#include <cstdint>
#include <format>
#include <functional>
#include <optional>
#include <valarray>
#if __has_include(<clipper/core/coords.h>)
@@ -365,11 +367,18 @@ class quaternion_type
}
/// \brief test for a non-zero quaternion, returns false only if all values are zero
constexpr operator bool() const
constexpr explicit operator bool() const
{
return a != 0 or b != 0 or c != 0 or d != 0;
}
/// \brief write the four components of @a rhs to @a os in a readable
/// form, meant for e.g. debugging
friend std::ostream &operator<<(std::ostream &os, const quaternion_type &rhs)
{
	return os << std::format("{{ a: {}, b: {}, c: {}, d: {} }}", rhs.a, rhs.b, rhs.c, rhs.d);
}
private:
value_type a, b, c, d;
};
@@ -743,6 +752,55 @@ inline constexpr auto cross_product(const point_type<F1> &a, const point_type<F2
a.m_x * b.m_y - b.m_x * a.m_y);
}
/// \brief return the squared norm of point @a p, i.e. the sum of the
/// squares of its x, y and z components
template <typename F>
constexpr F norm_squared(const point_type<F> &p)
{
	const auto xx = p.m_x * p.m_x;
	const auto yy = p.m_y * p.m_y;
	const auto zz = p.m_z * p.m_z;

	return xx + yy + zz;
}
/// \brief return the norm of point @a p
template <typename F>
constexpr point_type<F> norm(const point_type<F> &p)
{
return std::sqrt(norm_squared(p));
}
/// \brief return the point where two lines intersect, or an empty value if they don't intersect at all
template <typename F>
std::optional<cif::point> line_line_intersection(const point_type<F> &p1,
const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
auto p13 = p1 - p3;
auto p43 = p4 - p3;
if (std::abs(p43.m_x) < std::numeric_limits<F>::epsilon() and std::abs(p43.m_y) < std::numeric_limits<F>::epsilon() and std::abs(p43.m_z) < std::numeric_limits<F>::epsilon())
return {};
auto p21 = p2 - p1;
if (std::abs(p21.m_x) < std::numeric_limits<F>::epsilon() and std::abs(p21.m_y) < std::numeric_limits<F>::epsilon() and std::abs(p21.m_z) < std::numeric_limits<F>::epsilon())
return {};
auto d1343 = cif::dot_product(p43, p13);
auto d4321 = cif::dot_product(p43, p21);
auto d1321 = cif::dot_product(p13, p21);
auto d4343 = cif::dot_product(p43, p43);
auto d2121 = cif::dot_product(p21, p21);
auto denom = d2121 * d4343 - d4321 * d4321;
if (std::abs(denom) < std::numeric_limits<F>::epsilon())
return {};
auto numer = d1343 * d4321 - d1321 * d4343;
auto mua = numer / denom;
auto mub = (d1343 + d4321 * mua) / d4343;
auto pa = p1 + mua * p21;
auto pb = p3 + mub * p43;
return { (pa + pb) / 2 };
}
/// \brief return the angle in degrees between the vectors from point @a p2 to @a p1 and @a p2 to @a p3
template <typename F>
constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
@@ -806,6 +864,9 @@ constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<
return cross.length() / line.length();
}
/// \brief return the smallest sphere around the points in @a pts
std::tuple<point, float> smallest_sphere_around_points(std::vector<point> pts);
// --------------------------------------------------------------------
/**
* @brief For e.g. simulated annealing, returns a new point that is moved in

View File

@@ -29,55 +29,61 @@
#include "cif++/item.hpp"
#include <array>
#include <cstdint>
/**
* @file row.hpp
*
*
* The class cif::row should be an opaque type. It is used to store the
* internal data per row in a category. You should use cif::row_handle
* to get access to the contents in a row.
*
*
* One could think of rows as vectors of cif::item. But internally
* that's not the case.
*
*
* You can access the values of stored items by name or index.
* The return value of operator[] is a cif::item_handle object.
*
*
* @code {.cpp}
* cif::category &atom_site = my_db["atom_site"];
* cif::row_handle rh = atom_site.front();
*
*
* // by name:
* std::string name = rh["label_atom_id"].as<std::string>();
*
*
* // by index:
* uint16_t ix = atom_site.get_item_ix("label_atom_id");
* assert(rh[ix].as<std::string>() == name);
* @endcode
*
*
* There is some template magic here to allow easy extracting of data
* from rows. This can be done using cif::tie e.g.:
*
*
* @code {.cpp}
* std::string name;
* float x, y, z;
*
*
* cif::tie(name, x, y, z) = rh.get("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
* @endcode
*
*
* However, a more modern way uses structured binding:
*
*
* @code {.cpp}
* const auto &[name, x, y, z] = rh.get<std::string,float,float,float>("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
* @endcode
*
*
*
*
*
*
*/
namespace cif
{
namespace cql
{
struct connection_impl;
}
namespace detail
{
@@ -141,7 +147,7 @@ namespace detail
} // namespace detail
/// \brief similar to std::tie, assign values to each element in @a v from the
/// \brief similar to std::tie, assign values to each element in @a v from the
/// result of a get on a row_handle.
template <typename... Ts>
auto tie(Ts &...v)
@@ -160,7 +166,7 @@ class row : public std::vector<item_value>
/**
* @brief Return the item_value pointer for item at index @a ix
*/
item_value* get(uint16_t ix)
item_value *get(uint16_t ix)
{
return ix < size() ? &data()[ix] : nullptr;
}
@@ -168,7 +174,7 @@ class row : public std::vector<item_value>
/**
* @brief Return the const item_value pointer for item at index @a ix
*/
const item_value* get(uint16_t ix) const
const item_value *get(uint16_t ix) const
{
return ix < size() ? &data()[ix] : nullptr;
}
@@ -184,7 +190,7 @@ class row : public std::vector<item_value>
{
if (ix >= size())
resize(ix + 1);
at(ix) = std::move(iv);
}
@@ -208,7 +214,8 @@ class row_handle
friend class category;
friend class category_index;
friend class row_initializer;
template <typename, typename...> friend class iterator_impl;
template <typename, typename...>
friend class iterator_impl;
row_handle() = default;
@@ -233,6 +240,12 @@ class row_handle
return *m_category;
}
/// \brief return the row ID
///
/// The ID is the m_row pointer of this handle reinterpreted as a 64 bit
/// integer, it is thus derived from the address of the row and not from
/// the data stored in the row.
int64_t row_id() const
{
return reinterpret_cast<int64_t>(m_row);
}
/// \brief Return true if the row is empty or uninitialised
bool empty() const
{
@@ -299,19 +312,19 @@ class row_handle
}
/// \brief assign each of the items named in @a values to their respective value
void assign(const std::vector<item> &values)
void assign(const std::vector<item> &values, bool updateLinked = true)
{
for (auto &value : values)
assign(value, true);
assign(value, updateLinked);
}
/** \brief assign the value @a value to the item named @a name
*
/** \brief assign the value @a value to the item named @a name
*
* If updateLinked is true, linked records are updated as well.
* That means that if item @a name is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
*
* If validate is true, which is default, the assigned value is
* checked to see if it conforms to the rules defined in the dictionary
*/
@@ -322,12 +335,12 @@ class row_handle
}
/** \brief assign the value @a value to item at index @a item
*
*
* If updateLinked is true, linked records are updated as well.
* That means that if item @a item is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
*
* If validate is true, which is default, the assigned value is
* checked to see if it conforms to the rules defined in the dictionary
*/
@@ -346,6 +359,8 @@ class row_handle
uint16_t add_item(std::string_view name);
friend cql::connection_impl;
row *get_row()
{
return m_row;
@@ -371,7 +386,7 @@ class row_handle
/**
* @brief The class row_initializer is a list of cif::item's.
*
*
* This class is used to construct new rows, it allows to
* group a list of item name and value pairs and pass it
* in one go to the constructing function.
@@ -406,7 +421,6 @@ class row_initializer : public std::vector<item>
/// \brief constructor taking the values of an existing row
row_initializer(row_handle rh);
/// \brief set the value for item name @a name to @a value
void set_value(std::string_view name, std::string_view value);

View File

@@ -355,279 +355,35 @@ std::string cif_id_for_number(int number);
std::vector<std::string> word_wrap(const std::string &text, std::size_t width);
// --------------------------------------------------------------------
/// \brief std::from_chars for floating point types.
///
/// These are optional, there's a selected_charconv class below that selects
/// the best option to use based on support by the stl library.
///
/// I.e. that in case of GNU < 12 (or something) the cif implementation will
/// be used, all other cases will use the stl version.
///
/// Accepted syntax is: [+-] digits [ '.' digits ] [ (e|E) [+-] digits ]
/// where the mantissa must contain at least one digit (before or after the
/// decimal point) and an exponent marker must be followed by at least one
/// digit. Note that, unlike std::from_chars, a leading '+' is accepted.
///
/// \param first  Start of the text to parse
/// \param last   One past the end of the text to parse
/// \param value  Receives the parsed value, it is left untouched on error
/// \return       On success ec is empty and ptr points to the first character
///               that is not part of the number. For malformed input ec is
///               std::errc::invalid_argument and ptr equals \a first. If the
///               value does not fit in FloatType ec is
///               std::errc::result_out_of_range and ptr points past the number.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::from_chars_result from_chars(const char *first, const char *last, FloatType &value)
{
	enum State
	{
		IntegerSign,
		Integer,
		Fraction,
		ExponentSign,
		Exponent
	} state = IntegerSign;

	// Largest mantissa that can take one more decimal digit without wrapping around
	constexpr unsigned long long kMantissaLimit = (std::numeric_limits<unsigned long long>::max() - 9) / 10;

	// Exponents are saturated here, anything this large overflows or underflows anyway
	constexpr int kExponentLimit = 100000;

	int sign = 1;
	unsigned long long vi = 0; // all mantissa digits, without the decimal point
	int fl = 0, tz = 0;        // fraction length up to the last non-zero digit, pending trailing zeroes
	int dropped = 0;           // integer digits that no longer fit in vi
	int exponent_sign = 1;
	int exponent = 0;
	bool has_digits = false;
	bool has_exponent_digits = false;

	const char *p = first;
	std::errc ec{};
	bool done = false;

	while (not done and ec == std::errc{})
	{
		// A zero signals the end of the input. The pointer is advanced only
		// when a character is consumed, so it never moves beyond last.
		const char ch = p != last ? *p : 0;
		const bool is_digit = ch >= '0' and ch <= '9';

		switch (state)
		{
			case IntegerSign:
				if (ch == '-')
				{
					sign = -1;
					state = Integer;
					++p;
				}
				else if (ch == '+')
				{
					state = Integer;
					++p;
				}
				else if (is_digit)
					state = Integer; // the digit itself is consumed in the next state
				else if (ch == '.')
				{
					state = Fraction;
					++p;
				}
				else
					ec = std::errc::invalid_argument;
				break;

			case Integer:
				if (is_digit)
				{
					has_digits = true;
					if (vi <= kMantissaLimit)
						vi = 10 * vi + (ch - '0');
					else
						++dropped; // beyond the precision of vi, only keep track of the magnitude
					++p;
				}
				else if (ch == 'e' or ch == 'E')
				{
					state = ExponentSign;
					++p;
				}
				else if (ch == '.')
				{
					state = Fraction;
					++p;
				}
				else
					done = true;
				break;

			case Fraction:
				if (is_digit)
				{
					has_digits = true;

					// fraction digits that no longer fit are insignificant and simply skipped
					if (vi <= kMantissaLimit)
					{
						vi = 10 * vi + (ch - '0');
						if (ch == '0')
							tz += 1;
						else
						{
							fl += tz + 1;
							tz = 0;
						}
					}
					++p;
				}
				else if (ch == 'e' or ch == 'E')
				{
					state = ExponentSign;
					++p;
				}
				else
					done = true;
				break;

			case ExponentSign:
				if (ch == '-')
				{
					exponent_sign = -1;
					state = Exponent;
					++p;
				}
				else if (ch == '+')
				{
					state = Exponent;
					++p;
				}
				else if (is_digit)
					state = Exponent; // the digit itself is consumed in the next state
				else
					ec = std::errc::invalid_argument;
				break;

			case Exponent:
				if (is_digit)
				{
					has_exponent_digits = true;
					if (exponent < kExponentLimit)
						exponent = 10 * exponent + (ch - '0');
					++p;
				}
				else
					done = true;
				break;
		}
	}

	// A sign, decimal point or exponent marker without digits is not a number
	if (ec == std::errc{} and (not has_digits or (state == Exponent and not has_exponent_digits)))
		ec = std::errc::invalid_argument;

	if (ec != std::errc{})
		return { first, ec };

	// remove the trailing zeroes of the fraction again, they were not counted in fl
	while (tz-- > 0)
		vi /= 10;

	long double v = std::pow(10, -fl) * vi * sign;

	// A zero mantissa stays zero, whatever the exponent. Skipping the scaling
	// avoids a NaN from multiplying zero with an overflowed power of ten.
	const int scale = exponent * exponent_sign + dropped;
	if (vi != 0 and scale != 0)
		v *= std::pow(10, scale);

	if (std::isnan(v))
		return { first, std::errc::invalid_argument };

	if (std::abs(v) > std::numeric_limits<FloatType>::max())
		return { p, std::errc::result_out_of_range };

	value = static_cast<FloatType>(v);
	return { p, std::errc{} };
}
/// \brief duplication of std::chars_format for deficient STL implementations
enum class chars_format
{
	scientific = 1,
	fixed = 2,
	// hex,
	general = fixed | scientific
};

/// \brief a simplistic implementation of std::to_chars for old STL implementations
///
/// The value is formatted using snprintf with the conversion belonging to
/// \a fmt (%e, %f or %g). Since snprintf also writes a terminating zero
/// byte, the buffer needs room for one character more than the text.
///
/// \param first  Start of the output buffer
/// \param last   One past the end of the output buffer
/// \param value  The value to write
/// \param fmt    The notation to use
/// \return       On success ptr points one past the last character written
///               and ec is empty. If the buffer is empty or too small ptr
///               equals \a first and ec is std::errc::value_too_large.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
{
	const auto size = last - first;

	// Never hand an empty or inverted range to snprintf, converted to an
	// unsigned size that would result in an enormous buffer length
	if (size <= 0)
		return { first, std::errc::value_too_large };

	const auto capacity = static_cast<std::size_t>(size);
	int r = 0;

	// long double needs the L length modifier, float is promoted to double
	// when passed to a variadic function and double needs no modifier at all
	switch (fmt)
	{
		case chars_format::scientific:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%Le", value);
			else
				r = snprintf(first, capacity, "%e", value);
			break;

		case chars_format::fixed:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%Lf", value);
			else
				r = snprintf(first, capacity, "%f", value);
			break;

		case chars_format::general:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%Lg", value);
			else
				r = snprintf(first, capacity, "%g", value);
			break;
	}

	if (r < 0 or r >= size)
		return { first, std::errc::value_too_large };

	return { first + r, std::errc() };
}

/// \brief a simplistic implementation of std::to_chars for old STL implementations
///
/// Same as the overload without precision, but here the number of digits
/// is specified by \a precision, which is passed on to snprintf.
///
/// \param first      Start of the output buffer
/// \param last       One past the end of the output buffer
/// \param value      The value to write
/// \param fmt        The notation to use
/// \param precision  The precision as used by snprintf for the selected notation
/// \return           On success ptr points one past the last character written
///                   and ec is empty. If the buffer is empty or too small ptr
///                   equals \a first and ec is std::errc::value_too_large.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
{
	const auto size = last - first;

	// Never hand an empty or inverted range to snprintf, converted to an
	// unsigned size that would result in an enormous buffer length
	if (size <= 0)
		return { first, std::errc::value_too_large };

	const auto capacity = static_cast<std::size_t>(size);
	int r = 0;

	// long double needs the L length modifier, float is promoted to double
	// when passed to a variadic function and double needs no modifier at all
	switch (fmt)
	{
		case chars_format::scientific:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%.*Le", precision, value);
			else
				r = snprintf(first, capacity, "%.*e", precision, value);
			break;

		case chars_format::fixed:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%.*Lf", precision, value);
			else
				r = snprintf(first, capacity, "%.*f", precision, value);
			break;

		case chars_format::general:
			if constexpr (std::is_same_v<FloatType, long double>)
				r = snprintf(first, capacity, "%.*Lg", precision, value);
			else
				r = snprintf(first, capacity, "%.*g", precision, value);
			break;
	}

	if (r < 0 or r >= size)
		return { first, std::errc::value_too_large };

	return { first + r, std::errc() };
}
/// \brief class that uses our implementation of std::from_chars and std::to_chars
template <typename T>
struct my_charconv
{
/// @brief Simply call our version of std::from_chars
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
{
return cif::from_chars(a, b, d);
}
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
/// @brief Simply call our version of std::to_chars
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
{
return cif::to_chars(first, last, value, fmt);
}
};
/// \brief class that uses the STL implementation of std::from_chars and std::to_chars
template <typename T>
struct std_charconv
{
/// @brief Simply call std::from_chars
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
{
return std::from_chars(a, b, d);
}
/// @brief Simply call std::to_chars
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
{
return std::to_chars(first, last, value, fmt);
}
};
/// \brief helper to find a from_chars function
template <typename T>
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
template <typename T, typename = void>
struct ff_charconv;
/**
* @brief Helper to select the best implementation of charconv based on availability of the
* function in the std:: namespace
*
* @tparam T The type for which we want to find a from_chars/to_chars function
*/
template <typename T>
using selected_charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
struct ff_charconv<T, typename std::enable_if_t<std::is_floating_point_v<T>>>
{
static std::from_chars_result from_chars(const char *a, const char *b, T &v);
};
template <typename T>
using charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, ff_charconv<T>>;
template <typename T>
constexpr auto from_chars(const char *s, const char *e, T &v)
{
return charconv<T>::from_chars(s, e, v);
}
} // namespace cif

View File

@@ -53,6 +53,7 @@
#pragma warning(disable : 4068) // unknown pragma
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4101) // unreferenced local variable
#pragma warning(disable : 4702) // unreachable code (too bad, this one. Happens in for loops)
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
#endif
@@ -295,6 +296,11 @@ class progress_bar
*/
void message(const std::string &inMessage);
/**
* @brief Flush the progress bar to the output stream
*/
void flush();
private:
progress_bar(const progress_bar &) = delete;
progress_bar &operator=(const progress_bar &) = delete;

View File

@@ -30,7 +30,6 @@
#include "cif++/text.hpp"
#include <cassert>
#include <filesystem>
#include <list>
#include <mutex>
#include <optional>
@@ -343,11 +342,11 @@ struct item_validator
*/
struct category_validator
{
std::string m_name; ///< The name of the category
std::vector<std::string> m_keys; ///< The list of items that make up the key
cif::iset m_groups; ///< The category groups this category belongs to
cif::iset m_mandatory_items; ///< The mandatory items for this category
std::set<item_validator> m_item_validators; ///< The item validators for the items in this category
std::string m_name; ///< The name of the category
std::vector<std::string> m_keys; ///< The list of items that make up the key
cif::iset m_groups; ///< The category groups this category belongs to
cif::iset m_mandatory_items; ///< The mandatory items for this category
std::vector<item_validator> m_item_validators; ///< The item validators for the items in this category
/// @brief return true if this category sorts before @a rhs
bool operator<(const category_validator &rhs) const
@@ -520,10 +519,18 @@ class validator_factory
static validator_factory &instance();
/// @brief Return validator with info recorded in @a audit_conform
const validator &get(const category &audit_conform);
const validator *get(const category &audit_conform);
/// @brief Return the single-file validator with name @a dictionary_name
const validator &get(std::string_view dictionary_name);
/// and the dictionary name may be a set of dictionaries separated by comma
const validator *get(std::string_view dictionary_name);
/// @brief Return validator with info recorded in @a audit_conform
const validator &operator[](const category &audit_conform);
/// @brief Return the single-file validator with name @a dictionary_name
/// and the dictionary name may be a set of dictionaries separated by comma
const validator &operator[](std::string_view dictionary_name);
/// @brief Return true if the version @a found is equal or higher than @a expected for dictionary @a name
static bool check_version(std::string_view name, std::string_view expected, std::string_view found);
@@ -535,6 +542,21 @@ class validator_factory
return m_validators.emplace_back(std::move(v));
}
#if __cplusplus >= 202302L
// The two operators below are only compiled when __cplusplus reports C++23 or
// later; each one simply forwards to the object returned by instance().
// NOTE(review): a non-static operator[] taking the same parameter types appears
// to be declared earlier in this class with return type `const validator &`.
// Confirm that these static versions can coexist with it, and that returning a
// non-const `validator &` here is intended.
/// @brief Return validator with info recorded in @a audit_conform
static validator &operator[](const category &audit_conform)
{
return instance()[audit_conform];
}
/// @brief Return the single-file validator with name @a dict
/// and the dictionary name may be a set of dictionaries separated by comma
static validator &operator[](std::string_view dict)
{
return instance()[dict];
}
#endif
private:
validator_factory() = default;

316
pcre2-simple/CMakeLists.txt Normal file
View File

@@ -0,0 +1,316 @@
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2025 Maarten L. Hekkelman
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# A simplified wrapper CMakeLists.txt file for PCRE2
#
# This will generate an OBJECT library so it can be linked into another library
cmake_minimum_required(VERSION 3.25)
include(FetchContent)
project(pcre2s VERSION 1.0.0 LANGUAGES C CXX)
# The original code:
file(DOWNLOAD https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.46/pcre2-10.46.tar.gz
${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
EXPECTED_HASH SHA256=8d28d7f2c3b970c3a4bf3776bcbb5adfc923183ce74bc8df1ebaad8c1985bd07)
file(ARCHIVE_EXTRACT INPUT ${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
# Directory the downloaded archive is unpacked into, plus the version details of
# that release (presumably substituted into the headers generated by the
# configure_file() calls further down — confirm against the .in templates).
set(PCRE2_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/pcre2-10.46)
set(PCRE2_MAJOR 10)
set(PCRE2_MINOR 46)
set(PCRE2_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
# NOTE(review): this date looks older than the 10.46 release it is paired with;
# confirm the value against the release date recorded by upstream PCRE2.
set(PCRE2_DATE "2024-06-09")
# Some needed configuration options
# option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
# option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
# option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL)." FORCE)
set_property(CACHE PCRE2_NEWLINE PROPERTY STRINGS "CR" "LF" "CRLF" "ANY" "ANYCRLF" "NUL")
set(PCRE2_LINK_SIZE "2" CACHE STRING "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
set_property(CACHE PCRE2_LINK_SIZE PROPERTY STRINGS "2" "3" "4")
set(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
set(PCRE2_HEAP_LIMIT "20000000" CACHE STRING "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")
set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
set(PCRE2_MATCH_LIMIT "10000000" CACHE STRING "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
set(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.")
set(PCRE2GREP_BUFSIZE "20480" CACHE STRING "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
set(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")
# The SELinux compatible JIT allocator is only offered as an option on Linux and
# NetBSD; on every other system it is forced to IGNORE.
# The variable is named without ${} and the regex is quoted, so if() evaluates
# the variable itself instead of re-interpreting its expanded value.
if(CMAKE_SYSTEM_NAME MATCHES "Linux|NetBSD")
	set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
else()
	set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
endif()
set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
set(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")
if(MINGW)
option(NON_STANDARD_LIB_PREFIX "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." OFF)
option(NON_STANDARD_LIB_SUFFIX "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." OFF)
endif()
#
# Translate the symbolic PCRE2_NEWLINE choice into the numeric NEWLINE_DEFAULT
# code. Any value outside the accepted set aborts the configure step; the error
# message lists every value accepted here, including NUL.
set(NEWLINE_DEFAULT "")
if(PCRE2_NEWLINE STREQUAL "CR")
	set(NEWLINE_DEFAULT "1")
elseif(PCRE2_NEWLINE STREQUAL "LF")
	set(NEWLINE_DEFAULT "2")
elseif(PCRE2_NEWLINE STREQUAL "CRLF")
	set(NEWLINE_DEFAULT "3")
elseif(PCRE2_NEWLINE STREQUAL "ANY")
	set(NEWLINE_DEFAULT "4")
elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
	set(NEWLINE_DEFAULT "5")
elseif(PCRE2_NEWLINE STREQUAL "NUL")
	set(NEWLINE_DEFAULT "6")
else()
	message(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\".")
endif()
# Some tests
include(CheckCSourceCompiles)
include(CheckFunctionExists)
include(CheckSymbolExists)
include(CheckIncludeFile)
check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(unistd.h HAVE_UNISTD_H)
check_include_file(windows.h HAVE_WINDOWS_H)
check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
check_symbol_exists(strerror "string.h" HAVE_STRERROR)
check_c_source_compiles(
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
HAVE_ATTRIBUTE_UNINITIALIZED
)
check_c_source_compiles(
[=[
extern __attribute__ ((visibility ("default"))) int f(void);
int main(void) { return f(); }
int f(void) { return 42; }
]=]
HAVE_VISIBILITY
)
if(HAVE_VISIBILITY)
set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
else()
set(PCRE2_EXPORT)
endif()
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
check_c_source_compiles(
[=[
#include <stddef.h>
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
]=]
HAVE_BUILTIN_MUL_OVERFLOW
)
check_c_source_compiles(
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
HAVE_BUILTIN_UNREACHABLE
)
# # Check whether Intel CET is enabled, and if so, adjust compiler flags. This
# # code was written by PH, trying to imitate the logic from the autotools
# # configuration.
# check_c_source_compiles(
# [=[
# #ifndef __CET__
# #error CET is not enabled
# #endif
# int main() { return 0; }
# ]=]
# INTEL_CET_ENABLED
# )
# if(INTEL_CET_ENABLED)
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
# endif()
# Set up some dependencies first
configure_file(
${PCRE2_SOURCE_DIR}/src/pcre2_chartables.c.dist
${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
COPYONLY
)
configure_file(
${PCRE2_SOURCE_DIR}/config-cmake.h.in
${CMAKE_CURRENT_BINARY_DIR}/interface/config.h
@ONLY
)
configure_file(
${PCRE2_SOURCE_DIR}/src/pcre2.h.in
${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h
@ONLY
)
# Define our library
list(APPEND PCRE2_HEADERS
${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h)
list(APPEND PCRE2_SOURCES
${PCRE2_SOURCE_DIR}/src/pcre2_auto_possess.c
${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
${PCRE2_SOURCE_DIR}/src/pcre2_chkdint.c
${PCRE2_SOURCE_DIR}/src/pcre2_compile.c
${PCRE2_SOURCE_DIR}/src/pcre2_compile_class.c
${PCRE2_SOURCE_DIR}/src/pcre2_config.c
${PCRE2_SOURCE_DIR}/src/pcre2_context.c
${PCRE2_SOURCE_DIR}/src/pcre2_convert.c
${PCRE2_SOURCE_DIR}/src/pcre2_dfa_match.c
${PCRE2_SOURCE_DIR}/src/pcre2_error.c
${PCRE2_SOURCE_DIR}/src/pcre2_extuni.c
${PCRE2_SOURCE_DIR}/src/pcre2_find_bracket.c
${PCRE2_SOURCE_DIR}/src/pcre2_jit_compile.c
${PCRE2_SOURCE_DIR}/src/pcre2_maketables.c
${PCRE2_SOURCE_DIR}/src/pcre2_match.c
${PCRE2_SOURCE_DIR}/src/pcre2_match_data.c
${PCRE2_SOURCE_DIR}/src/pcre2_newline.c
${PCRE2_SOURCE_DIR}/src/pcre2_ord2utf.c
${PCRE2_SOURCE_DIR}/src/pcre2_pattern_info.c
${PCRE2_SOURCE_DIR}/src/pcre2_script_run.c
${PCRE2_SOURCE_DIR}/src/pcre2_serialize.c
${PCRE2_SOURCE_DIR}/src/pcre2_string_utils.c
${PCRE2_SOURCE_DIR}/src/pcre2_study.c
${PCRE2_SOURCE_DIR}/src/pcre2_substitute.c
${PCRE2_SOURCE_DIR}/src/pcre2_substring.c
${PCRE2_SOURCE_DIR}/src/pcre2_tables.c
${PCRE2_SOURCE_DIR}/src/pcre2_ucd.c
${PCRE2_SOURCE_DIR}/src/pcre2_valid_utf.c
${PCRE2_SOURCE_DIR}/src/pcre2_xclass.c
)
add_library(pcre2s OBJECT)
target_sources(pcre2s
PRIVATE ${PCRE2_SOURCES}
PUBLIC
FILE_SET pcre2_headers TYPE HEADERS
BASE_DIRS ${PCRE2_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/interface
FILES ${PCRE2_HEADERS}
)
target_compile_definitions(pcre2s PUBLIC PCRE2_CODE_UNIT_WIDTH=8 HAVE_CONFIG_H)
if(NOT BUILD_SHARED_LIBS)
target_compile_definitions(pcre2s PUBLIC PCRE2_STATIC)
endif()
target_include_directories(pcre2s PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/interface ${PCRE2_SOURCE_DIR}/src)
if(PCRE2_STATIC_PIC)
set_target_properties(pcre2s PROPERTIES POSITION_INDEPENDENT_CODE 1)
endif()
# # Installation and config files
# include(CMakePackageConfigHelpers)
# include(GenerateExportHeader)
# # Install rules
# install(TARGETS pcre2s
# EXPORT pcre2s
# FILE_SET pcre2_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# if(MSVC AND BUILD_SHARED_LIBS)
# install(
# FILES $<TARGET_PDB_FILE:pcre2s>
# DESTINATION ${CMAKE_INSTALL_LIBDIR}
# OPTIONAL)
# endif()
# install(EXPORT pcre2s
# NAMESPACE pcre2s::
# FILE "pcre2s-targets.cmake"
# DESTINATION lib/cmake/pcre2s)
# configure_package_config_file(
# ${CMAKE_CURRENT_SOURCE_DIR}/pcre2s-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake
# INSTALL_DESTINATION lib/cmake/pcre2s)
# install(
# FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake"
# "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
# DESTINATION lib/cmake/pcre2s)
# set_target_properties(
# pcre2s
# PROPERTIES VERSION ${PCRE2_VERSION}
# SOVERSION ${PCRE2_VERSION}
# INTERFACE_pcre2s_MAJOR_VERSION ${PCRE2_MAJOR})
# set_property(
# TARGET pcre2s
# APPEND
# PROPERTY COMPATIBLE_INTERFACE_STRING pcre2s_MAJOR_VERSION)
# write_basic_package_version_file(
# "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
# VERSION "${PCRE2_VERSION}"
# COMPATIBILITY AnyNewerVersion)
# # Testing
# if(PROJECT_IS_TOP_LEVEL)
# include(CTest)
# if(BUILD_TESTING)
# add_subdirectory(test)
# endif()
# endif()

2080
sql-92.bnf Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -25,6 +25,7 @@
*/
#include "cif++/category.hpp"
#include "cif++/datablock.hpp"
#include "cif++/parser.hpp"
#include "cif++/utilities.hpp"
@@ -32,6 +33,8 @@
#include <numeric>
#include <stack>
#include <utility>
#include <vector>
// TODO: Find out what the rules are exactly for linked items, the current implementation
// is inconsistent. It all depends whether a link is satisfied if an item taking part in the
@@ -92,7 +95,7 @@ class row_comparator
return d;
}
int operator()(const category &cat, const row_initializer &a, const row *b) const
int operator()(const category &cat, const category::key_type &a, const row *b) const
{
assert(b);
@@ -105,10 +108,11 @@ class row_comparator
{
assert(ai != a.end());
std::string_view ka = ai->value();
std::string_view ka = ai->value;
std::string_view kb = rhb[k].text();
d = f(ka, kb);
if (not(ai->may_be_null and rhb[k].empty()))
d = f(ka, kb);
if (d != 0)
break;
@@ -142,7 +146,7 @@ class category_index
}
row *find(const category &cat, row *k) const;
row *find_by_value(const category &cat, row_initializer k) const;
row *find_by_value(const category &cat, const category::key_type &k) const;
void insert(category &cat, row *r);
void erase(category &cat, row *r);
@@ -352,19 +356,19 @@ row *category_index::find(const category &cat, row *k) const
return r ? r->m_row : nullptr;
}
row *category_index::find_by_value(const category &cat, row_initializer k) const
row *category_index::find_by_value(const category &cat, const category::key_type &k) const
{
// sort the values in k first
row_initializer k2;
category::key_type k2;
for (auto &f : cat.key_item_indices())
{
auto fld = cat.get_item_name(f);
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i)
{ return i.name() == fld; });
{ return i.name == fld; });
if (ki == k.end())
k2.emplace_back(fld, "");
k2.emplace_back(std::string{ fld }, "");
else
k2.emplace_back(*ki);
}
@@ -534,6 +538,7 @@ void swap(category &a, category &b) noexcept
std::swap(a.m_index, b.m_index);
std::swap(a.m_head, b.m_head);
std::swap(a.m_tail, b.m_tail);
std::swap(a.m_dirty, b.m_dirty);
}
category::~category()
@@ -605,6 +610,30 @@ void category::remove_item(std::string_view item_name)
}
}
/// @brief Remove every item for which no row holds a non-empty value
void category::drop_empty_items()
{
	// Assume each item is unused until a row proves otherwise
	std::vector<bool> unused(m_items.size(), true);

	for (auto &row : *this)
	{
		for (size_t ix = 0; ix < m_items.size(); ++ix)
		{
			// Items already known to be in use need no further inspection
			if (not unused[ix])
				continue;

			if (row[ix].empty())
				continue;

			unused[ix] = false;
		}
	}

	// Gather the names before removing anything, so m_items is not altered
	// while it is still being scanned (remove_item presumably modifies it).
	std::vector<std::string> doomed;

	for (size_t ix = 0; ix < m_items.size(); ++ix)
	{
		if (not unused[ix])
			continue;

		doomed.push_back(m_items[ix].m_name);
	}

	for (auto &name : doomed)
		remove_item(name);
}
void category::rename_item(std::string_view from_name, std::string_view to_name)
{
for (std::size_t ix = 0; ix < m_items.size(); ++ix)
@@ -619,12 +648,12 @@ void category::rename_item(std::string_view from_name, std::string_view to_name)
}
}
iset category::get_items() const
std::vector<std::string> category::get_items() const
{
iset result;
std::vector<std::string> result;
for (auto &col : m_items)
result.insert(col.m_name);
result.emplace_back(col.m_name);
return result;
}
@@ -1247,6 +1276,7 @@ void category::clear()
delete m_index;
m_index = nullptr;
m_dirty = true;
}
void category::erase_orphans(condition &&cond, category &parent)
@@ -1338,8 +1368,7 @@ std::string category::get_unique_value(std::string_view item_name)
// brain-dead implementation
for (std::size_t ix = 0; ix < size(); ++ix)
{
// result = m_name + "-" + std::to_string(ix);
result = cif_id_for_number(ix);
result = cif_id_for_number(static_cast<int>(ix));
if (not contains(key(item_name) == result))
break;
}
@@ -1496,6 +1525,8 @@ void category::update_value(row *row, uint16_t item, std::string_view value, boo
if (value == oldValue) // no need to update
return;
m_dirty = true;
std::string oldStrValue{ oldValue };
// check the value
@@ -1637,6 +1668,8 @@ void category::delete_row(row *r)
row_allocator_type ra(get_allocator());
row_allocator_traits::destroy(ra, r);
row_allocator_traits::deallocate(ra, r, 1);
m_dirty = true;
}
}
@@ -1737,6 +1770,8 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
n = n->m_next = m_head->m_next;
}
m_dirty = true;
return iterator(*this, n);
}
catch (const std::exception &e)
@@ -1759,6 +1794,8 @@ void category::swap_item(uint16_t item_ix, row_handle &a, row_handle &b)
auto &ra = *a.m_row;
auto &rb = *b.m_row;
m_dirty = true;
while (ra.size() <= item_ix)
ra.emplace_back("");
@@ -1773,6 +1810,8 @@ void category::sort(std::function<int(row_handle, row_handle)> f)
if (m_head == nullptr)
return;
m_dirty = true;
std::vector<row_handle> rows;
for (auto itemRow = m_head; itemRow != nullptr; itemRow = itemRow->m_next)
rows.emplace_back(*this, *itemRow);
@@ -1912,10 +1951,10 @@ void category::write(std::ostream &os) const
{
std::vector<uint16_t> order(m_items.size());
iota(order.begin(), order.end(), static_cast<uint16_t>(0));
write(os, order, false);
write_cif(os, order, false);
}
void category::write(std::ostream &os, const std::vector<std::string> &items, bool addMissingItems)
void category::write(std::ostream &os, output_format fmt, const std::vector<std::string> &items, bool addMissingItems)
{
// make sure all items are present
for (auto &c : items)
@@ -1936,10 +1975,43 @@ void category::write(std::ostream &os, const std::vector<std::string> &items, bo
}
}
write(os, order, true);
switch (fmt)
{
case output_format::cif:
write_cif(os, order, addMissingItems);
break;
case output_format::csv:
write_delimited(os, order, addMissingItems, ",", false, true);
break;
case output_format::tsv:
write_delimited(os, order, addMissingItems, "\t", false, true);
break;
case output_format::list:
write_delimited(os, order, addMissingItems, "|", false, false);
break;
case output_format::column:
write_delimited(os, order, addMissingItems, " ", true, true);
break;
case output_format::markdown:
write_markdown(os, order, addMissingItems);
break;
case output_format::table:
write_table(os, order, addMissingItems, true);
break;
case output_format::box:
write_table(os, order, addMissingItems, false);
break;
}
}
void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const
void category::write_cif(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const
{
if (empty())
return;
@@ -2112,6 +2184,423 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
os << "# \n";
}
/**
 * @brief Write the rows of this category as delimiter separated text
 *
 * Fields are made safe for the requested format before they are written:
 * for "," a field containing a quote, comma, CR or LF is wrapped in double
 * quotes with embedded quotes doubled; for "\t" a tab, CR, LF or backslash is
 * written as the two character sequence \t, \r, \n or \\. The delimiters "|"
 * and " " write fields verbatim. The CIF null markers "?" and "." are written
 * as empty fields.
 *
 * @param os The stream to write to
 * @param order Indices into m_items of the items to write, in output order
 * @param includeEmptyItems Currently not consulted by this writer
 * @param delimiter One of ",", "\t", "|" or " "
 * @param aligned Pad every field to the width of the widest field in its
 *                column; numeric items are right aligned when validation
 *                information is available
 * @param header Write a first line with the item names; for delimiter " "
 *               this is followed by a line of dashes
 */
void category::write_delimited(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems,
	std::string_view delimiter, bool aligned, bool header) const
{
	if (empty())
		return;

	// Right alignment is only used for aligned output, and only when the
	// validator can tell us which items are numeric.
	std::vector<bool> right_aligned(m_items.size(), false);

	if (aligned and m_cat_validator != nullptr)
	{
		for (auto cix : order)
		{
			auto &col = m_items[cix];
			right_aligned[cix] = col.m_validator != nullptr and
			                     col.m_validator->m_type != nullptr and
			                     col.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;
		}
	}

	// Turn a raw field into the text that is safe to write for this delimiter
	auto escape = [delimiter](std::string_view s) -> std::string
	{
		if (delimiter == ",")
		{
			// Line breaks also force quoting, otherwise a record would be split
			if (s.find_first_of("\",\r\n") == std::string_view::npos)
				return std::string{ s };

			std::string r;
			r.reserve(s.length() + 2);
			r.push_back('"');

			for (auto ch : s)
			{
				if (ch == '"')
					r.push_back('"'); // a quote is escaped by doubling it
				r.push_back(ch);
			}

			r.push_back('"');
			return r;
		}

		if (delimiter == "\t")
		{
			std::string r;
			r.reserve(s.length());

			// Write the escape letter, not the raw control character: a raw
			// tab or newline after the backslash would still break the row.
			for (auto ch : s)
			{
				switch (ch)
				{
					case '\t': r.append("\\t"); break;
					case '\r': r.append("\\r"); break;
					case '\n': r.append("\\n"); break;
					case '\\': r.append("\\\\"); break;
					default: r.push_back(ch); break;
				}
			}

			return r;
		}

		// The remaining supported delimiters write their fields verbatim
		assert(delimiter == "|" or delimiter == " ");
		return std::string{ s };
	};

	// Widths stay zero (no padding at all) unless aligned output is requested
	std::vector<std::size_t> itemWidths(m_items.size());

	if (aligned)
	{
		if (header)
		{
			for (auto cix : order)
				itemWidths[cix] = escape(m_items[cix].m_name).length();
		}

		// Only the items that are written need to be measured
		for (auto r = m_head; r != nullptr; r = r->m_next)
		{
			for (auto cix : order)
			{
				auto v = r->get(cix);
				if (v == nullptr)
					continue;

				const std::size_t l = escape(v->text()).length();
				if (itemWidths[cix] < l)
					itemWidths[cix] = l;
			}
		}
	}

	if (header)
	{
		for (bool first = true; uint16_t cix : order)
		{
			if (not std::exchange(first, false))
				os << delimiter;

			const std::size_t w = itemWidths[cix];
			const std::string name = escape(m_items[cix].m_name);

			if (name.length() >= w)
				os << name;
			else if (delimiter == " ")
			{
				// Column output centres the name above its column
				const std::size_t left = (w - name.length()) / 2;
				const std::size_t right = w - name.length() - left;
				os << std::string(left, ' ') << name << std::string(right, ' ');
			}
			else if (right_aligned[cix])
				os << std::string(w - name.length(), ' ') << name;
			else
				os << name << std::string(w - name.length(), ' ');
		}

		os << '\n';

		// Column output gets a rule of dashes underneath the names
		if (delimiter == " ")
		{
			for (bool first = true; uint16_t cix : order)
			{
				if (not std::exchange(first, false))
					os << delimiter;

				os << std::string(itemWidths[cix], '-');
			}

			os << '\n';
		}
	}

	for (auto r = m_head; r != nullptr; r = r->m_next) // loop over rows
	{
		for (bool first = true; uint16_t cix : order)
		{
			if (not std::exchange(first, false))
				os << delimiter;

			const std::size_t w = itemWidths[cix];

			std::string_view s;

			auto iv = r->get(cix);
			if (iv != nullptr)
				s = iv->text();

			// The CIF null markers are written as empty fields
			if (s == "?" or s == ".")
				s = "";

			const std::string field = escape(s);

			if (field.length() >= w)
				os << field;
			else if (right_aligned[cix])
				os << std::string(w - field.length(), ' ') << field;
			else
				os << field << std::string(w - field.length(), ' ');
		}

		os << '\n';
	}
}
/**
 * @brief Write this category as a Markdown pipe table
 *
 * The output is a line with the centred item names, a line with the
 * alignment markers and then one line per row. Items that the validator
 * marks as numeric are right aligned, all others left aligned. The CIF null
 * markers "?" and "." are written as empty cells.
 *
 * @param os The stream to write to
 * @param order Indices into m_items of the items to write, in output order
 * @param includeEmptyItems Not consulted by this writer
 */
void category::write_markdown(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems) const
{
	if (empty())
		return;

	const std::size_t item_count = m_items.size();

	// Without validation info nothing is known to be numeric
	std::vector<bool> numeric(item_count, false);

	if (m_cat_validator != nullptr)
	{
		for (auto cix : order)
		{
			auto &item = m_items[cix];

			bool is_number = false;
			if (item.m_validator != nullptr and item.m_validator->m_type != nullptr)
				is_number = item.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;

			numeric[cix] = is_number;
		}
	}

	// A column is as wide as its name or its longest value, whichever is larger
	std::vector<std::size_t> widths(item_count);

	for (auto cix : order)
		widths[cix] = m_items[cix].m_name.length();

	for (auto r = m_head; r != nullptr; r = r->m_next)
	{
		for (uint16_t ix = 0; ix < r->size(); ++ix)
		{
			if (auto v = r->get(ix); v != nullptr)
			{
				const size_t len = v->text().length();
				if (len > widths[ix])
					widths[ix] = len;
			}
		}
	}

	// First line: the item names, centred in their column
	os << "| ";

	for (std::size_t i = 0; i < order.size(); ++i)
	{
		if (i != 0)
			os << " | ";

		const uint16_t cix = order[i];
		const std::size_t w = widths[cix];
		std::string_view name = m_items[cix].m_name;

		if (name.length() >= w)
		{
			os << name;
			continue;
		}

		int lead = (w - name.length()) / 2;
		int trail = w - name.length() - lead;
		os << std::string(lead, ' ') << name << std::string(trail, ' ');
	}

	os << " |\n";

	// Second line: dashes with a colon on the side the column is aligned to
	os << "| ";

	for (std::size_t i = 0; i < order.size(); ++i)
	{
		if (i != 0)
			os << " | ";

		const uint16_t cix = order[i];
		const std::string dashes(widths[cix] - 1, '-');

		if (numeric[cix])
			os << dashes << ':';
		else
			os << ':' << dashes;
	}

	os << " |\n";

	// Then one line for each row
	for (auto r = m_head; r != nullptr; r = r->m_next)
	{
		os << "| ";

		for (std::size_t i = 0; i < order.size(); ++i)
		{
			if (i != 0)
				os << " | ";

			const uint16_t cix = order[i];
			const std::size_t w = widths[cix];

			std::string_view text;
			if (auto iv = r->get(cix); iv != nullptr)
				text = iv->text();

			// Null markers end up as empty cells
			if (text == "?" or text == ".")
				text = "";

			if (text.length() >= w)
				os << text;
			else if (numeric[cix])
				os << std::string(w - text.length(), ' ') << text;
			else
				os << text << std::string(w - text.length(), ' ');
		}

		os << " |\n";
	}
}
/**
 * @brief Write this category as a table surrounded by a drawn box
 *
 * The table consists of a top border, a line with the centred item names, a
 * separator, one line per row and a bottom border. Items that the validator
 * marks as numeric are right aligned, all others left aligned. The CIF null
 * markers "?" and "." are written as empty cells.
 *
 * @param os The stream to write to
 * @param order Indices into m_items of the items to write, in output order
 * @param includeEmptyItems Not consulted by this writer
 * @param ascii Draw the box with '+', '-' and '|' when true, with Unicode
 *              box drawing characters otherwise
 */
void category::write_table(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyItems, bool ascii) const
{
	// Layout of the glyph tables: four groups of three entries (left edge,
	// column junction, right edge including the newline), followed by the
	// glyph used to fill the horizontal borders.
	constexpr std::size_t kTop = 0, kSeparator = 3, kBottom = 6, kRow = 9, kFill = 12;

	// Every line, content lines included, must use the same number of display
	// columns per glyph group, so the row edge, column separator and fill
	// glyphs are required in the Unicode table just as in the ASCII one.
	static constexpr const std::string_view
		kUnicodeBox[13] = {
			"┌─", "─┬─", "─┐\n",
			"├─", "─┼─", "─┤\n",
			"└─", "─┴─", "─┘\n",
			"│ ", " │ ", " │\n",
			"─"
		},
		kAsciiBox[13] = {    //
			"+-", "-+-", "-+\n", //
			"+-", "-+-", "-+\n", //
			"+-", "-+-", "-+\n", //
			"| ", " | ", " |\n", //
			"-"
		};

	if (empty())
		return;

	auto box = ascii ? kAsciiBox : kUnicodeBox;

	// Without validation info nothing is known to be numeric
	std::vector<bool> right_aligned(m_items.size(), false);

	if (m_cat_validator != nullptr)
	{
		for (auto cix : order)
		{
			auto &col = m_items[cix];
			right_aligned[cix] = col.m_validator != nullptr and
			                     col.m_validator->m_type != nullptr and
			                     col.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;
		}
	}

	// A column is as wide as its name or its longest value, whichever is larger
	std::vector<std::size_t> itemWidths(m_items.size());

	for (auto cix : order)
		itemWidths[cix] = m_items[cix].m_name.length();

	for (auto r = m_head; r != nullptr; r = r->m_next)
	{
		for (uint16_t ix = 0; ix < r->size(); ++ix)
		{
			auto v = r->get(ix);
			if (v == nullptr)
				continue;

			const std::size_t l = v->text().length();
			if (itemWidths[ix] < l)
				itemWidths[ix] = l;
		}
	}

	// Draw one horizontal border using the glyph group that starts at 'group'
	auto write_rule = [&os, &order, &itemWidths, box, kFill](std::size_t group)
	{
		os << box[group];

		for (bool first = true; uint16_t cix : order)
		{
			if (not std::exchange(first, false))
				os << box[group + 1];

			for (std::size_t i = 0; i < itemWidths[cix]; ++i)
				os << box[kFill];
		}

		os << box[group + 2];
	};

	write_rule(kTop);

	// The line with the item names, each centred in its column
	os << box[kRow];

	for (bool first = true; uint16_t cix : order)
	{
		if (not std::exchange(first, false))
			os << box[kRow + 1];

		const std::size_t w = itemWidths[cix];
		std::string_view s = m_items[cix].m_name;

		if (s.length() < w)
		{
			const std::size_t left = (w - s.length()) / 2;
			const std::size_t right = w - s.length() - left;
			os << std::string(left, ' ') << s << std::string(right, ' ');
		}
		else
			os << s;
	}

	os << box[kRow + 2];

	write_rule(kSeparator);

	for (auto r = m_head; r != nullptr; r = r->m_next) // loop over rows
	{
		os << box[kRow];

		for (bool first = true; uint16_t cix : order)
		{
			if (not std::exchange(first, false))
				os << box[kRow + 1];

			const std::size_t w = itemWidths[cix];

			std::string_view s;

			auto iv = r->get(cix);
			if (iv != nullptr)
				s = iv->text();

			// Null markers end up as empty cells
			if (s == "?" or s == ".")
				s = "";

			if (s.length() >= w)
				os << s;
			else if (right_aligned[cix])
				os << std::string(w - s.length(), ' ') << s;
			else
				os << s << std::string(w - s.length(), ' ');
		}

		os << box[kRow + 2];
	}

	write_rule(kBottom);
}
bool category::operator==(const category &rhs) const
{
// shortcut

View File

@@ -24,18 +24,38 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++.hpp"
#include "cif++/compound.hpp" // for compound_atom, compound_bond, compoun...
#if HAVE_CURL
# include <curl/curl.h>
#endif
#include "cif++/atom_type.hpp" // for atom_type_traits
#include "cif++/category.hpp" // for category
#include "cif++/datablock.hpp" // for datablock
#include "cif++/file.hpp" // for file
#include "cif++/item.hpp" // for item
#include "cif++/iterator.hpp" // for iterator_proxy
#include "cif++/parser.hpp" // for parser
#include "cif++/point.hpp" // for distance, point
#include "cif++/row.hpp" // for tie, row_initializer, tie_wrap
#include "cif++/text.hpp" // for iequals, replace_all, iset
#include "cif++/utilities.hpp" // for load_resource, VERBOSE, colour_type
#include <filesystem>
#include <fstream>
#include <map>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <algorithm> // for find_if
#include <cstddef> // for size_t
#include <exception> // for exception, throw_with_nested
#include <filesystem> // for path, exists
#include <fstream> // for char_traits, basic_ostream, operator<<
#include <iomanip> // for operator<<, quoted
#include <iostream> // for clog, cout, cerr
#include <limits> // for numeric_limits
#include <list> // for _List_iterator
#include <map> // for allocator, map, _Rb_tree_iterator
#include <memory> // for shared_ptr, unique_ptr, __shared_ptr_...
#include <optional> // for optional
#include <shared_mutex> // for shared_lock, shared_timed_mutex
#include <stdexcept> // for runtime_error, invalid_argument, out_...
#include <string> // for basic_string, string, operator==, ope...
#include <string_view> // for string_view, basic_string_view
#include <utility> // for pair, exchange, move
#include <vector> // for vector
namespace fs = std::filesystem;
@@ -298,6 +318,25 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
delete c;
}
/// @brief Return true if a compound with ID @a id is known to this factory or to one further down the chain
///
/// An ID recorded in m_missing is reported as absent straight away, without
/// consulting the next factory. Otherwise the compounds held by this factory
/// are searched first, and m_next (when set) is asked after that.
virtual bool exists_self(const std::string &id) const
{
	if (m_missing.contains(id))
		return false;

	// Capture the ID by reference; the lambda does not outlive this call, so
	// there is no reason to copy the string for the search.
	if (std::any_of(m_compounds.begin(), m_compounds.end(), [&id](compound *c)
			{ return c->id() == id; }))
		return true;

	return m_next and m_next->exists_self(id);
}
/// @brief Return true if a compound with ID @a id exists, as reported by exists_self
///
/// A shared lock on mMutex is held for the duration of the lookup.
bool exists(std::string_view id)
{
	std::shared_lock lock(mMutex);

	const std::string key{ id };
	return exists_self(key);
}
compound *get(std::string id)
{
std::shared_lock lock(mMutex);
@@ -487,10 +526,13 @@ compound *local_compound_factory_impl::create(const std::string &id)
try
{
const auto &[id, name, threeLetterCode, group] =
const auto &[id2, name, threeLetterCode, group] =
chem_comp->front().get<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group");
result = construct_compound(db, id, name, threeLetterCode, group);
if (id == id2)
result = construct_compound(db, id, name, threeLetterCode, group);
else
throw std::runtime_error("Compound ID's don't match: id 1=" + id + ", id 2=" + id2);
}
catch (const std::exception &ex)
{
@@ -601,16 +643,11 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
std::unique_ptr<compound_factory> compound_factory::s_instance;
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
compound_factory_options compound_factory::s_options;
bool compound_factory::s_use_thread_local_instance;
void compound_factory::init(bool useThreadLocalInstanceOnly)
{
init({ .use_thread_local_instance_only = useThreadLocalInstanceOnly });
}
void compound_factory::init(compound_factory_options options)
{
s_options = options;
s_use_thread_local_instance = useThreadLocalInstanceOnly;
}
compound_factory::compound_factory()
@@ -629,7 +666,7 @@ compound_factory::~compound_factory()
compound_factory &compound_factory::instance()
{
if (s_options.use_thread_local_instance_only)
if (s_use_thread_local_instance)
{
if (not tl_instance)
tl_instance.reset(new compound_factory());
@@ -645,7 +682,7 @@ compound_factory &compound_factory::instance()
void compound_factory::clear()
{
if (s_options.use_thread_local_instance_only)
if (s_use_thread_local_instance)
tl_instance.reset(nullptr);
else
s_instance.reset();
@@ -699,6 +736,11 @@ void compound_factory::pop_dictionary()
m_impl = m_impl->next();
}
/// Ask the current implementation whether compound @a id exists.
///
/// @param id  Compound identifier to look for.
/// @return false when there is no implementation (m_impl is empty),
///         otherwise the answer of m_impl->exists().
bool compound_factory::exists(std::string_view id) const
{
	if (not m_impl)
		return false;

	return m_impl->exists(id);
}
const compound *compound_factory::create(std::string_view id)
{
auto result = m_impl ? m_impl->get(std::string{ id }) : nullptr;
@@ -761,8 +803,7 @@ bool compound_factory::is_monomer(std::string_view res_name) const
void compound_factory::report_missing_compound(std::string_view compound_id)
{
static bool s_reported = false;
if (std::exchange(s_reported, true) == false)
if (std::exchange(m_report_missing, false))
{
using namespace cif::colour;
@@ -786,7 +827,7 @@ void compound_factory::report_missing_compound(std::string_view compound_id)
<< "in /var/cache/libcifpp using the following commands:\n\n"
<< "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif\n"
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic\n"
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic\n\n";
#endif
if (m_impl)

View File

@@ -24,8 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/category.hpp"
#include "cif++/condition.hpp"
#include "cif++/category.hpp"
#include "cif++/validate.hpp"
namespace cif
@@ -61,6 +61,52 @@ bool is_item_type_uchar(const category &cat, std::string_view col)
namespace detail
{
// // index lookup
// struct index_lookup_condition_impl : public condition_impl
// {
// index_lookup_condition_impl(row_initializer &&key_values)
// : m_key_values(std::move(key_values))
// {
// }
//
// condition_impl *prepare(const category &c) override
// {
// m_single_hit = c[m_key_values];
// return this;
// }
//
// bool test(row_handle r) const override
// {
// return m_single_hit == r;
// }
//
// void str(std::ostream &os) const override
// {
// os << "index scan";
// }
//
// virtual std::optional<row_handle> single() const override
// {
// return m_single_hit;
// }
//
// virtual bool equals(const condition_impl *rhs) const override
// {
// if (typeid(*rhs) == typeid(index_lookup_condition_impl))
// {
// auto ri = static_cast<const index_lookup_condition_impl *>(rhs);
// if (m_single_hit or ri->m_single_hit)
// return m_single_hit == ri->m_single_hit;
// else
// // watch out, both m_item_ix might be the same while item_names might be different (in case they both do not exist in the category)
// return m_key_values == ri->m_key_values;
// }
// return this == rhs;
// }
//
// row_initializer m_key_values;
// row_handle m_single_hit;
// };
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
@@ -85,7 +131,8 @@ namespace detail
c.key_item_indices().contains(m_item_ix) and
c.key_item_indices().size() == 1)
{
m_single_hit = c[{ { m_item_name, m_value } }];
item v(m_item_name, m_value);
m_single_hit = c[{ { m_item_name, std::string{ v.value() }, false } }];
}
return this;
@@ -99,7 +146,8 @@ namespace detail
{
auto &cs = (*s)->m_sub;
if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i) { return i->equals(c); }) == cs.end())
if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i)
{ return i->equals(c); }) == cs.end())
{
result = false;
break;
@@ -119,7 +167,8 @@ namespace detail
for (size_t fc_i = 0; fc_i < fc.size();)
{
auto c = fc[fc_i];
if (not found_in_range(c, subs.begin() + 1, subs.end())) {
if (not found_in_range(c, subs.begin() + 1, subs.end()))
{
++fc_i;
continue;
}
@@ -137,11 +186,12 @@ namespace detail
for (size_t ssub_i = 0; ssub_i < ssub.size();)
{
auto sc = ssub[ssub_i];
if (not sc->equals(c)) {
if (not sc->equals(c))
{
++ssub_i;
continue;
}
ssub.erase(ssub.begin() + ssub_i);
delete sc;
break;
@@ -158,6 +208,99 @@ namespace detail
return oc;
}
/// Prepare every sub condition for category @a c and, when the sub conditions
/// pin down each key item of the category, replace those comparisons by one
/// index lookup whose result is stored in m_single.
///
/// Only the four key_equals* condition types are considered for the lookup.
/// Each key item is taken into the lookup at most once; a second condition on
/// the same item stays in m_sub and is evaluated by test() as usual.
///
/// @param c  The category this condition is going to be evaluated against.
/// @return this
condition_impl *and_condition_impl::prepare(const category &c)
{
	for (auto &sub : m_sub)
		sub = sub->prepare(c);

	if (auto cv = c.get_cat_validator(); cv != nullptr)
	{
		// See if we can collapse a search part of this and_condition into a single index lookup
		cif::iset keys{ cv->m_keys.begin(), cv->m_keys.end() };

		category::key_type lookup;
		std::vector<condition_impl *> subs;

		// Key items that already have an entry in lookup. Counting entries alone is not
		// enough: two conditions on the same item would look like two different keys.
		cif::iset covered;
		auto claim = [&keys, &covered](const auto &item_name)
		{
			return keys.contains(item_name) and covered.emplace(item_name).second;
		};

		for (auto &sub : m_sub)
		{
			if (auto s = dynamic_cast<const key_equals_condition_impl *>(sub); s != nullptr)
			{
				if (claim(s->m_item_name))
				{
					lookup.emplace_back(s->m_item_name, s->m_value);
					subs.emplace_back(sub);
				}
				continue;
			}

			if (auto s = dynamic_cast<const key_equals_number_condition_impl *>(sub); s != nullptr)
			{
				if (claim(s->m_item_name))
				{
					// go through an item to obtain the textual form of the number
					item v{ s->m_item_name, s->m_value };
					lookup.emplace_back(s->m_item_name, std::string{ v.value() });
					subs.emplace_back(sub);
				}
				continue;
			}

			if (auto s = dynamic_cast<const key_equals_or_empty_condition_impl *>(sub); s != nullptr)
			{
				if (claim(s->m_item_name))
				{
					lookup.emplace_back(s->m_item_name, s->m_value, true);
					subs.emplace_back(sub);
				}
				continue;
			}

			if (auto s = dynamic_cast<const key_equals_number_or_empty_condition_impl *>(sub); s != nullptr)
			{
				if (claim(s->m_item_name))
				{
					item v{ s->m_item_name, s->m_value };
					lookup.emplace_back(s->m_item_name, std::string{ v.value() }, true);
					subs.emplace_back(sub);
				}
				continue;
			}
		}

		// Only a complete, non-empty key can be looked up in the index. Without the
		// emptiness check a category without key items would pass as 0 == 0.
		if (not keys.empty() and covered.size() == keys.size())
		{
			m_single = c[lookup];

			// The collapsed conditions are now represented by m_single. They are taken
			// out of m_sub, so nothing else will delete them: do it here.
			for (auto s : subs)
			{
				m_sub.erase(std::remove(m_sub.begin(), m_sub.end(), s), m_sub.end());
				delete s;
			}
		}
	}

	return this;
}
/// Evaluate this conjunction for row @a r.
///
/// @param r  The row to test.
/// @return false when m_single holds a row that differs from @a r or when any
///         sub condition rejects @a r; true otherwise (also when m_sub is empty).
bool and_condition_impl::test(row_handle r) const
{
	// a stored single hit that is not this row rules the row out immediately
	if (m_single.has_value() and *m_single != r)
		return false;

	// the first failing sub condition decides the outcome
	for (auto cond : m_sub)
	{
		if (not cond->test(r))
			return false;
	}

	return true;
}
condition_impl *or_condition_impl::prepare(const category &c)
{
std::vector<and_condition_impl *> and_conditions;
@@ -181,7 +324,7 @@ void condition::prepare(const category &c)
{
if (m_impl)
m_impl = m_impl->prepare(c);
m_prepared = true;
}

1102
src/cql.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -25,8 +25,11 @@
*/
#include "cif++/datablock.hpp"
#include "cif++/validate.hpp"
#include <exception>
namespace cif
{
@@ -42,7 +45,28 @@ datablock::datablock(const datablock &db)
void datablock::load_dictionary()
{
if (auto *audit_conform = get("audit_conform"); audit_conform and not audit_conform->empty())
set_validator(&validator_factory::instance().get(*audit_conform));
{
try
{
set_validator(validator_factory::instance().get(*audit_conform));
}
catch (const std::exception &ex)
{
std::clog << ex.what() << '\n';
}
}
}
/// Look up the validator for dictionary @a dict and install it on this datablock.
///
/// Any std::exception raised while doing so is caught; its message is written
/// to std::clog and the function returns normally.
///
/// @param dict  Name of the dictionary, passed on to validator_factory::get().
void datablock::load_dictionary(std::string_view dict)
{
	try
	{
		auto &&factory = validator_factory::instance();
		set_validator(factory.get(dict));
	}
	catch (const std::exception &err)
	{
		std::clog << err.what() << '\n';
	}
}
void datablock::set_validator(const validator *v)
@@ -96,7 +120,8 @@ bool datablock::strip()
bool result = true;
// remove all categories that have no validator
erase(std::remove_if(begin(), end(), [](category &c) {
erase(std::remove_if(begin(), end(), [](category &c)
{
bool result = false;
if (c.get_cat_validator() == nullptr)
{
@@ -104,8 +129,8 @@ bool datablock::strip()
std::clog << "Dropping category " << c.name() << '\n';
result = true;
}
return result;
}), end());
return result; }),
end());
// then strip the remaining categories
for (auto &cat : *this)

View File

@@ -28,6 +28,9 @@
#include "cif++/dictionary_parser.hpp"
#include "cif++/file.hpp"
#include "cif++/parser.hpp"
#include <exception>
#include <iomanip>
#include <stdexcept>
namespace cif
{
@@ -46,7 +49,7 @@ class dictionary_parser : public parser
void load_dictionary()
{
std::unique_ptr<datablock> dict;
auto savedDatablock = m_datablock;
auto savedDatablock = std::exchange(m_datablock, nullptr);
try
{
@@ -75,6 +78,9 @@ class dictionary_parser : public parser
error(ex.what());
}
if (m_datablock == nullptr)
throw std::runtime_error("Dictionary file is empty?");
// store all validators
for (auto &ic : mCategoryValidators)
m_validator.add_category_validator(std::move(ic));

View File

@@ -25,6 +25,7 @@
*/
#include "cif++/file.hpp"
#include "cif++/condition.hpp"
#include "cif++/gzio.hpp"
namespace cif
@@ -46,8 +47,16 @@ bool file::is_valid()
{
bool result = not empty();
for (auto &d : *this)
result = d.is_valid() and result;
for (bool first = true; auto &d : *this)
{
if (first)
{
result = d.is_valid() and result;
first = false;
}
else if (d.get_validator() != nullptr)
result = d.is_valid() and result;
}
if (result)
result = validate_links();

View File

@@ -24,13 +24,17 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/model.hpp"
#include "cif++.hpp"
#include "cif++/point.hpp"
#include <filesystem>
#include <fstream>
#include <initializer_list>
#include <iomanip>
#include <numeric>
#include <stack>
#include <stdexcept>
namespace fs = std::filesystem;
@@ -47,15 +51,10 @@ void atom::atom_impl::moveTo(const point &p)
auto r = row();
#if __cpp_lib_format
r.assign("Cartn_x", std::format("{:.3f}", p.m_x), false, false);
r.assign("Cartn_y", std::format("{:.3f}", p.m_y), false, false);
r.assign("Cartn_z", std::format("{:.3f}", p.m_z), false, false);
#else
r.assign("Cartn_x", cif::format("%.3f", p.m_x).str(), false, false);
r.assign("Cartn_y", cif::format("%.3f", p.m_y).str(), false, false);
r.assign("Cartn_z", cif::format("%.3f", p.m_z).str(), false, false);
#endif
r.assign("Cartn_x", cif::format("{:.3f}", p.m_x), false, false);
r.assign("Cartn_y", cif::format("{:.3f}", p.m_y), false, false);
r.assign("Cartn_z", cif::format("{:.3f}", p.m_z), false, false);
m_location = p;
}
@@ -353,17 +352,7 @@ std::tuple<point, float> residue::center_and_radius() const
for (auto &a : m_atoms)
pts.push_back(a.get_location());
auto center = centroid(pts);
float radius = 0;
for (auto &pt : pts)
{
float d = static_cast<float>(distance(pt, center));
if (radius < d)
radius = d;
}
return std::make_tuple(center, radius);
return smallest_sphere_around_points(pts);
}
bool residue::has_alternate_atoms() const
@@ -1280,28 +1269,28 @@ void structure::load_atoms_for_model(structure_open_options options)
else
{
std::vector<cif::mm::atom> atoms;
std::map<std::tuple<std::string,int>, std::map<std::string, float>> alts;
std::map<std::tuple<std::string, int>, std::map<std::string, float>> alts;
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
{
auto a = atoms.emplace_back(std::make_shared<atom::atom_impl>(m_db, id));
if (a.is_alternate())
{
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
auto alt_id = a.get_label_alt_id();
if (auto i = alts.find(key); i != alts.end())
i->second[alt_id] += a.get_occupancy();
else
alts[key][alt_id] = a.get_occupancy();
}
}
for (auto &&[key, value] : alts)
{
const auto &[asym_id, seq_id] = key;
// const auto &[asym_id, seq_id] = key;
// select highest occupancy for this residue's alternates
std::string alt_id;
float occupancy = options.occupancy_mode == occupancy_policy::MAX ? 0.f : std::numeric_limits<float>::max();
@@ -1324,11 +1313,11 @@ void structure::load_atoms_for_model(structure_open_options options)
}
}
}
value.clear();
value.emplace(alt_id, occupancy);
}
for (auto a : atoms)
{
if (a.is_alternate())
@@ -1340,10 +1329,8 @@ void structure::load_atoms_for_model(structure_open_options options)
}
else
emplace_atom(a);
}
}
}
void structure::load_data()
@@ -1903,13 +1890,12 @@ void structure::swap_atoms(atom a1, atom a2)
auto r1 = atomSites.find1(key("id") == a1.id());
auto r2 = atomSites.find1(key("id") == a2.id());
auto l1 = r1["label_atom_id"];
auto l2 = r2["label_atom_id"];
l1.swap(l2);
auto l3 = r1["auth_atom_id"];
auto l4 = r2["auth_atom_id"];
l3.swap(l4);
for (std::string fld : std::initializer_list<std::string>{ "label_atom_id", "auth_atom_id", "type_symbol" })
{
auto l1 = r1[fld];
auto l2 = r2[fld];
l1.swap(l2);
}
}
catch (const std::exception &ex)
{
@@ -2353,6 +2339,36 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
return asym_id;
}
/// Create a new non-polymer from the atoms of compound @a compound_id.
///
/// The compound is requested from cif::compound_factory. For each of its atoms
/// a row is built with type_symbol, label/auth atom id, the coordinates of the
/// atom's location and a B_iso_or_equiv of 30.00. The rows are then handed to
/// the create_non_poly(entity_id, atoms) overload, using the entity returned
/// by create_non_poly_entity().
///
/// @param compound_id    Identifier of the compound to instantiate.
/// @param skip_hydrogen  When true, atoms whose element symbol is "H" are left out.
/// @return The value returned by the create_non_poly(entity_id, atoms) overload.
/// @throws std::runtime_error when the compound factory does not know @a compound_id.
std::string structure::create_non_poly(const std::string &compound_id, bool skip_hydrogen)
{
	auto compound = cif::compound_factory::instance().create(compound_id);
	if (compound == nullptr)
		// plain concatenation: same text, and no dependency on <format> being available
		throw std::runtime_error(compound_id + " is not a known compound");

	std::vector<cif::row_initializer> atoms;

	// iterate by reference, the atoms are only read
	for (const auto &a : compound->atoms())
	{
		const auto symbol = cif::atom_type_traits(a.type_symbol).symbol();

		// We skip H-atoms, as fitting without H-atoms works better and we avoid conflicts in protonation states between CCD and MONLIB
		if (skip_hydrogen and symbol == "H")
			continue;

		auto location = a.get_location();

		atoms.emplace_back(cif::row_initializer{
			{ "type_symbol", symbol },
			{ "label_atom_id", a.id },
			{ "auth_atom_id", a.id },
			{ "Cartn_x", location.get_x() },
			{ "Cartn_y", location.get_y() },
			{ "Cartn_z", location.get_z() },
			{ "B_iso_or_equiv", 30.00 } });
	}

	return create_non_poly(create_non_poly_entity(compound_id), atoms);
}
void structure::create_water(row_initializer atom)
{
using namespace literals;
@@ -2417,6 +2433,61 @@ void structure::create_water(row_initializer atom)
});
}
/// Add a struct_conn record describing a link between atoms @a a1 and @a a2.
///
/// A struct_conn_type row with id @a link_type is added first when none exists
/// yet. The new struct_conn row is filled with the label and auth identifiers,
/// alt id, insertion code and symmetry of both atoms, the distance between
/// their locations, and @a role. pdbx_leaving_atom_flag is always set to "one".
///
/// @param a1         First partner atom (written to the ptnr1_* items).
/// @param a2         Second partner atom (written to the ptnr2_* items).
/// @param link_type  Value for conn_type_id; also used as prefix of the new id.
/// @param role       Value for pdbx_role.
/// @return The id of the newly added struct_conn row.
std::string structure::create_link(atom a1, atom a2, const std::string &link_type, const std::string &role)
{
using namespace literals;
auto &struct_conn = m_db["struct_conn"];
auto &struct_conn_type = m_db["struct_conn_type"];
// This will validate link_type :-)
// (a row for link_type is only added when struct_conn_type has none yet)
if (not struct_conn_type.contains("id"_key == link_type))
struct_conn_type.emplace({ { "id", link_type } });
// NOTE(review): presumably yields an id starting with "<link_type>_" that is not yet used in struct_conn -- confirm against category::get_unique_id
std::string link_id = struct_conn.get_unique_id(link_type + '_');
// The label_seq_id items are created with a name only; a value is assigned
// solely when the atom reports a non-zero label_seq_id.
item label_seq_id_1("ptnr1_label_seq_id");
if (int nr = a1.get_label_seq_id(); nr != 0)
label_seq_id_1.value(std::to_string(nr));
item label_seq_id_2("ptnr2_label_seq_id");
if (int nr = a2.get_label_seq_id(); nr != 0)
label_seq_id_2.value(std::to_string(nr));
struct_conn.emplace(
{ //
{ "id", link_id },
{ "conn_type_id", link_type },
{ "pdbx_leaving_atom_flag", "one" },
{ "ptnr1_label_asym_id", a1.get_label_asym_id() },
{ "ptnr1_label_comp_id", a1.get_label_comp_id() },
label_seq_id_1,
{ "ptnr1_label_atom_id", a1.get_label_atom_id() },
{ "pdbx_ptnr1_label_alt_id", a1.get_label_alt_id() },
{ "pdbx_ptnr1_PDB_ins_code", a1.get_pdb_ins_code() },
{ "ptnr1_auth_asym_id", a1.get_auth_asym_id() },
{ "ptnr1_auth_comp_id", a1.get_auth_comp_id() },
{ "ptnr1_auth_seq_id", a1.get_auth_seq_id() },
{ "ptnr1_symmetry", a1.symmetry() },
{ "ptnr2_label_asym_id", a2.get_label_asym_id() },
{ "ptnr2_label_comp_id", a2.get_label_comp_id() },
label_seq_id_2,
{ "ptnr2_label_atom_id", a2.get_label_atom_id() },
{ "pdbx_ptnr2_label_alt_id", a2.get_label_alt_id() },
{ "pdbx_ptnr2_PDB_ins_code", a2.get_pdb_ins_code() },
{ "ptnr2_auth_asym_id", a2.get_auth_asym_id() },
{ "ptnr2_auth_comp_id", a2.get_auth_comp_id() },
{ "ptnr2_auth_seq_id", a2.get_auth_seq_id() },
{ "ptnr2_symmetry", a2.symmetry() },
// NOTE(review): the trailing 3 looks like the number of decimals used for the distance -- confirm against the item constructor
{ "pdbx_dist_value", distance(a1.get_location(), a2.get_location()), 3 },
{ "pdbx_role", role } });
return link_id;
}
branch &structure::create_branch()
{
auto &entity = m_db["entity"];
@@ -2850,8 +2921,8 @@ static int compare_numbers(std::string_view a, std::string_view b)
std::from_chars_result ra, rb;
ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
ra = from_chars(a.data(), a.data() + a.length(), da);
rb = from_chars(b.data(), b.data() + b.length(), db);
if (not(bool) ra.ec and not(bool) rb.ec)
{
@@ -2874,7 +2945,7 @@ static int compare_numbers(std::string_view a, std::string_view b)
int compare_cif_id(const std::string &a, const std::string &b)
{
int d = a.length() - b.length();
int d = static_cast<int>(a.length() - b.length());
if (d == 0)
d = a.compare(b);
return d;

View File

@@ -345,6 +345,7 @@ sac_parser::CIFToken sac_parser::get_next_token()
{
retract();
result = CIFToken::VALUE;
m_token_value = "?";
}
else
state = State::Value;

View File

@@ -33,7 +33,6 @@
#include <regex>
#include <set>
namespace cif::pdb
{
@@ -58,9 +57,9 @@ std::string cif2pdbDate(const std::string &d)
int month = std::stoi(m[2].str());
if (m[3].matched)
result = cif::format("%02.2d-%3.3s-%02.2d", stoi(m[3].str()), kMonths[month - 1], (year % 100)).str();
result = cif::format("{:02}-{:3.3}-{:02}", stoi(m[3].str()), kMonths[month - 1], (year % 100));
else
result = cif::format("%3.3s-%02.2d", kMonths[month - 1], (year % 100)).str();
result = cif::format("{:3.3}-{:02}", kMonths[month - 1], (year % 100));
}
return result;
@@ -258,16 +257,14 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
{
to_upper(pubname);
const std::string kRefHeader = s1 + "REF %2.2s %-28.28s %2.2s%4.4s %5.5s %4.4s";
pdbFile << cif::format(kRefHeader, "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
pdbFile << s1 << cif::format("REF {:2.2s} {:<28.28s} {:2.2s}{:>4.4s} {:>5.5s} {:4.4s}", "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
<< '\n';
++result;
}
if (not issn.empty())
{
const std::string kRefHeader = s1 + "REFN ISSN %-25.25s";
pdbFile << cif::format(kRefHeader, issn) << '\n';
pdbFile << s1 << cif::format("REFN ISSN {:<25.25s}", issn) << '\n';
++result;
}
@@ -276,27 +273,25 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
//// 0 1 2 3 4 5 6 7 8
//// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
// const char kRefHeader[] =
// "REMARK 1 REFN %4.4s %-6.6s %2.2s %-25.25s";
// "REMARK 1 REFN {:4.4s} {:<6.6s} {:2.2s} {:<25.25s}";
//
// pdbFile << (boost::cif::format(kRefHeader)
// % (astm.empty() ? "" : "ASTN")
// % astm
// % country
// % issn).str()
// % issn)
// << '\n';
// }
if (not pmid.empty())
{
const std::string kPMID = s1 + "PMID %-60.60s ";
pdbFile << cif::format(kPMID, pmid) << '\n';
pdbFile << s1 << cif::format("PMID {:<60.60s} ", pmid) << '\n';
++result;
}
if (not doi.empty())
{
const std::string kDOI = s1 + "DOI %-60.60s ";
pdbFile << cif::format(kDOI, doi) << '\n';
pdbFile << s1 << cif::format("DOI {:<60.60s} ", doi) << '\n';
++result;
}
@@ -307,10 +302,10 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
{
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
const char kHeader[] =
"HEADER %-40.40s"
"%-9.9s"
" %-4.4s";
// const char kHeader[] =
// "HEADER {:<40.40s}"
// "{:<9.9s}"
// " {:<4.4s}";
// HEADER
@@ -345,7 +340,12 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
}
}
pdbFile << cif::format(kHeader, keywords, date, db.name()) << '\n';
pdbFile << cif::format(/* kHeader */
"HEADER {:<40.40s}"
"{:<9.9s}"
" {:<4.4s}"
, keywords, date, db.name()) << '\n';
// TODO: implement
// OBSLTE (skip for now)
@@ -535,7 +535,6 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
write_header_lines(pdbFile, db);
// REVDAT
const char kRevDatFmt[] = "REVDAT %3d%2.2s %9.9s %4.4s %1d ";
auto &cat2 = db["database_PDB_rev"];
std::vector<row_handle> rev(cat2.begin(), cat2.end());
sort(rev.begin(), rev.end(), [](row_handle a, row_handle b) -> bool
@@ -559,9 +558,9 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
{
std::string cs = ++continuation > 1 ? std::to_string(continuation) : std::string();
pdbFile << cif::format(kRevDatFmt, revNum, cs, date, db.name(), modType);
pdbFile << cif::format("REVDAT {:3}{:2.2s} {:9.9s} {:4.4s} {:1} ", revNum, cs, date, db.name(), modType);
for (std::size_t i = 0; i < 4; ++i)
pdbFile << cif::format(" %-6.6s", (i < types.size() ? types[i] : std::string()));
pdbFile << cif::format(" {:<6.6s}", (i < types.size() ? types[i] : std::string()));
pdbFile << '\n';
if (types.size() > 4)
@@ -614,7 +613,7 @@ void WriteRemark2(std::ostream &pdbFile, const datablock &db)
{
float resHigh = refine.front()["ls_d_res_high"].as<float>();
pdbFile << "REMARK 2\n"
<< cif::format("REMARK 2 RESOLUTION. %7.2f ANGSTROMS.", resHigh) << '\n';
<< cif::format("REMARK 2 RESOLUTION. {:7.2f} ANGSTROMS.", resHigh) << '\n';
}
catch (...)
{ /* skip it */
@@ -761,10 +760,7 @@ class Fs : public FBase
else
{
os << '\n';
std::stringstream ss;
ss << "REMARK " << std::setw(3) << std::right << mNr << ' ';
WriteOneContinuedLine(os, ss.str(), 0, s);
WriteOneContinuedLine(os, cif::format("REMARK {:3} ", mNr), 0, s);
}
}
@@ -1617,7 +1613,7 @@ void WriteRemark3Phenix(std::ostream &pdbFile, const datablock &db)
percent_reflns_obs /= 100;
pdbFile << RM3(" ") << cif::format("%3d %7.4f - %7.4f %4.2f %8d %5d %6.4f %6.4f", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
pdbFile << RM3(" ") << cif::format("{:3} {:7.4f} - {:7.4f} {:4.2f} {:8} {:5} {:6.4f} {:6.4f}", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
}
pdbFile << RM3("") << '\n'
@@ -2585,7 +2581,7 @@ void WriteRemark465(std::ostream &pdbFile, const datablock &db)
cif::tie(modelNr, resName, chainID, iCode, seqNr) =
r.get("PDB_model_num", "auth_comp_id", "auth_asym_id", "PDB_ins_code", "auth_seq_id");
pdbFile << cif::format("REMARK 465 %3.3s %3.3s %1.1s %5d%1.1s", modelNr, resName, chainID, seqNr, iCode) << '\n';
pdbFile << cif::format("REMARK 465 {:3.3s} {:3.3s} {:1.1s} {:5}{:1.1s}", modelNr, resName, chainID, seqNr, iCode) << '\n';
}
}
@@ -2632,7 +2628,7 @@ void WriteRemark470(std::ostream &pdbFile, const datablock &db)
while (not a.second.empty())
{
pdbFile << cif::format("REMARK 470 %3.3s %3.3s %1.1s%4d%1.1s ", modelNr, resName, chainID, seqNr, iCode) << " ";
pdbFile << cif::format("REMARK 470 {:>3.3s} {:3.3s} {:1.1s}{:4}{:1.1s} ", modelNr, resName, chainID, seqNr, iCode) << " ";
for (std::size_t i = 0; i < 6 and not a.second.empty(); ++i)
{
@@ -2730,16 +2726,16 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
if (dbAccession.length() > 8 or db_code.length() > 12 or atoi(dbseqEnd.c_str()) >= 100000)
pdbFile << cif::format(
"DBREF1 %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-20.20s",
"DBREF1 {:>4.4s} {:1.1s} {:>4.4s}{:1.1s} {:>4.4s}{:1.1s} {:<6.6s} {:<20.20s}",
idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, db_code)
<< '\n'
<< cif::format(
"DBREF2 %4.4s %1.1s %-22.22s %10.10s %10.10s",
"DBREF2 {:>4.4s} {:1.1s} {:<22.22s} {:10.10s} {:10.10s}",
idCode, chainID, dbAccession, dbseqBegin, dbseqEnd)
<< '\n';
else
pdbFile << cif::format(
"DBREF %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-8.8s %-12.12s %5.5s%1.1s %5.5s%1.1s",
"DBREF {:>4.4s} {:1.1s} {:>4.4s}{:1.1s} {:>4.4s}{:1.1s} {:<6.6s} {:<8.8s} {:<12.12s} {:>5.5s}{:1.1s} {:>5.5s}{:1.1s}",
idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, dbAccession, db_code, dbseqBegin, dbinsBeg, dbseqEnd, dbinsEnd)
<< '\n';
}
@@ -2758,9 +2754,8 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
to_upper(conflict);
pdbFile << cif::format(
"SEQADV %4.4s %3.3s %1.1s %4.4s%1.1s %-4.4s %-9.9s %3.3s %5.5s %-21.21s",
"SEQADV {:4.4s} {:3.3s} {:1.1s} {:>4.4s}{:1.1s} {:<4.4s} {:<9.9s} {:3.3s} {:>5.5s} {:<21.21s}",
idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict)
.str()
<< '\n';
}
@@ -2788,7 +2783,7 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
t = 13;
pdbFile << cif::format(
"SEQRES %3d %1.1s %4d %-51.51s ",
"SEQRES {:3} {:1.1s} {:4} {:<51.51s} ",
n++, std::string{ chainID }, seqresl[chainID], join(seq.begin(), seq.begin() + t, " "))
<< '\n';
@@ -2808,9 +2803,8 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
r.get("auth_asym_id", "auth_seq_id", "auth_comp_id", "PDB_ins_code", "parent_comp_id", "details");
pdbFile << cif::format(
"MODRES %4.4s %3.3s %1.1s %4.4s%1.1s %3.3s %-41.41s",
"MODRES {:4.4s} {:3.3s} {:1.1s} {:4.4s}{:1.1s} {:3.3s} {:<41.41s}",
db.name(), resName, chainID, seqNum, iCode, stdRes, comment)
.str()
<< '\n';
}
@@ -2925,7 +2919,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
{
if (h.water)
continue;
pdbFile << cif::format("HET %3.3s %c%4d%c %5d", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
pdbFile << cif::format("HET {:3.3s} {:1c}{:4}{:1c} {:5}", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
++numHet;
}
@@ -2940,7 +2934,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
for (;;)
{
pdbFile << cif::format("HETNAM %2.2s %3.3s ", (c > 1 ? std::to_string(c) : std::string()), id);
pdbFile << cif::format("HETNAM {:2.2s} {:3.3s} ", (c > 1 ? std::to_string(c) : std::string()), id);
++c;
if (name.length() > 55)
@@ -3032,7 +3026,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
{
std::stringstream fs;
fs << cif::format("FORMUL %2d %3.3s %2.2s%c", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
fs << cif::format("FORMUL {:2} {:3.3s} {:2.2s}{:1c}", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
++c;
if (formula.length() > 51)
@@ -3099,7 +3093,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
"pdbx_PDB_helix_class", "pdbx_PDB_helix_length", "beg_auth_seq_id", "end_auth_seq_id");
++numHelix;
pdbFile << cif::format("HELIX %3d %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s%2d%-30.30s %5d",
pdbFile << cif::format("HELIX {:3} {:>3.3s} {:3.3s} {:1.1s} {:4}{:1.1s} {:3.3s} {:1.1s} {:4}{:1.1s}{:2}{:<30.30s} {:5}",
numHelix, pdbx_PDB_helix_id, beg_label_comp_id, beg_auth_asym_id, beg_auth_seq_id, pdbx_beg_PDB_ins_code, end_label_comp_id, end_auth_asym_id, end_auth_seq_id, pdbx_end_PDB_ins_code, pdbx_PDB_helix_class, details, pdbx_PDB_helix_length)
<< '\n';
}
@@ -3136,7 +3130,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
"pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
"end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';
pdbFile << cif::format("SHEET {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2}", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';
first = false;
}
@@ -3155,7 +3149,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
if (h.empty())
{
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
pdbFile << cif::format("SHEET {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2}", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
}
else
{
@@ -3168,8 +3162,8 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
curAtom = cif2pdbAtomName(curAtom, compID[0], db);
prevAtom = cif2pdbAtomName(prevAtom, compID[1], db);
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d "
"%-4.4s%3.3s %1.1s%4d%1.1s %-4.4s%3.3s %1.1s%4d%1.1s",
pdbFile << cif::format("SHEET {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2} "
"{:<4.4s}{:3.3s} {:1.1s}{:4}{:1.1s} {:<4.4s}{:3.3s} {:1.1s}{:4}{:1.1s}",
rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense, curAtom, curResName, curChainID, curResSeq, curICode, prevAtom, prevResName, prevChainID, prevResSeq, prevICode)
<< '\n';
}
@@ -3207,7 +3201,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << cif::format("SSBOND %3d CYS %1.1s %4d%1.1s CYS %1.1s %4d%1.1s %6.6s %6.6s %5.2f", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';
pdbFile << cif::format("SSBOND {:3} CYS {:1.1s} {:4}{:1.1s} CYS {:1.1s} {:4}{:1.1s} {:6.6s} {:6.6s} {:5.2f}", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';
++nr;
}
@@ -3234,10 +3228,10 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << cif::format("LINK %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %6.6s %6.6s", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
pdbFile << cif::format("LINK {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s} {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s} {:>6.6s} {:>6.6s}", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
if (not Length.empty())
pdbFile << cif::format(" %5.2f", stod(Length));
pdbFile << cif::format(" {:5.2f}", stod(Length));
pdbFile << '\n';
}
@@ -3255,7 +3249,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
"pdbx_label_comp_id_2", "pdbx_auth_asym_id_2", "pdbx_auth_seq_id_2", "pdbx_PDB_ins_code_2",
"pdbx_PDB_model_num", "pdbx_omega_angle");
pdbFile << cif::format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3.3s %6.2f",
pdbFile << cif::format("CISPEP {:3.3s} {:3.3s} {:1.1s} {:4}{:1.1s} {:3.3s} {:1.1s} {:4}{:1.1s} {:3.3s} {:6.2f}",
serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << '\n';
}
}
@@ -3276,7 +3270,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
cif::tie(siteID, resName, chainID, seq, iCode) =
r.get("site_id", "auth_comp_id", "auth_asym_id", "auth_seq_id", "pdbx_auth_ins_code");
sites[siteID].push_back(cif::format("%3.3s %1.1s%4d%1.1s ", resName, chainID, seq, iCode).str());
sites[siteID].push_back(cif::format("{:3.3s} {:1.1s}{:4}{:1.1s} ", resName, chainID, seq, iCode));
}
for (auto s : sites)
@@ -3289,7 +3283,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
int nr = 1;
while (res.empty() == false)
{
pdbFile << cif::format("SITE %3d %3.3s %2d ", nr, siteID, numRes);
pdbFile << cif::format("SITE {:3} {:3.3s} {:2} ", nr, siteID, numRes);
for (int i = 0; i < 4; ++i)
{
@@ -3318,7 +3312,7 @@ void WriteCrystallographic(std::ostream &pdbFile, const datablock &db)
r = db["cell"].find_first(key("entry_id") == db.name());
pdbFile << cif::format("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4d", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
pdbFile << cif::format("CRYST1{:9.3f}{:9.3f}{:9.3f}{:7.2f}{:7.2f}{:7.2f} {:<11.11s}{:4}", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
}
int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
@@ -3327,18 +3321,18 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
for (auto r : db["database_PDB_matrix"])
{
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
result += 3;
break;
}
for (auto r : db["atom_sites"])
{
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE{:1} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
result += 3;
break;
}
@@ -3348,9 +3342,9 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
{
std::string given = r["code"] == "given" ? "1" : "";
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:1.1s}", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:1.1s}", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:1.1s}", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';
++nr;
result += 3;
@@ -3369,10 +3363,6 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
auto &atom_site = db["atom_site"];
auto &atom_site_anisotrop = db["atom_site_anisotrop"];
auto &entity = db["entity"];
// auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
// auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];
int serial = 1;
auto ri = atom_site.begin();
@@ -3417,7 +3407,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
if (terminate)
{
pdbFile << cif::format("TER %5d %3.3s %1.1s%4d%1.1s", serial, resName, chainID, resSeq, iCode) << '\n';
pdbFile << cif::format("TER {:5} {:3.3s} {:1.1s}{:4}{:1.1s}", serial, resName, chainID, resSeq, iCode) << '\n';
++serial;
terminatedChains.insert(chainID);
@@ -3446,26 +3436,6 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
r.get("id", "group_PDB", "label_atom_id", "label_alt_id", "auth_comp_id", "auth_asym_id", "auth_seq_id",
"pdbx_PDB_ins_code", "Cartn_x", "Cartn_y", "Cartn_z", "occupancy", "B_iso_or_equiv", "type_symbol", "pdbx_formal_charge");
if (resName != "HOH")
{
int entity_id = r.get<int>("label_entity_id");
try
{
auto type = entity.find1<std::string>("id"_key == entity_id, "type");
if (type == "branched") // find the real auth_seq_num, since sugars have their auth_seq_num reused as sugar number... sigh.
resSeq = pdbx_branch_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
// else if (type == "non-polymer") // same for non-polymers
// resSeq = pdbx_nonpoly_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
// else if (type == "polymer")
// resSeq = pdbx_poly_seq_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
}
catch (const std::exception &ex)
{
std::cerr << "Oops, there was not exactly one entity with id " << entity_id << '\n';
}
}
if (chainID.length() > 1)
throw std::runtime_error("Chain ID " + chainID + " won't fit into a PDB file");
@@ -3476,7 +3446,8 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
if (charge != 0)
sCharge = std::to_string(charge) + (charge > 0 ? '+' : '-');
pdbFile << cif::format("%-6.6s%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s", group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';
pdbFile << cif::format("{:<6.6s}{:5} {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f} {:>2.2s}{:2.2s}",
group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';
++numCoord;
@@ -3491,7 +3462,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
tie(u11, u22, u33, u12, u13, u23) =
ai.get("U[1][1]", "U[2][2]", "U[3][3]", "U[1][2]", "U[1][3]", "U[2][3]");
pdbFile << cif::format("ANISOU%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %7d%7d%7d%7d%7d%7d %2.2s%2.2s", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
pdbFile << cif::format("ANISOU{:5} {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s} {:7}{:7}{:7}{:7}{:7}{:7} {:2.2s}{:2.2s}", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
}
++serial;
@@ -3543,7 +3514,7 @@ std::tuple<int, int> WriteCoordinate(std::ostream &pdbFile, const datablock &db)
for (int model_nr : models)
{
if (models.size() > 1)
pdbFile << cif::format("MODEL %4d", model_nr) << '\n';
pdbFile << cif::format("MODEL {:4}", model_nr) << '\n';
std::set<std::string> TERminatedChains;
auto n = WriteCoordinatesForModel(pdbFile, db, last_resseq_for_chain_map, TERminatedChains, model_nr);
@@ -3615,7 +3586,7 @@ std::string get_HEADER_line(const datablock &db, std::string::size_type truncate
}
}
return FixStringLength(cif::format("HEADER %-40.40s%-9.9s %-4.4s", keywords, date, db.name()).str(), truncate_at);
return FixStringLength(cif::format("HEADER {:<40.40s}{:<9.9s} {:<4.4s}", keywords, date, db.name()), truncate_at);
}
std::string get_COMPND_line(const datablock &db, std::string::size_type truncate_at)
@@ -3788,7 +3759,7 @@ void write(std::ostream &os, const datablock &db)
numXform = WriteCoordinateTransformation(os, db);
std::tie(numCoord, numTer) = WriteCoordinate(os, db);
os << cif::format("MASTER %5d 0%5d%5d%5d%5d%5d%5d%5d%5d%5d%5d", numRemark, numHet, numHelix, numSheet, numTurn, numSite, numXform, numCoord, numTer, numConect, numSeq) << '\n'
os << cif::format("MASTER {:5} 0{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}", numRemark, numHet, numHelix, numSheet, numTurn, numSite, numXform, numCoord, numTer, numConect, numSeq) << '\n'
<< "END\n";
}

View File

@@ -26,19 +26,16 @@
#include "pdb2cif_remark_3.hpp"
#include "cif++.hpp"
#include <cif++/compound.hpp>
#include <cif++/gzio.hpp>
#include <cif++/model.hpp>
#include <cif++/pdb.hpp>
#include <cif++/symmetry.hpp>
#include <iomanip>
#include <map>
#include <set>
#include <stack>
using cif::category;
using cif::datablock;
using cif::iequals;
using cif::key;
using cif::to_lower;
using cif::to_lower_copy;
#include <stdexcept>
// --------------------------------------------------------------------
// attempt to come up with better error handling
@@ -895,12 +892,7 @@ class PDBFileParser
if (year < 1950)
year += 100;
std::stringstream ss;
ss << std::setw(4) << std::setfill('0') << year << '-'
<< std::setw(2) << std::setfill('0') << month << '-'
<< std::setw(2) << std::setfill('0') << day;
s = ss.str();
s = cif::format("{:04}-{:02}-{:02}", year, month, day);
}
else if (regex_match(s, m, rx2))
{
@@ -912,7 +904,7 @@ class PDBFileParser
if (year < 1950)
year += 100;
s = cif::format("%04d-%02d", year, month).str();
s = cif::format("{:04}-{:02}", year, month);
}
else
ec = error::make_error_code(error::pdbErrors::invalidDate);
@@ -3146,7 +3138,6 @@ void PDBFileParser::ParseRemark350()
std::map<std::string, std::string> values;
std::vector<std::string> asymIdList;
std::smatch m;
cif::row_handle genR;
std::vector<double> mat, vec;
@@ -3341,18 +3332,18 @@ void PDBFileParser::ParseRemark350()
{ "type", type },
// { "name", "" },
// { "symmetryOperation", "" },
{ "matrix[1][1]", cif::format("%12.10f", mat[0]).str() },
{ "matrix[1][2]", cif::format("%12.10f", mat[1]).str() },
{ "matrix[1][3]", cif::format("%12.10f", mat[2]).str() },
{ "vector[1]", cif::format("%12.10f", vec[0]).str() },
{ "matrix[2][1]", cif::format("%12.10f", mat[3]).str() },
{ "matrix[2][2]", cif::format("%12.10f", mat[4]).str() },
{ "matrix[2][3]", cif::format("%12.10f", mat[5]).str() },
{ "vector[2]", cif::format("%12.10f", vec[1]).str() },
{ "matrix[3][1]", cif::format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", cif::format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", cif::format("%12.10f", mat[8]).str() },
{ "vector[3]", cif::format("%12.10f", vec[2]).str() }
{ "matrix[1][1]", cif::format("{:12.10f}", mat[0]) },
{ "matrix[1][2]", cif::format("{:12.10f}", mat[1]) },
{ "matrix[1][3]", cif::format("{:12.10f}", mat[2]) },
{ "vector[1]", cif::format("{:12.10f}", vec[0]) },
{ "matrix[2][1]", cif::format("{:12.10f}", mat[3]) },
{ "matrix[2][2]", cif::format("{:12.10f}", mat[4]) },
{ "matrix[2][3]", cif::format("{:12.10f}", mat[5]) },
{ "vector[2]", cif::format("{:12.10f}", vec[1]) },
{ "matrix[3][1]", cif::format("{:12.10f}", mat[6]) },
{ "matrix[3][2]", cif::format("{:12.10f}", mat[7]) },
{ "matrix[3][3]", cif::format("{:12.10f}", mat[8]) },
{ "vector[3]", cif::format("{:12.10f}", vec[2]) }
});
// clang-format on
@@ -4318,7 +4309,7 @@ void PDBFileParser::ConstructEntities()
}
// build sugar trees first
ConstructSugarTrees(asymNr);
// ConstructSugarTrees(asymNr);
// done with the sugar, resume operation as before
@@ -5772,6 +5763,9 @@ void PDBFileParser::ParseCoordinate(int modelNr)
std::string element = vS(77, 78); // 77 - 78 LString(2) element Element symbol, right-justified.
std::string charge = vS(79, 80); // 79 - 80 LString(2) charge Charge on the atom.
if (element.empty())
throw std::runtime_error("Empty element column in PDB file at line " + std::to_string(mRec->mLineNr));
std::string entityID = mAsymID2EntityID[asymID];
charge = pdb2cifCharge(charge);
@@ -5850,7 +5844,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
auto f = [](float f) -> std::string
{
return cif::format("%6.4f", f).str();
return cif::format("{:6.4f}", f);
};
// clang-format off
@@ -6376,7 +6370,7 @@ void read_pdb_file(std::istream &pdbFile, cif::file &cifFile)
{
cifFile.front().load_dictionary();
if (cifFile.front().get_validator() == nullptr)
cifFile.front().set_validator(&validator_factory::instance().get("mmcif_pdbx.dic"));
cifFile.front().set_validator(validator_factory::instance().get("mmcif_pdbx.dic"));
if (not cifFile.is_valid() and cif::VERBOSE >= 0)
std::cerr << "Resulting mmCIF file is not valid!\n";
@@ -6406,7 +6400,7 @@ file read(std::istream &is)
if (std::isalpha(ch) and std::toupper(ch) != 'D')
{
read_pdb_file(is, result);
reconstruct_pdbx(result);
fixup_pdbx(result);
}
else
{
@@ -6442,8 +6436,14 @@ file read(std::istream &is)
}
// Must be a PDB like file, right?
if (not result.empty() and result.front().get_validator() == nullptr)
result.front().set_validator(&validator_factory::instance().get("mmcif_pdbx.dic"));
if (not result.empty())
{
auto &db = result.front();
if (db.get_validator() == nullptr)
db.set_validator(validator_factory::instance().get("mmcif_pdbx.dic"));
if (db.is_valid())
db.get_validator()->fill_audit_conform(db["audit_conform"]);
}
return result;
}

View File

@@ -1478,7 +1478,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
best.parser->fixup();
auto &validator = cif::validator_factory::instance().get("mmcif_pdbx.dic");
auto &validator = cif::validator_factory::instance()["mmcif_pdbx.dic"];
for (auto &cat1 : best.parser->mDb)
{

View File

@@ -25,6 +25,11 @@
*/
#include "cif++.hpp"
#include "cif++/compound.hpp"
#include "cif++/row.hpp"
#include <stdexcept>
#include <string>
// --------------------------------------------------------------------
@@ -138,15 +143,15 @@ void checkEntities(datablock &db)
auto compound = cf.create(comp_id);
if (compound)
formula_weight += compound->formula_weight();
else if (cif::VERBOSE > 0)
std::clog << "missing information for compound " + comp_id << '\n';
// else if (cif::VERBOSE > 0)
// std::clog << "missing information for compound " + comp_id << '\n';
++n;
}
formula_weight -= (n - 1) * 18.015;
formula_weight -= (n - 1) * 18.015f;
}
else if (type == "water")
formula_weight = 18.015;
formula_weight = 18.015f;
else if (type == "branched")
{
int n = 0;
@@ -156,12 +161,12 @@ void checkEntities(datablock &db)
auto compound = cf.create(comp_id);
if (compound)
formula_weight += compound->formula_weight();
else if (cif::VERBOSE > 0)
std::clog << "missing information for compound " + comp_id << '\n';
// else if (cif::VERBOSE > 0)
// std::clog << "missing information for compound " + comp_id << '\n';
++n;
}
formula_weight -= (n - 1) * 18.015;
formula_weight -= (n - 1) * 18.015f;
}
else if (type == "non-polymer")
{
@@ -171,7 +176,7 @@ void checkEntities(datablock &db)
auto compound = cf.create(*comp_id);
if (not compound)
{
std::cerr << "missing information for compound " << *comp_id << "\n";
// std::cerr << "missing information for compound " << *comp_id << "\n";
continue;
}
formula_weight = compound->formula_weight();
@@ -185,6 +190,8 @@ void checkEntities(datablock &db)
void createEntityIDs(datablock &db)
{
using namespace literals;
// Suppose the file does not have entity ID's. We have to make up some
// walk the atoms. For each auth_asym_id we have a new struct_asym.
@@ -196,28 +203,44 @@ void createEntityIDs(datablock &db)
// that should cover it
auto &atom_site = db["atom_site"];
auto &entity = db["entity"];
auto &cf = compound_factory::instance();
std::vector<std::vector<residue_key_type>> entities;
std::vector<std::vector<row_handle>> entities;
std::string lastAsymID;
int lastSeqID = -1;
std::vector<residue_key_type> waters;
std::vector<row_handle> waters;
for (residue_key_type k : atom_site.rows<std::optional<std::string>,
std::optional<int>,
std::optional<std::string>,
std::optional<std::string>,
std::optional<int>,
std::optional<std::string>>(
"auth_asym_id", "auth_seq_id", "auth_comp_id",
"label_asym_id", "label_seq_id", "label_comp_id"))
int nextEntityID;
try
{
if (entity.empty())
nextEntityID = 1;
else
nextEntityID = entity.find_max<int>("id") + 1;
}
catch (...)
{
nextEntityID = 1;
}
for (auto rh : atom_site.find("label_entity_id"_key == cif::null))
{
residue_key_type k = rh.get<std::optional<std::string>,
std::optional<int>,
std::optional<std::string>,
std::optional<std::string>,
std::optional<int>,
std::optional<std::string>>(
"auth_asym_id", "auth_seq_id", "auth_comp_id",
"label_asym_id", "label_seq_id", "label_comp_id");
std::string comp_id = get_comp_id(k);
if (cf.is_water(comp_id))
{
waters.emplace_back(k);
waters.emplace_back(rh);
continue;
}
@@ -226,19 +249,20 @@ void createEntityIDs(datablock &db)
bool is_monomer = cf.is_monomer(comp_id);
if (lastAsymID == asym_id and lastSeqID == seq_id and not is_monomer)
continue;
// if (lastAsymID == asym_id and lastSeqID == seq_id and not is_monomer)
// continue;
if (asym_id != lastAsymID or (not is_monomer and lastSeqID != seq_id))
entities.push_back({});
entities.back().emplace_back(k);
entities.back().emplace_back(rh);
lastAsymID = asym_id;
lastSeqID = seq_id;
}
std::map<std::size_t, std::string> entity_ids;
std::map<std::string, std::string> newEntitiesForCompound;
atom_site.add_item("label_entity_id");
@@ -247,7 +271,39 @@ void createEntityIDs(datablock &db)
if (entity_ids.contains(i))
continue;
auto entity_id = std::to_string(i + 1);
residue_key_type k = entities[i].front().get<std::optional<std::string>, std::optional<int>, std::optional<std::string>, std::optional<std::string>, std::optional<int>, std::optional<std::string>>(
"auth_asym_id", "auth_seq_id", "auth_comp_id",
"label_asym_id", "label_seq_id", "label_comp_id");
std::string comp_id = get_comp_id(k);
std::string entity_id;
if (auto v = db["pdbx_entity_nonpoly"].find_first("comp_id"_key == comp_id); v)
entity_id = v.get<std::string>("entity_id");
else if (auto i2 = newEntitiesForCompound.find(comp_id); i2 != newEntitiesForCompound.end())
entity_id = i2->second;
else
{
entity_id = std::to_string(nextEntityID++);
if (cf.is_monomer(comp_id))
entity.emplace({ //
{ "id", entity_id },
{ "type", "polymer" } });
else if (cf.is_water(comp_id))
entity.emplace({ //
{ "id", entity_id },
{ "type", "water" } });
else
{
entity.emplace({ //
{ "id", entity_id },
{ "type", "non-polymer" } });
newEntitiesForCompound[comp_id] = entity_id;
}
}
entity_ids[i] = entity_id;
for (std::size_t j = i + 1; j < entities.size(); ++j)
@@ -259,20 +315,17 @@ void createEntityIDs(datablock &db)
for (std::size_t ix = 0; auto &e : entities)
{
auto k = e.front();
const auto &entity_id = entity_ids[ix++];
std::string comp_id = get_comp_id(k);
for (auto &k : e)
atom_site.update_value(get_condition(k), "label_entity_id", entity_id);
for (auto rh : e)
rh["label_entity_id"] = entity_id;
}
if (not waters.empty())
{
std::string waterEntityID = std::to_string(entities.size() + 1);
for (auto &k : waters)
atom_site.update_value(get_condition(k), "label_entity_id", waterEntityID);
for (auto rh : waters)
rh["label_entity_id"] = waterEntityID;
}
}
@@ -319,7 +372,7 @@ void fillLabelAsymID(category &atom_site)
{
if (not mapAuthAsymIDAndEntityToLabelAsymID.contains(key))
{
std::string asym_id = cif_id_for_number(mapAuthAsymIDAndEntityToLabelAsymID.size());
std::string asym_id = cif_id_for_number(static_cast<int>(mapAuthAsymIDAndEntityToLabelAsymID.size()));
mapAuthAsymIDAndEntityToLabelAsymID[key] = asym_id;
}
}
@@ -439,9 +492,38 @@ void checkAtomRecords(datablock &db)
if (atom_site.contains(key("label_seq_id") < 0))
fixNegativeSeqID(atom_site);
std::set<int> polymer_entities;
for (int id : db["entity"].find<int>("type"_key == "polymer", "id"))
polymer_entities.insert(id);
std::set<std::string> polymer_entities;
if (db["entity"].empty())
{
// No entity, so we have to guess the types based on the content of atom_site
std::string last_entity_id;
std::optional<int> last_label_seq_id, last_auth_seq_id;
std::set<std::string> entityIDs;
for (auto &[entity_id, label_comp_id, label_seq_id, auth_comp_id, auth_seq_id] :
atom_site.rows<std::string, std::string, std::optional<int>, std::string, std::optional<int>>(
"label_entity_id", "label_comp_id", "label_seq_id", "auth_comp_id", "auth_seq_id"))
{
if (cf.is_water(label_comp_id) or cf.is_water(auth_comp_id))
continue;
if (polymer_entities.contains(entity_id))
continue;
if (last_entity_id == entity_id and (last_label_seq_id != label_seq_id or last_auth_seq_id != auth_seq_id))
polymer_entities.emplace(entity_id);
last_entity_id = entity_id;
last_label_seq_id = label_seq_id;
last_auth_seq_id = auth_seq_id;
}
}
else
{
for (std::string id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
polymer_entities.insert(id);
}
std::set<std::string> missingCompounds;
@@ -478,13 +560,14 @@ void checkAtomRecords(datablock &db)
if (missingCompounds.contains(comp_id))
continue;
bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<int>());
bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<std::string>());
auto compound = cf.create(comp_id);
if (not compound)
{
missingCompounds.insert(comp_id);
std::cerr << "Missing compound information for " << comp_id << "\n";
// if (cif::VERBOSE > 0)
// std::cerr << "Missing compound information for " << comp_id << "\n";
continue;
}
@@ -531,18 +614,24 @@ void checkAtomRecords(datablock &db)
if (is_polymer and row["label_seq_id"].empty() and cf.is_monomer(comp_id))
row["label_seq_id"] = std::to_string(seq_id);
if (row["label_atom_id"].empty())
row["label_atom_id"] = row["auth_atom_id"].text();
if (row["label_asym_id"].empty())
row["label_asym_id"] = row["auth_asym_id"].text();
else if (row["auth_asym_id"].empty())
row["auth_asym_id"] = row["label_asym_id"].text();
if (row["label_comp_id"].empty())
row["label_comp_id"] = row["auth_comp_id"].text();
else if (row["auth_comp_id"].empty())
row["auth_comp_id"] = row["label_comp_id"].text();
if (row["label_atom_id"].empty())
row["label_atom_id"] = row["auth_atom_id"].text();
else if (row["auth_atom_id"].empty())
row["auth_atom_id"] = row["label_atom_id"].text();
// Rewrite the coordinates and other items that look better in a fixed format
// Be careful not to nuke invalidly formatted data here
for (auto [item_name, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{
for (auto [item_name, prec] : std::vector<std::tuple<std::string_view, int>>{
{ "cartn_x", 3 },
{ "cartn_y", 3 },
{ "cartn_z", 3 },
@@ -557,11 +646,11 @@ void checkAtomRecords(datablock &db)
if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); (bool)ec)
continue;
if (s.length() < prec + 1 or s[s.length() - prec - 1] != '.')
if (s.length() < prec + 1UL or s[s.length() - prec - 1] != '.')
{
char b[12];
if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); (bool)ec)
if (auto [ptr, ec] = std::to_chars(b, b + sizeof(b), v, std::chars_format::fixed, prec); ec == std::errc{})
row.assign(item_name, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
}
}
@@ -603,19 +692,24 @@ void checkAtomAnisotropRecords(datablock &db)
std::vector<row_handle> to_be_deleted;
std::map<int, row_handle> atoms;
for (auto rh : atom_site)
atoms[rh.get<int>("id")] = rh;
bool warnReplaceTypeSymbol = true;
for (auto row : atom_site_anisotrop)
{
auto parents = atom_site_anisotrop.get_parents(row, atom_site);
if (parents.size() != 1)
auto ai = atoms.find(row.get<int>("id"));
if (ai == atoms.end())
{
to_be_deleted.emplace_back(row);
continue;
}
// this happens sometimes (Phenix):
auto parent = ai->second;
auto parent = parents.front();
// this happens sometimes (Phenix):
if (row["type_symbol"].empty())
row["type_symbol"] = parent["type_symbol"].text();
@@ -628,16 +722,14 @@ void checkAtomAnisotropRecords(datablock &db)
if (row["pdbx_auth_alt_id"].empty() and not parent["pdbx_auth_alt_id"].empty())
row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
if (row["pdbx_label_seq_id"].empty() and not parent["pdbx_label_seq_id"].empty())
if (row["pdbx_label_seq_id"].empty() and not parent["label_seq_id"].empty())
row["pdbx_label_seq_id"] = parent["label_seq_id"].text();
if (row["pdbx_label_asym_id"].empty() and not parent["pdbx_label_asym_id"].empty())
if (row["pdbx_label_asym_id"].empty() and not parent["label_asym_id"].empty())
row["pdbx_label_asym_id"] = parent["label_asym_id"].text();
if (row["pdbx_label_atom_id"].empty() and not parent["pdbx_label_atom_id"].empty())
if (row["pdbx_label_atom_id"].empty() and not parent["label_atom_id"].empty())
row["pdbx_label_atom_id"] = parent["label_atom_id"].text();
if (row["pdbx_label_comp_id"].empty() and not parent["pdbx_label_comp_id"].empty())
if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
row["pdbx_label_comp_id"] = parent["label_comp_id"].text();
// if (row["pdbx_PDB_model_num"].empty() and not parent["pdbx_PDB_model_num"].empty())
// row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
}
if (not to_be_deleted.empty())
@@ -650,23 +742,53 @@ void checkAtomAnisotropRecords(datablock &db)
}
}
void createStructAsym(datablock &db)
void checkStructAsym(datablock &db)
{
auto &atom_site = db["atom_site"];
auto &struct_asym = db["struct_asym"];
for (const auto &[label_asym_id, entity_id] : atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
if (struct_asym.empty())
{
if (label_asym_id.empty())
throw std::runtime_error("File contains atom_site records without a label_asym_id");
if (struct_asym.count(key("id") == label_asym_id) == 0)
for (const auto &[label_asym_id, entity_id] : atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
{
struct_asym.emplace({
// clang-format off
{ "id", label_asym_id },
{ "entity_id", entity_id }
//clang-format on
});
if (label_asym_id.empty())
throw std::runtime_error("File contains atom_site records without a label_asym_id");
if (struct_asym.count(key("id") == label_asym_id) == 0)
{
struct_asym.emplace({
// clang-format off
{ "id", label_asym_id },
{ "entity_id", entity_id }
//clang-format on
});
}
}
}
else
{
for (const auto &[label_asym_id, entity_id] :
atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
{
if (label_asym_id.empty())
throw std::runtime_error("File contains atom_site records without a label_asym_id");
auto sa = struct_asym.find_first(key("id") == label_asym_id);
if (sa)
{
if (sa["entity_id"].empty())
sa.assign("entity_id", entity_id, false, true);
else if (sa.get<std::string>("entity_id") != entity_id)
throw std::runtime_error("Inconsistent entity ID's in struct_asym");
}
else
{
struct_asym.emplace({
// clang-format off
{ "id", label_asym_id },
{ "entity_id", entity_id }
//clang-format on
});
}
}
}
}
@@ -722,7 +844,7 @@ void createEntity(datablock &db)
std::string type, desc;
float weight = 0;
int count = 0;
size_t count = 0;
auto first_comp_id = std::get<0>(content.front());
@@ -737,8 +859,11 @@ void createEntity(datablock &db)
auto c = cf.create(first_comp_id);
type = "non-polymer";
desc = c->name();
weight = c->formula_weight();
if (c)
{
desc = c->name();
weight = c->formula_weight();
}
}
else
{
@@ -809,28 +934,28 @@ void createEntityPoly(datablock &db)
if (type != "other")
{
std::string c_type;
if (cf.is_base(comp_id))
if (auto i = compound_factory::kBaseMap.find(comp_id); i != compound_factory::kBaseMap.end())
{
c_type = "polydeoxyribonucleotide";
letter_can = compound_factory::kBaseMap.at(comp_id);
letter_can = i->second;
if (comp_id.length() == 1)
letter = letter_can;
else
letter = '(' + letter_can + ')';
letter = '(' + comp_id + ')';
}
else if (cf.is_peptide(comp_id))
else if (auto i2 = compound_factory::kAAMap.find(comp_id); i2 != compound_factory::kAAMap.end())
{
c_type = "polypeptide(L)";
letter = letter_can = compound_factory::kAAMap.at(comp_id);
letter = letter_can = i2->second;
}
else if (iequals(c->type(), "D-PEPTIDE LINKING"))
{
c_type = "polypeptide(D)";
letter_can = c->one_letter_code();
if (letter_can == 0)
letter_can = 'X';
letter = '(' + comp_id + ')';
non_std_linkage = true;
@@ -841,9 +966,6 @@ void createEntityPoly(datablock &db)
c_type = "polypeptide(L)";
letter_can = c->one_letter_code();
if (letter_can == 0)
letter_can = 'X';
letter = '(' + comp_id + ')';
non_std_monomer = true;
@@ -853,9 +975,6 @@ void createEntityPoly(datablock &db)
// c_type = "other";
letter_can = c->one_letter_code();
if (letter_can == 0)
letter_can = 'X';
letter = '(' + comp_id + ')';
non_std_monomer = true;
@@ -868,7 +987,7 @@ void createEntityPoly(datablock &db)
}
seq[auth_asym_id] += letter;
seq_can[auth_asym_id] += letter_can;
seq_can[auth_asym_id] += letter_can ? letter_can : 'X';
if (find(pdb_strand_ids.begin(), pdb_strand_ids.end(), auth_asym_id) == pdb_strand_ids.end())
pdb_strand_ids.emplace_back(auth_asym_id);
@@ -1167,7 +1286,7 @@ void createPdbxNonpolyScheme(datablock &db)
for (int ndb_nr = 1; auto row : atom_site.find("label_entity_id"_key == entity_id and "label_comp_id"_key == comp_id))
{
// Skip existing records
auto linked = atom_site.get_linked(row, pdbx_nonpoly_scheme);
auto linked = atom_site.get_children(row, pdbx_nonpoly_scheme);
if (not linked.empty())
continue;
@@ -1242,6 +1361,101 @@ void createPdbxBranchScheme(datablock &db)
}
}
/// \brief Try to attach \a validator to \a cat, repairing the key items of
/// the category when building its index fails.
///
/// Attaching the validator is retried in a loop. Two kinds of failure are
/// repaired:
///  - missing_key_error: every row whose value for the reported key item is
///    empty receives a value obtained from cat.get_unique_value().
///  - duplicate_key_error: every key item of the category that is not used as
///    a child key in one of the links of this category is overwritten, in all
///    rows, with a value obtained from cat.get_unique_value().
///
/// When the same kind of error is raised again directly after it has been
/// repaired, the repair is considered to have failed and the exception is
/// rethrown.
///
/// \param validator  The validator (dictionary) to attach to the category
/// \param cat        The category to repair
/// \param db         The datablock passed along to cat.set_validator()
///
/// \throws missing_key_error    when missing keys remain after a repair
/// \throws duplicate_key_error  when duplicate keys remain after a repair
/// \throws std::runtime_error   when duplicate keys were found but none of the
///                              key items can be replaced
void reconstruct_index_for_category(const validator &validator, category &cat, datablock &db)
{
	// NOTE(review): presumably nullptr when the dictionary does not describe
	// this category, hence the explicit check before use below -- confirm.
	auto cv = validator.get_validator_for_category(cat.name());

	// Remembers which kind of repair was attempted last
	enum class State
	{
		Start,
		MissingKeys,
		DuplicateKeys
	} state = State::Start;

	for (;;)
	{
		// See if we can build an index
		try
		{
			cat.set_validator(&validator, db);
		}
		catch (const missing_key_error &ex)
		{
			// Same error directly after repairing it: give up
			if (state == State::MissingKeys)
			{
				if (cif::VERBOSE > 0)
					std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
				throw;
			}

			state = State::MissingKeys;

			const auto key = ex.get_key();

			if (cif::VERBOSE > 1)
				std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';

			// Only fill in rows that do not have a value for this key yet
			for (auto row : cat)
			{
				auto ord = row.get<std::string>(key.c_str());
				if (ord.empty())
					row.assign({ //
						{ key, cat.get_unique_value(key) } });
			}

			continue;
		}
		catch (const duplicate_key_error &ex)
		{
			// Same error directly after repairing it: give up
			if (state == State::DuplicateKeys)
			{
				if (cif::VERBOSE > 0)
					std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
				throw;
			}

			state = State::DuplicateKeys;

			if (cif::VERBOSE > 0)
				std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';

			// replace items that do not define a relation to a parent
			std::set<std::string> replaceableKeys;

			// Without a category validator there are no known keys, which
			// leaves replaceableKeys empty and results in the error below
			// instead of dereferencing a null pointer.
			if (cv != nullptr)
			{
				// The links do not depend on the key, so fetch them only once
				auto &&links = validator.get_links_for_child(cat.name());

				auto isChildKey = [&links](const auto &key)
				{
					for (const auto &lv : links)
					{
						if (std::find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
							return true;
					}
					return false;
				};

				for (const auto &key : cv->m_keys)
				{
					if (not isChildKey(key))
						replaceableKeys.insert(key);
				}
			}

			if (replaceableKeys.empty())
				throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");

			for (const auto &key : replaceableKeys)
			{
				for (auto row : cat)
					row.assign(key, cat.get_unique_value(key), false, false);
			}

			continue;
		}

		// The validator was attached without errors, we're done
		break;
	}
}
bool reconstruct_pdbx(file &file)
{
if (file.empty())
@@ -1250,9 +1464,9 @@ bool reconstruct_pdbx(file &file)
auto &db = file.front();
if (auto ac = db.get("audit_conform"); ac != nullptr)
return reconstruct_pdbx(file, validator_factory::instance().get(*ac));
return reconstruct_pdbx(file, validator_factory::instance()[*ac]);
else
return reconstruct_pdbx(file, validator_factory::instance().get("mmcif_pdbx.dic"));
return reconstruct_pdbx(file, validator_factory::instance()["mmcif_pdbx.dic"]);
}
bool reconstruct_pdbx(file &file, const validator &validator)
@@ -1293,7 +1507,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
checkChemCompRecords(db);
// If the data is really horrible, it might not contain entities
if (not db["atom_site"].find_first(key("label_entity_id") != null))
if (db["atom_site"].find_first(key("label_entity_id") == null))
createEntityIDs(db);
// Now see if atom records make sense at all
@@ -1337,7 +1551,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
iv->m_type != nullptr and
iv->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;
for (std::size_t ix = 0; auto row : cat)
for (int ix = 0; auto row : cat)
{
if (number)
row.assign(key, std::to_string(++ix), false, false);
@@ -1408,95 +1622,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
}
}
enum class State
{
Start,
MissingKeys,
DuplicateKeys
} state = State::Start;
for (;;)
{
// See if we can build an index
try
{
cat.set_validator(&validator, db);
}
catch (const missing_key_error &ex)
{
if (state == State::MissingKeys)
{
if (cif::VERBOSE > 0)
std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
throw;
}
state = State::MissingKeys;
auto key = ex.get_key();
if (cif::VERBOSE > 0)
std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';
for (auto row : cat)
{
auto ord = row.get<std::string>(key.c_str());
if (ord.empty())
row.assign({ //
{ key, cat.get_unique_value(key) } });
}
continue;
}
catch (const duplicate_key_error &ex)
{
if (state == State::DuplicateKeys)
{
if (cif::VERBOSE > 0)
std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
throw;
}
state = State::DuplicateKeys;
if (cif::VERBOSE > 0)
std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';
// replace items that do not define a relation to a parent
std::set<std::string> replaceableKeys;
for (auto key : cv->m_keys)
{
bool replaceable = true;
for (auto lv : validator.get_links_for_child(cat.name()))
{
if (find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
{
replaceable = false;
break;
}
}
if (replaceable)
replaceableKeys.insert(key);
}
if (replaceableKeys.empty())
throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");
for (auto key : replaceableKeys)
{
for (auto row : cat)
row.assign(key, cat.get_unique_value(key), false, false);
}
continue;
}
break;
}
reconstruct_index_for_category(validator, cat, db);
}
catch (const std::exception &ex)
{
@@ -1525,9 +1651,8 @@ bool reconstruct_pdbx(file &file, const validator &validator)
checkAtomAnisotropRecords(db);
// Now create any missing categories
// Next make sure we have struct_asym records
if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
createStructAsym(db);
// Next make sure we have good struct_asym records
checkStructAsym(db);
if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
createEntity(db);
@@ -1537,7 +1662,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
comparePolySeqSchemes(db);
createPdbxNonpolyScheme(db);
// Create a minimal set of branch records
@@ -1564,9 +1689,9 @@ void fixup_pdbx(file &file)
auto &db = file.front();
if (auto ac = db.get("audit_conform"); ac != nullptr)
fixup_pdbx(file, validator_factory::instance().get(*ac));
fixup_pdbx(file, validator_factory::instance()[*ac]);
else
fixup_pdbx(file, validator_factory::instance().get("mmcif_pdbx.dic"));
fixup_pdbx(file, validator_factory::instance()["mmcif_pdbx.dic"]);
}
void fixup_pdbx(file &file, const validator &validator)
@@ -1577,11 +1702,16 @@ void fixup_pdbx(file &file, const validator &validator)
// assuming the first datablock contains the entry ...
auto &db = file.front();
if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
// ... and any additional datablock will contain compound information
cif::compound_source cs(file);
if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
// Be silent about missing compound info in fixup
auto &cf = compound_factory::instance();
bool save_report = cf.get_report_missing();
cf.set_report_missing(cif::VERBOSE > 1);
std::string entry_id;
@@ -1610,17 +1740,29 @@ void fixup_pdbx(file &file, const validator &validator)
if (not db["atom_site"].find_first(key("label_entity_id") != null))
createEntityIDs(db);
// Now see if atom records make sense at all
// Now see if atom records make sense at all, but in a silent way, this time
checkAtomRecords(db);
db["chem_comp"].reorder_by_index();
// See if we can easily reconstruct missing data fields in order to create an index
for (auto &cat : db)
{
try
{
cat.set_validator(&validator, db);
}
catch (const missing_key_error &)
{
reconstruct_index_for_category(validator, cat, db);
}
}
db.set_validator(&validator);
// Now create any missing categories
// Next make sure we have struct_asym records
if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
createStructAsym(db);
// Next make sure we have good struct_asym records
checkStructAsym(db);
if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
createEntity(db);
@@ -1630,7 +1772,7 @@ void fixup_pdbx(file &file, const validator &validator)
if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
comparePolySeqSchemes(db);
createPdbxNonpolyScheme(db);
// Create a minimal set of branch records
@@ -1640,6 +1782,7 @@ void fixup_pdbx(file &file, const validator &validator)
checkEntities(db);
// That's it
cf.set_report_missing(save_report);
}
} // namespace cif::pdb

View File

@@ -25,6 +25,7 @@
*/
#include "cif++.hpp"
#include "cif++/validate.hpp"
namespace cif::pdb
{
@@ -61,17 +62,22 @@ condition get_parents_condition(const validator &validator, row_handle rh, const
result = std::move(result) or std::move(cond);
}
}
else if (cif::VERBOSE > 0)
std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';
return result;
}
/// Validate @a file against the mmcif_pdbx.dic dictionary, reporting the
/// outcome as a single boolean.
///
/// @return true only when the validation succeeded and no error code was set
bool is_valid_pdbx_file(const file &file)
{
	std::error_code ec;
	const bool valid = is_valid_pdbx_file(file, validator_factory::instance()["mmcif_pdbx.dic"], ec);

	if (not valid)
		return false;

	return not static_cast<bool>(ec);
}
bool is_valid_pdbx_file(const file &file, const validator &v)
{
std::error_code ec;
bool result = is_valid_pdbx_file(file, v, ec);
return result and not (bool)ec;
return result and not(bool) ec;
}
bool is_valid_pdbx_file(const file &file, std::error_code &ec)
@@ -81,10 +87,10 @@ bool is_valid_pdbx_file(const file &file, std::error_code &ec)
if (file.empty())
ec = make_error_code(validation_error::empty_file);
else if (auto ac = file.front().get("audit_conform"); ac != nullptr)
result = is_valid_pdbx_file(file, validator_factory::instance().get(*ac), ec);
result = is_valid_pdbx_file(file, validator_factory::instance()[*ac], ec);
else
result = is_valid_pdbx_file(file, validator_factory::instance().get("mmcif_pdbx.dic"), ec);
result = is_valid_pdbx_file(file, validator_factory::instance()["mmcif_pdbx.dic"], ec);
return result;
}
@@ -92,7 +98,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
{
using namespace cif::literals;
bool result = true;
bool result = true, warned_missing_parents = false;
try
{
@@ -129,10 +135,18 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not cf.is_monomer(comp_id))
continue;
auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
auto cond = get_parents_condition(validator, r, pdbx_poly_seq_scheme);
if (not cond)
{
if (VERBOSE > 0 and std::exchange(warned_missing_parents, true) == false)
std::cerr << "warning: missing links for atom_site/pdbx_poly_seq_scheme\n";
continue;
}
auto p = pdbx_poly_seq_scheme.find(std::move(cond));
if (p.size() != 1)
{
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << "In atom_site record: " << r["id"].text() << '\n';
throw std::runtime_error("For each monomer in atom_site there should be exactly one pdbx_poly_seq_scheme record");
}
@@ -161,7 +175,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
std::map<int,std::set<std::string>> mon_per_seq_id;
std::map<int, std::set<std::string>> mon_per_seq_id;
for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
{
@@ -196,28 +210,37 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
throw std::runtime_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
}
for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
{
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
{
condition cond;
for (auto mon_id : mon_ids)
cond = std::move(cond) or "label_comp_id"_key == mon_id;
// This code proved to take too much time ...
cond = "label_entity_id"_key == entity_id and
"label_asym_id"_key == asym_id and
"label_seq_id"_key == seq_id and not std::move(cond);
if (atom_site.contains(std::move(cond)))
throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
}
// for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
// {
// for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
// {
// condition cond;
// for (auto mon_id : mon_ids)
// cond = std::move(cond) or "label_comp_id"_key == mon_id;
// cond = "label_entity_id"_key == entity_id and
// "label_asym_id"_key == asym_id and
// "label_seq_id"_key == seq_id and not std::move(cond);
// if (atom_site.contains(std::move(cond)))
// throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
// }
// }
// ... so we're using this instead, should be almost the same...
for (const auto &[comp_id, seq_id] :
atom_site.find<std::string, int>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id"))
{
if (not mon_per_seq_id[seq_id].contains(comp_id))
throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
}
auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
std::string::const_iterator si, sci, se, sce;
auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
{
@@ -254,8 +277,8 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
else
letter = '(' + comp_id + ')';
}
if (iequals(std::string{si, si + letter.length()}, letter))
if (iequals(std::string{ si, si + letter.length() }, letter))
{
match = true;
si += letter.length();
@@ -274,12 +297,14 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not seq.has_value())
{
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
}
else
{
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch)
{ return std::isspace(ch); }),
seq->end());
if (not seq_match(false, seq->begin(), seq->end()))
throw std::runtime_error("Sequences do not match for entity " + entity_id);
@@ -287,12 +312,14 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not seq_can.has_value())
{
if (cif::VERBOSE > 1)
if (VERBOSE > 1)
std::clog << "Warning: entity_poly has no canonical sequence for entity_id " << entity_id << '\n';
}
else
{
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch)
{ return std::isspace(ch); }),
seq_can->end());
if (not seq_match(true, seq_can->begin(), seq_can->end()))
throw std::runtime_error("Canonical sequences do not match for entity " + entity_id);
@@ -304,16 +331,15 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
catch (const std::exception &ex)
{
result = false;
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << ex.what() << '\n';
ec = make_error_code(validation_error::not_valid_pdbx);
}
if (not result and (bool)ec)
if (not result and (bool) ec)
ec = make_error_code(validation_error::not_valid_pdbx);
return result;
}
} // namespace cif::pdb

View File

@@ -25,17 +25,19 @@
*/
#include "cif++/point.hpp"
#include "cif++/matrix.hpp"
#include <cassert>
#include <random>
#include "cif++/matrix.hpp" // for matrix_subtraction, matrix_cofactors
#include <initializer_list>
#include <random> // for uniform_real_distribution, normal_distri...
#include <stdexcept>
namespace cif
{
// --------------------------------------------------------------------
template<typename T>
template <typename T>
quaternion_type<T> normalize(quaternion_type<T> q)
{
std::valarray<double> t(4);
@@ -126,10 +128,9 @@ quaternion construct_for_dihedral_angle(point p1, point p2, point p3, point p4,
p4 -= p3;
p3 -= p3;
quaternion q;
auto axis = -p2;
float dh = dihedral_angle(p1, p2, p3, p4);
return construct_from_angle_axis(angle - dh, axis);
}
@@ -293,9 +294,9 @@ quaternion align_points(const std::vector<point> &pa, const std::vector<point> &
}
quaternion q(
static_cast<float>(cf(maxR, 0)),
static_cast<float>(cf(maxR, 1)),
static_cast<float>(cf(maxR, 2)),
static_cast<float>(cf(maxR, 0)),
static_cast<float>(cf(maxR, 1)),
static_cast<float>(cf(maxR, 2)),
static_cast<float>(cf(maxR, 3)));
q = normalize(q);
@@ -327,4 +328,251 @@ point nudge(point p, float offset)
return p + r;
}
// --------------------------------------------------------------------
/// Return the sphere that has the segment between the two points as diameter.
///
/// @param pts  The two points
/// @return     A tuple of the centre (midpoint of the two points) and the
///             radius (half the distance between them)
std::tuple<point, float> smallest_sphere_around_2_points(std::array<cif::point, 2> pts)
{
	auto &first = pts[0];
	auto &second = pts[1];

	return {
		(first + second) / 2,
		distance(first, second) / 2
	};
}
/// Return a sphere through three points.
///
/// The centre is the intersection of two lines: each starts halfway along an
/// edge leaving pts[0] and runs perpendicular to that edge within the plane
/// of the three points. When no intersection is found, the sphere around the
/// two points that are furthest apart is returned instead.
///
/// @param pts  The three points
/// @return     A tuple of centre and radius
std::tuple<point, float> smallest_sphere_around_3_points(std::array<cif::point, 3> pts)
{
	// Normal of the plane through the three points
	auto normal = cross_product(pts[1] - pts[0], pts[2] - pts[0]);

	// Bisector of the edge pts[0] - pts[1]
	auto bisector_1 = cross_product(normal, pts[1] - pts[0]);
	bisector_1.normalize();

	auto direction_1 = (pts[1] - pts[0]);
	direction_1.normalize();

	auto midpoint_1 = pts[0] + (distance(pts[1], pts[0]) / 2) * direction_1;

	// Bisector of the edge pts[0] - pts[2]
	auto bisector_2 = cross_product(normal, pts[2] - pts[0]);
	bisector_2.normalize();

	auto direction_2 = (pts[2] - pts[0]);
	direction_2.normalize();

	auto midpoint_2 = pts[0] + (distance(pts[2], pts[0]) / 2) * direction_2;

	if (auto centre = line_line_intersection(midpoint_1, midpoint_1 + bisector_1, midpoint_2, midpoint_2 + bisector_2); centre)
		return { *centre, distance(*centre, pts[0]) };

	// Colinear points I guess, use the two points that are furthest apart
	auto d01 = distance_squared(pts[0], pts[1]);
	auto d02 = distance_squared(pts[0], pts[2]);
	auto d12 = distance_squared(pts[1], pts[2]);

	if (d01 > d02 and d01 > d12)
		return smallest_sphere_around_2_points({ pts[0], pts[1] });

	if (d02 > d01 and d02 > d12)
		return smallest_sphere_around_2_points({ pts[0], pts[2] });

	return smallest_sphere_around_2_points({ pts[1], pts[2] });
}
/// Return a sphere through four points.
///
/// The coefficients D, E, F and G of the sphere equation
/// x² + y² + z² + Dx + Ey + Fz + G = 0 are calculated as ratios of 4x4
/// determinants. When the determinant of the coordinate matrix is zero,
/// each combination of three points is tried in turn and the first sphere
/// that also contains the fourth point is returned.
///
/// @param pts  The four points
/// @return     A tuple of centre and radius
/// @throws std::runtime_error when no enclosing sphere could be constructed
std::tuple<point, float> smallest_sphere_around_4_points(std::array<cif::point, 4> pts)
{
	auto t0 = -norm_squared(pts[0]);
	auto t1 = -norm_squared(pts[1]);
	auto t2 = -norm_squared(pts[2]);
	auto t3 = -norm_squared(pts[3]);

	// clang-format off
	matrix4x4<float> Tm({
		pts[0].m_x, pts[0].m_y, pts[0].m_z, 1,
		pts[1].m_x, pts[1].m_y, pts[1].m_z, 1,
		pts[2].m_x, pts[2].m_y, pts[2].m_z, 1,
		pts[3].m_x, pts[3].m_y, pts[3].m_z, 1
	});
	auto T = determinant(Tm);

	if (T != 0)
	{
		matrix4x4<float> Dm({
			t0, pts[0].m_y, pts[0].m_z, 1,
			t1, pts[1].m_y, pts[1].m_z, 1,
			t2, pts[2].m_y, pts[2].m_z, 1,
			t3, pts[3].m_y, pts[3].m_z, 1
		});
		auto D = determinant(Dm) / T;

		matrix4x4<float> Em({
			pts[0].m_x, t0, pts[0].m_z, 1,
			pts[1].m_x, t1, pts[1].m_z, 1,
			pts[2].m_x, t2, pts[2].m_z, 1,
			pts[3].m_x, t3, pts[3].m_z, 1
		});
		auto E = determinant(Em) / T;

		matrix4x4<float> Fm({
			pts[0].m_x, pts[0].m_y, t0, 1,
			pts[1].m_x, pts[1].m_y, t1, 1,
			pts[2].m_x, pts[2].m_y, t2, 1,
			pts[3].m_x, pts[3].m_y, t3, 1
		});
		auto F = determinant(Fm) / T;

		matrix4x4<float> Gm({
			pts[0].m_x, pts[0].m_y, pts[0].m_z, t0,
			pts[1].m_x, pts[1].m_y, pts[1].m_z, t1,
			pts[2].m_x, pts[2].m_y, pts[2].m_z, t2,
			pts[3].m_x, pts[3].m_y, pts[3].m_z, t3
		});
		auto G = determinant(Gm) / T;

		point center{ -D / 2, -E / 2, -F / 2 };
		float radius = std::sqrt(D * D + E * E + F * F - 4 * G) / 2;
		// clang-format on

		return { center, radius };
	}

	// Perhaps some colinear points, try something else: build a sphere
	// from three of the points and accept it if it contains the fourth.
	for (auto ix : std::initializer_list<std::array<size_t, 4>>{
			 { 1, 2, 3, 0 },
			 { 0, 2, 3, 1 },
			 { 0, 1, 3, 2 },
			 { 0, 1, 2, 3 },
		 })
	{
		auto [center, radius] =
			smallest_sphere_around_3_points({ pts[ix[0]], pts[ix[1]], pts[ix[2]] });

		if (distance(pts[ix[3]], center) <= radius)
			return { center, radius };
	}

	// Report failure to the caller like the sibling functions do, instead
	// of terminating the whole process from within library code.
	assert(false);
	throw std::runtime_error("Error finding smallest sphere");
}
/// Recursively construct a sphere around all points in @a P, given that the
/// points in @a R must lie on its boundary.
///
/// One point is taken from @a P and the sphere for the remainder is
/// calculated; if the removed point lies outside that sphere it is added to
/// @a R and the calculation is repeated. The recursion ends when @a P is
/// empty or @a R contains four points.
///
/// @param P  The points that still have to be enclosed
/// @param R  The points that define the sphere so far (at most four)
/// @return   A tuple of centre and radius
/// @throws std::runtime_error when there are no points to build a sphere from
std::tuple<point, float> smallest_sphere_around_all_points(std::vector<point> P, std::vector<point> R)
{
	if (P.empty() or R.size() == 4)
	{
		switch (R.size())
		{
			case 1:
				return { R[0], 0 };
			case 2:
				return smallest_sphere_around_2_points({ R[0], R[1] });
			case 3:
				return smallest_sphere_around_3_points({ R[0], R[1], R[2] });
			case 4:
				return smallest_sphere_around_4_points({ R[0], R[1], R[2], R[3] });
			default:
				// Both P and R are empty. Without the throw a release
				// build would continue below and call back() on an
				// empty vector.
				assert(false);
				throw std::runtime_error("Error finding smallest sphere");
		}
	}

	auto p = P.back();
	P.pop_back();

	auto [c, r] = smallest_sphere_around_all_points(P, R);
	assert(not std::isnan(r));

	if (distance(c, p) <= r)
		return { c, r };

	// p is outside the sphere, so it has to be on the boundary of the new one
	R.emplace_back(p);

	// P and R are not used after this call, no need to copy them once more
	return smallest_sphere_around_all_points(std::move(P), std::move(R));
}
/// Test whether point @a p lies inside or on the sphere defined by the
/// points in @a c.
///
/// @param p  The point to test
/// @param c  Zero to four points defining the sphere
/// @return   false for an empty @a c, equality with the single point when
///           @a c has one element, otherwise whether the squared distance
///           to the centre does not exceed the squared radius
/// @throws std::runtime_error when @a c contains more than four points
bool point_in_circle(point p, std::vector<point> c)
{
	// Containment test shared by the two, three and four point cases
	auto contains_p = [&p](const std::tuple<point, float> &sphere)
	{
		auto [center, radius] = sphere;
		return cif::distance_squared(p, center) <= radius * radius;
	};

	switch (c.size())
	{
		case 0:
			return false;

		case 1:
			return p == c.front();

		case 2:
			return contains_p(smallest_sphere_around_2_points({ c[0], c[1] }));

		case 3:
			return contains_p(smallest_sphere_around_3_points({ c[0], c[1], c[2] }));

		case 4:
			return contains_p(smallest_sphere_around_4_points({ c[0], c[1], c[2], c[3] }));

		default:
			assert(false);
			throw std::runtime_error("Error finding smallest sphere");
	}
}
std::tuple<point, float> smallest_sphere_around_points(std::vector<point> pts)
{
std::random_device rd;
std::mt19937 g(rd());
std::shuffle(pts.begin(), pts.end(), g);
std::vector<size_t> cix;
auto cirle_points = [&]()
{
std::vector<point> result;
for (auto ix : cix)
result.emplace_back(pts[ix]);
return result;
};
size_t i = 0;
while (i < pts.size())
{
if (std::find(cix.begin(), cix.end(), i) != cix.end() or
point_in_circle(pts[i], cirle_points()))
{
++i;
}
else
{
cix.erase(std::remove_if(cix.begin(), cix.end(), [i](size_t j)
{ return j < i; }),
cix.end());
cix.push_back(i);
if (cix.size() < 4)
i = 0;
else
++i;
}
}
switch (cix.size())
{
case 1:
return { pts[cix[0]], 0 };
case 2:
return smallest_sphere_around_2_points({ pts[cix[0]], pts[cix[1]] });
case 3:
return smallest_sphere_around_3_points({ pts[cix[0]], pts[cix[1]], pts[cix[2]] });
case 4:
return smallest_sphere_around_4_points({ pts[cix[0]], pts[cix[1]], pts[cix[2]], pts[cix[3]] });
default:
assert(false);
throw std::runtime_error("Error finding smallest sphere");
}
}
} // namespace cif

265928
src/sqlite3/sqlite3.c Normal file

File diff suppressed because it is too large Load Diff

13972
src/sqlite3/sqlite3.h Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -32,6 +32,11 @@
#include "symop_table_data.hpp"
#if defined(_MSC_VER)
#pragma warning (disable : 5054) // warning C5054: operator '&': deprecated between enumerations of different types
#pragma warning (disable : 4127) // conditional expression is constant
#endif
#include <Eigen/Eigen>
namespace cif
@@ -90,10 +95,10 @@ float cell::get_volume() const
auto cos_beta = std::cos(beta);
auto cos_gamma = std::cos(gamma);
auto vol = m_a * m_b * m_c;
double vol = m_a * m_b * m_c;
vol *= std::sqrt(1.0f - cos_alpha * cos_alpha - cos_beta * cos_beta - cos_gamma * cos_gamma + 2.0f * cos_alpha * cos_beta * cos_gamma);
return vol;
return static_cast<float>(vol);
}
// --------------------------------------------------------------------

View File

@@ -28,6 +28,11 @@
#include <algorithm>
#include <cassert>
#include <charconv>
#if __has_include("fast_float/fast_float.h")
#include "fast_float/fast_float.h"
#endif
namespace cif
{
@@ -512,4 +517,32 @@ std::vector<std::string> word_wrap(const std::string &text, std::size_t width)
return result;
}
#if __has_include("fast_float/fast_float.h")
/// Parse a floating point value from the character range [a, b) using
/// fast_float and report the outcome as a std::from_chars_result.
///
/// @param a  Start of the text to parse
/// @param b  End of the text to parse
/// @param v  Receives the parsed value
/// @return   The pointer and error code reported by fast_float::from_chars
template <typename T>
std::from_chars_result ff_charconv<T, typename std::enable_if_t<std::is_floating_point_v<T>>>::from_chars(const char *a, const char *b, T &v)
{
	const auto parsed = fast_float::from_chars(a, b, v);

	std::from_chars_result result{ parsed.ptr, parsed.ec };
	return result;
}
template struct ff_charconv<float>;
template struct ff_charconv<double>;
// template struct ff_charconv<long double>;
#ifdef __STDCPP_FLOAT64_T__
template struct ff_charconv<std::float64_t>;
#endif
#ifdef __STDCPP_FLOAT32_T__
template struct ff_charconv<std::float32_t>;
#endif
#ifdef __STDCPP_FLOAT16_T__
template struct ff_charconv<std::float16_t>;
#endif
#ifdef __STDCPP_BFLOAT16_T__
template struct ff_charconv<std::bfloat16_t>;
#endif
#endif
} // namespace cif

View File

@@ -34,14 +34,20 @@
#include <condition_variable>
#include <cstring>
#include <deque>
#include <format>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <utility>
#if __cpp_lib_jthread >= 201911L
#include <stop_token>
#endif
namespace fs = std::filesystem;
@@ -65,27 +71,50 @@ std::string get_version_nr()
#if defined(_WIN32) or defined(__MINGW32__)
}
#include <windows.h>
#include <libloaderapi.h>
#include <wincon.h>
#include <codecvt>
// clang-format off
# include <windows.h>
# include <libloaderapi.h>
# include <wincon.h>
// clang-format on
namespace cif
{
uint32_t get_terminal_width()
{
CONSOLE_SCREEN_BUFFER_INFO csbi;
::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
return csbi.srWindow.Right - csbi.srWindow.Left + 1;
CONSOLE_SCREEN_BUFFER_INFO csbi;
return ::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi)
? csbi.srWindow.Right - csbi.srWindow.Left + 1
: 80;
}
void write_to_console(const std::string &s)
{
auto h = ::GetStdHandle(STD_OUTPUT_HANDLE);
CONSOLE_SCREEN_BUFFER_INFO csbi;
if (auto l = ::MultiByteToWideChar(CP_UTF8, 0, s.data(), s.length(), nullptr, 0);
l > 0 and ::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
{
std::u16string ws(l, 0);
::MultiByteToWideChar(CP_UTF8, 0, s.data(), s.length(), (LPWSTR)ws.data(), l);
DWORD w;
::WriteConsoleW(h, ws.data(), ws.length(), &w, nullptr);
}
else
{
std::cout.write(s.data(), s.length());
std::cout.flush();
}
}
#else
#include <sys/ioctl.h>
#include <termios.h>
#include <limits.h>
# include <limits.h>
# include <sys/ioctl.h>
# include <termios.h>
uint32_t get_terminal_width()
{
@@ -100,59 +129,220 @@ uint32_t get_terminal_width()
return result;
}
/// Write @a s to standard output and flush the stream right away.
inline void write_to_console(const std::string &s)
{
	std::cout << s;
	std::cout.flush();
}
#endif
// --------------------------------------------------------------------
struct progress_bar_impl
{
progress_bar_impl(int64_t inMax, const std::string &inAction)
: m_max_value(inMax)
progress_bar_impl(uint64_t max_value, const std::string &message)
: m_max_value(max_value)
, m_consumed(0)
, m_action(inAction)
, m_message(inAction)
, m_thread(std::bind(&progress_bar_impl::run, this))
, m_action(message)
, m_message(message)
{
}
progress_bar_impl(const progress_bar_impl&) = delete;
progress_bar_impl &operator=(const progress_bar_impl &) = delete;
virtual ~progress_bar_impl() {}
~progress_bar_impl();
void run();
void consumed(int64_t n);
void progress(int64_t p);
void message(const std::string &msg);
void print_progress();
void print_done();
virtual void consumed(uint64_t n);
virtual void progress(uint64_t p);
virtual void message(const std::string &msg);
virtual void print_done();
using time_point = std::chrono::time_point<std::chrono::system_clock>;
int64_t m_max_value;
std::atomic<int64_t> m_consumed;
int64_t m_last_consumed = 0;
int m_spinner_index = 0;
uint64_t m_max_value;
std::atomic<uint64_t> m_consumed;
std::string m_action, m_message;
std::mutex m_mutex;
std::thread m_thread;
time_point m_start = std::chrono::system_clock::now();
time_point m_last = std::chrono::system_clock::now();
bool m_stop = false;
};
progress_bar_impl::~progress_bar_impl()
/// Add @a n to the amount of work consumed so far.
void progress_bar_impl::consumed(uint64_t n)
{
	m_consumed.fetch_add(n);
}
/// Set the amount of work consumed so far to the absolute value @a p.
void progress_bar_impl::progress(uint64_t p)
{
	m_consumed.store(p);
}
/// Replace the message stored for this progress bar with @a msg.
void progress_bar_impl::message(const std::string &msg)
{
	m_message.assign(msg);
}
/// Write a line "<action> done in <elapsed>" to the console, padded with
/// spaces up to the width of the terminal.
///
/// The elapsed time since m_start is written as optional days, hours and
/// minutes followed by the seconds with one decimal.
void progress_bar_impl::print_done()
{
	const std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - m_start;

	// system_clock may be adjusted backwards, never report a negative time
	const double total = elapsed.count() > 0 ? elapsed.count() : 0;

	uint64_t s = static_cast<uint64_t>(std::trunc(total));

	// Take the fraction from the total, before s is reduced to the seconds
	// that remain after taking out days, hours and minutes.
	const double fraction = total - static_cast<double>(s);

	std::string days, hours, minutes;

	// Use >= so that e.g. exactly one hour is written as 1h and not as 60m
	if (s >= 24 * 60 * 60)
	{
		days = std::format("{:d}d ", s / (24 * 60 * 60));
		s %= 24 * 60 * 60;
	}

	if (s >= 60 * 60)
	{
		hours = std::format("{:2d}h ", s / (60 * 60));
		s %= 60 * 60;
	}

	if (s >= 60)
	{
		minutes = std::format("{:2d}m ", s / 60);
		s %= 60;
	}

	std::string msg = std::format("{} done in {}{}{}{:.1f}s", m_action, days, hours, minutes, static_cast<double>(s) + fraction);

	// Pad the line so that it overwrites whatever was printed before
	uint32_t width = get_terminal_width();
	if (msg.length() < width)
		msg.append(width - msg.length(), ' ');

	msg += '\n';
	write_to_console(msg);
}
// --------------------------------------------------------------------
/// Progress reporter that writes plain text: the action followed by
/// "...10%", "...20%" and so on, at most ten steps.
///
/// Nothing is written until at least one second has passed since m_start.
struct simple_progress_bar_impl : public progress_bar_impl
{
	simple_progress_bar_impl(uint64_t max_value, const std::string &message)
		: progress_bar_impl(max_value, message)
	{
	}

	/// Finish the output, but only when something was printed before
	~simple_progress_bar_impl()
	{
		if (m_printed_any)
			print_done();
	}

	/// Register @a n consumed units and print the next step when due
	void consumed(uint64_t n) override
	{
		using namespace std::literals;

		progress_bar_impl::consumed(n);

		// Without a maximum there is no fraction to report, and dividing
		// by zero would make the conversion to int below undefined.
		if (m_max_value == 0)
			return;

		// print at most 10 steps, but only if it took long enough
		const float tenths = std::floor(10.f * m_consumed / m_max_value);

		// Clamp before converting: consuming more than the maximum
		// should not print steps beyond 100%.
		const int percentile = tenths >= 10.f ? 10 : static_cast<int>(tenths);

		if (percentile > m_last_percentile and (m_printed_any or std::chrono::system_clock::now() - m_start >= 1s))
		{
			// The action is written once, in front of the first step
			if (not std::exchange(m_printed_any, true))
				write_to_console(m_action + ": ");

			write_to_console(std::format("...{:d}0%", percentile));
			m_last_percentile = percentile;
		}
	}

	/// Set the absolute amount consumed to @a p
	void progress(uint64_t p) override
	{
		consumed(p - m_consumed);
	}

	/// End the line with steps and write the summary line
	void print_done() override
	{
		if (m_printed_any)
		{
			write_to_console("\n");
			progress_bar_impl::print_done();
		}
	}

	bool m_printed_any = false; ///< true once the action and a step were written
	int m_last_percentile = 0;  ///< last step written, in tenths of the maximum
};
// --------------------------------------------------------------------
/// Progress reporter that draws a bar from a separate thread.
///
/// The thread is started by the constructor and executes run(). With
/// std::jthread available, run() receives a stop token; otherwise the
/// m_stop flag is used to end the thread.
struct fancy_progress_bar_impl : public progress_bar_impl
{
	fancy_progress_bar_impl(uint64_t max_value, const std::string &message)
		: progress_bar_impl(max_value, message)
		, m_thread(
#if __cpp_lib_jthread >= 201911L
			  [this](std::stop_token stoken)
			  { this->run(stoken); }
#else
			  [this]()
			  { this->run(); }
#endif
		  )
	{
	}

	~fancy_progress_bar_impl();

#if __cpp_lib_jthread >= 201911L
	void run(std::stop_token stoken);
#else
	void run();
#endif

	void consumed(uint64_t n) override;
	void progress(uint64_t p) override;
	void message(const std::string &msg) override;

	void print_progress();

	std::mutex m_mutex;
	std::condition_variable m_cv;

	float m_progress = 0;
	uint32_t m_width = 0, m_bar_width = 0;
	uint32_t m_steps = 0, m_last_steps = 0;
	uint64_t m_last_consumed = 0;

	// Members are initialised in order of declaration and constructing
	// m_thread starts run(). The thread therefore has to be the last
	// member, after everything run() uses, including m_stop.
#if __cpp_lib_jthread >= 201911L
	std::jthread m_thread;
#else
	bool m_stop = false;
	std::thread m_thread;
#endif
};
// Strings used to draw the cells of the progress bar; the first entry is a
// single space.
// NOTE(review): all other entries show up as empty strings here, presumably
// these are glyphs for partially and completely filled cells that did not
// survive a conversion -- verify against the encoding of the original file.
const char *kBlocks[] = {
" ",
"",
"",
"",
"",
"",
"",
"",
"",
};
// Number of entries in kBlocks minus one, i.e. the index of the last entry
const size_t kBlockCount = sizeof(kBlocks) / sizeof(void *) - 1;
/// Ask the thread running run() to stop and wait until it has finished.
fancy_progress_bar_impl::~fancy_progress_bar_impl()
{
	assert(m_thread.joinable());

#if __cpp_lib_jthread >= 201911L
	m_thread.request_stop();
#else
	{
		// run() reads m_stop while holding m_mutex, so the flag has to be
		// written under the same mutex to avoid a data race.
		std::lock_guard lock(m_mutex);
		m_stop = true;
	}
#endif

	// Wake up the thread instead of waiting for its timeout to expire
	m_cv.notify_one();

	m_thread.join();
}
void progress_bar_impl::run()
#if __cpp_lib_jthread >= 201911L
void fancy_progress_bar_impl::run(std::stop_token stoken)
#else
void fancy_progress_bar_impl::run()
#endif
{
using namespace std::literals;
@@ -160,25 +350,44 @@ void progress_bar_impl::run()
try
{
while (not m_stop)
for (;;)
{
std::unique_lock lock(m_mutex);
m_cv.wait_for(lock, 25ms);
#if __cpp_lib_jthread >= 201911L
if (stoken.stop_requested())
break;
#else
if (m_stop)
break;
#endif
auto now = std::chrono::system_clock::now();
if (now - m_start < 2s or now - m_last < 100ms)
{
std::this_thread::sleep_for(10ms);
if (m_consumed == m_last_consumed or now - m_start < 1s)
continue;
}
std::lock_guard lock(m_mutex);
m_last_consumed = m_consumed;
if (not printedAny and isatty(STDOUT_FILENO))
std::cout << "\x1b[?25l";
// See if we need to do work
m_width = get_terminal_width();
m_progress = static_cast<float>(m_consumed) / m_max_value;
m_bar_width = 7 * m_width / 10; // 70% of the width of the terminal
m_steps = static_cast<uint32_t>(std::ceil(m_progress * m_bar_width * kBlockCount));
if (m_steps == m_last_steps)
continue;
m_last_steps = m_steps;
if (not printedAny)
write_to_console("\x1b[?25l");
print_progress();
printedAny = true;
m_last = std::chrono::system_clock::now();
}
}
catch (...)
@@ -187,161 +396,94 @@ void progress_bar_impl::run()
if (printedAny)
{
write_to_console("\r\x1b[?25h");
print_done();
if (isatty(STDOUT_FILENO))
std::cout << "\x1b[?25h";
}
}
void progress_bar_impl::consumed(int64_t n)
void fancy_progress_bar_impl::consumed(uint64_t n)
{
m_consumed += n;
progress_bar_impl::consumed(n);
// m_cv.notify_one();
}
void progress_bar_impl::progress(int64_t p)
void fancy_progress_bar_impl::progress(uint64_t p)
{
m_consumed = p;
progress_bar_impl::progress(p);
// m_cv.notify_one();
}
void progress_bar_impl::message(const std::string &msg)
void fancy_progress_bar_impl::message(const std::string &msg)
{
std::unique_lock lock(m_mutex);
m_message = msg;
progress_bar_impl::message(msg);
// m_cv.notify_one();
}
const char* kSpinner[] = {
// ".", "o", "O", "0", "O", "o", ".", " "
// "⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"
".", "o", "O", "0", "@", "*", " "
};
const std::size_t kSpinnerCount = sizeof(kSpinner) / sizeof(char*);
const int kSpinnerTimeInterval = 100;
const uint32_t kMinBarWidth = 40, kMinMsgWidth = 12;
void progress_bar_impl::print_progress()
void fancy_progress_bar_impl::print_progress()
{
const char *kBlocks[] = {
// "▯", // 0
// "▮", // 1
"=",
"-"
};
const uint32_t pct_width = 5;
uint32_t msg_width = m_width - m_bar_width - pct_width - 1;
uint32_t width = get_terminal_width();
float progress = static_cast<float>(m_consumed) / m_max_value;
if (width < kMinBarWidth)
std::cout << (100 * progress) << "%\n";
else
if (msg_width < kMinMsgWidth)
{
uint32_t bar_width = 7 * width / 10;
uint32_t pct_width = 7;
uint32_t msg_width = width - bar_width - pct_width - 1;
m_bar_width += kMinMsgWidth - msg_width;
msg_width = kMinMsgWidth;
}
if (msg_width < kMinMsgWidth)
{
bar_width += kMinMsgWidth - msg_width;
msg_width = kMinMsgWidth;
}
std::string bar;
bar.reserve(m_bar_width * 4);
std::ostringstream msg;
if (m_message.length() <= msg_width)
{
msg << m_message;
if (m_message.length() < msg_width)
msg << std::string(msg_width - m_message.length(), ' ');
}
for (uint32_t i = 0; i < m_bar_width; ++i)
{
if (i * kBlockCount <= m_steps)
bar += kBlocks[kBlockCount];
else if (i * kBlockCount > m_steps + kBlockCount)
bar += kBlocks[0];
else
msg << m_message.substr(0, msg_width - 3) << "...";
msg << ' ';
uint32_t pi = static_cast<uint32_t>(std::ceil(progress * bar_width));
for (uint32_t i = 0; i < bar_width; ++i)
msg << kBlocks[i > pi ? 1 : 0];
msg << ' ';
msg << std::setw(3) << static_cast<int>(std::ceil(progress * 100)) << "% ";
auto now = std::chrono::system_clock::now();
m_spinner_index = (std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start).count() / kSpinnerTimeInterval) % kSpinnerCount;
msg << kSpinner[m_spinner_index];
std::cout << '\r' << msg.str();
std::cout.flush();
bar += kBlocks[1 + m_steps % kBlockCount];
}
}
namespace
{
std::ostream &operator<<(std::ostream &os, const std::chrono::duration<double> &t)
// make the bar more colorful
struct color_type
{
uint64_t s = static_cast<uint64_t>(std::trunc(t.count()));
if (s > 24 * 60 * 60)
{
auto days = s / (24 * 60 * 60);
os << days << "d ";
s %= 24 * 60 * 60;
}
uint8_t r, g, b;
} fg{ 0, 3, 5 }, bg{ 0, 1, 2 };
if (s > 60 * 60)
{
auto hours = s / (60 * 60);
os << hours << "h ";
s %= 60 * 60;
}
auto esc_1 = std::format("\x1b[38;5;{}m\x1b[48;5;{}m",
16 + (fg.r * 36) + (fg.g * 6) + fg.b,
16 + (bg.r * 36) + (bg.g * 6) + bg.b);
std::string esc_2("\x1b[0m");
if (s > 60)
{
auto minutes = s / 60;
os << minutes << "m ";
s %= 60;
}
bar = esc_1 + bar + esc_2;
double ss = s + 1e-6 * (t.count() - s);
std::string msg = m_message.length() <= msg_width
? m_message
: m_message.substr(0, msg_width - 3) + "...";
os << std::fixed << std::setprecision(1) << ss << 's';
return os;
}
} // namespace
void progress_bar_impl::print_done()
{
std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - m_start;
std::ostringstream msgstr;
msgstr << m_action << " done in " << elapsed << " seconds";
auto msg = msgstr.str();
uint32_t width = get_terminal_width();
if (msg.length() < width)
msg += std::string(width - msg.length(), ' ');
std::cout << '\r' << msg << '\n';
write_to_console(std::format("{:{}} {} {:3d}%\r", msg, msg_width, bar,
static_cast<int>(std::ceil(m_progress * 100))));
}
progress_bar::progress_bar(int64_t inMax, const std::string &inAction)
// --------------------------------------------------------------------
progress_bar::progress_bar(int64_t max_value, const std::string &message)
: m_impl(nullptr)
{
if (isatty(STDOUT_FILENO) and VERBOSE >= 0)
m_impl = new progress_bar_impl(inMax, inAction);
if (VERBOSE >= 0)
{
if (isatty(STDOUT_FILENO) and get_terminal_width() > kMinBarWidth)
m_impl = new fancy_progress_bar_impl(max_value, message);
else
m_impl = new simple_progress_bar_impl(max_value, message);
}
}
progress_bar::~progress_bar()
{
delete m_impl;
flush();
}
void progress_bar::consumed(int64_t inConsumed)
@@ -350,16 +492,25 @@ void progress_bar::consumed(int64_t inConsumed)
m_impl->consumed(inConsumed);
}
void progress_bar::progress(int64_t inProgress)
void progress_bar::progress(int64_t value)
{
if (m_impl != nullptr)
m_impl->progress(inProgress);
m_impl->progress(value);
}
void progress_bar::message(const std::string &inMessage)
void progress_bar::message(const std::string &message)
{
if (m_impl != nullptr)
m_impl->message(inMessage);
m_impl->message(message);
}
// Destroy the implementation object, if there is one, and clear the
// pointer so that calling flush() again does nothing.
void progress_bar::flush()
{
	if (m_impl == nullptr)
		return;

	delete m_impl;
	m_impl = nullptr;
}
} // namespace cif
@@ -387,13 +538,13 @@ struct rsrc_imp
#if _WIN32
#if __MINGW32__
# if __MINGW32__
extern "C" __attribute__((weak, alias("gResourceIndexDefault"))) const mrsrc::rsrc_imp gResourceIndex[];
extern "C" __attribute__((weak, alias("gResourceDataDefault"))) const char gResourceData[];
extern "C" __attribute__((weak, alias("gResourceNameDefault"))) const char gResourceName[];
#else
# else
extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
extern "C" const char *gResourceDataDefault[1] = {};
@@ -403,11 +554,11 @@ extern "C" const mrsrc::rsrc_imp gResourceIndex[];
extern "C" const char gResourceData[];
extern "C" const char gResourceName[];
#pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
#pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
#pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
# pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
# pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
# pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
#endif
# endif
#else
extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];

View File

@@ -25,27 +25,25 @@
*/
#include "cif++/validate.hpp"
#include "cif++/category.hpp"
#include "cif++/dictionary_parser.hpp"
#include "cif++/gzio.hpp"
#include "cif++/utilities.hpp"
#include <cassert>
#include <fstream>
#include <format>
#include <iomanip>
#include <iostream>
#include <mutex>
#include <stdexcept>
// The validator depends on regular expressions. Unfortunately,
// the implementation of std::regex in g++ is buggy and crashes
// on reading the pdbx dictionary. Therefore, in case g++ is used
// the code will use boost::regex instead.
// on reading the pdbx dictionary. We used to use boost regex
// instead but using pcre2 is even easier and faster.
#if USE_BOOST_REGEX
# include <boost/regex.hpp>
using boost::regex;
#else
# include <regex>
using std::regex;
#endif
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
namespace cif
{
@@ -67,14 +65,63 @@ validation_exception::validation_exception(std::error_code ec, std::string_view
// --------------------------------------------------------------------
struct regex_impl : public regex
struct regex_impl
{
regex_impl(std::string_view rx)
: regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
{
}
regex_impl(std::string_view rx);
~regex_impl();
regex_impl(const regex_impl &) = delete;
regex_impl &operator=(const regex_impl &) = delete;
bool match(std::string_view v) const;
private:
pcre2_code *m_rx = nullptr;
pcre2_match_data *m_data = nullptr;
mutable std::mutex m_mutex;
};
/// Compile the regular expression \a rx with PCRE2 and allocate the match
/// data block that match() uses as scratch space.
///
/// \param rx  The pattern text, passed to pcre2_compile with default options.
/// \throws std::runtime_error when the pattern does not compile or when the
///         match data block cannot be allocated.
regex_impl::regex_impl(std::string_view rx)
{
	int err_code = 0;
	PCRE2_SIZE err_offset = 0;

	m_rx = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(rx.data()), rx.length(), 0, &err_code, &err_offset, nullptr);
	if (m_rx == nullptr)
	{
		PCRE2_UCHAR buffer[256];
		const int n = pcre2_get_error_message(err_code, buffer, sizeof(buffer) / sizeof(buffer[0]));

		// pcre2_get_error_message returns a negative code on failure, so the
		// result must not be used blindly as a length.
		std::string reason;
		if (n >= 0)
			reason.assign(reinterpret_cast<const char *>(buffer), static_cast<std::size_t>(n));
		else if (n == PCRE2_ERROR_NOMEMORY)
			reason = reinterpret_cast<const char *>(buffer); // truncated, but still zero terminated
		else
			reason = "unknown error code " + std::to_string(err_code);

		throw std::runtime_error(std::string("PCRE2 compilation failed: ") + reason);
	}

	m_data = pcre2_match_data_create_from_pattern(m_rx, nullptr);
	if (m_data == nullptr)
	{
		// The destructor does not run for an object whose constructor throws,
		// so release the compiled pattern here.
		pcre2_code_free(m_rx);
		m_rx = nullptr;

		throw std::runtime_error("PCRE2 could not allocate match data");
	}
}
/// Free the PCRE2 match data and the compiled pattern while holding the
/// mutex that also guards match().
regex_impl::~regex_impl()
{
	std::lock_guard guard(m_mutex);

	if (m_data != nullptr)
	{
		pcre2_match_data_free(m_data);
	}

	if (m_rx != nullptr)
	{
		pcre2_code_free(m_rx);
	}
}
bool regex_impl::match(std::string_view v) const
{
std::unique_lock lock(m_mutex);
bool result = false;
if (int rc = pcre2_match(m_rx, (PCRE2_SPTR)v.data(), v.length(), 0, 0, m_data, nullptr); rc >= 0)
result = true;
else if (rc != PCRE2_ERROR_NOMATCH)
std::cerr << "Error matching with pcre\n";
return result;
}
// --------------------------------------------------------------------
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept
@@ -142,8 +189,8 @@ int type_validator::compare(std::string_view a, std::string_view b) const
std::from_chars_result ra, rb;
ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
ra = from_chars(a.data(), a.data() + a.length(), da);
rb = from_chars(b.data(), b.data() + b.length(), db);
if (not(bool) ra.ec and not(bool) rb.ec)
{
@@ -224,7 +271,7 @@ void item_validator::operator()(std::string_view value) const
{
std::error_code ec;
if (not validate_value(value, ec))
throw std::system_error(ec, std::string{ value } + " does not match rx for " + m_item_name);
throw std::system_error(ec, std::format("'{}' is not a valid value for {}", value, m_item_name));
}
bool item_validator::validate_value(std::string_view value, std::error_code &ec) const noexcept
@@ -233,7 +280,7 @@ bool item_validator::validate_value(std::string_view value, std::error_code &ec)
if (not value.empty() and value != "?" and value != ".")
{
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
if (m_type != nullptr and not m_type->m_rx->match(value))
ec = make_error_code(validation_error::value_does_not_match_rx);
else if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
ec = make_error_code(validation_error::value_is_not_in_enumeration_list);
@@ -251,15 +298,20 @@ void category_validator::add_item_validator(item_validator &&v)
v.m_category = m_name;
auto r = m_item_validators.insert(std::move(v));
if (not r.second and VERBOSE >= 4)
std::cout << "Could not add validator for item " << v.m_item_name << " to category " << m_name << '\n';
auto i = std::find(m_item_validators.begin(), m_item_validators.end(), v);
if (i != m_item_validators.end())
{
if (VERBOSE >= 4)
std::cout << "Could not add validator for item " << v.m_item_name << " to category " << m_name << '\n';
}
else
m_item_validators.emplace_back(std::move(v));
}
const item_validator *category_validator::get_validator_for_item(std::string_view item_name) const
{
const item_validator *result = nullptr;
auto i = m_item_validators.find(item_validator{ std::string(item_name) });
auto i = std::find(m_item_validators.begin(), m_item_validators.end(), item_validator{ std::string(item_name) });
if (i != m_item_validators.end())
result = &*i;
else if (VERBOSE > 4)
@@ -496,19 +548,18 @@ validator_factory &validator_factory::instance()
return s_instance;
}
const validator &validator_factory::get(std::string_view dictionary_name)
const validator *validator_factory::get(std::string_view dictionary_name)
{
category audit_conform("audit_conform");
for (auto part : cif::split(dictionary_name, ";", true))
for (auto part : cif::split(dictionary_name, ";,", true))
audit_conform.emplace({ { "dict_name", part } });
return get(audit_conform);
}
const validator &validator_factory::get(const category &audit_conform)
const validator *validator_factory::get(const category &audit_conform)
{
if (audit_conform.empty())
throw std::runtime_error("Empty audit_conform category, cannot create a validator");
const validator *result = nullptr;
std::lock_guard lock(m_mutex);
@@ -516,37 +567,60 @@ const validator &validator_factory::get(const category &audit_conform)
for (auto &v : m_validators)
{
if (v.matches_audit_conform(audit_conform))
return v;
result = &v;
}
// If the audit conform contains only one record, this is easy
if (audit_conform.size() == 1)
if (result == nullptr and audit_conform.size() == 1)
{
const auto &[name, version] = audit_conform.front().get<std::string, std::optional<std::string>>("dict_name", "dict_version");
return m_validators.emplace_back(construct_validator(name, version));
const auto &[name, version] =
audit_conform.front().get<std::string, std::optional<std::string>>("dict_name", "dict_version");
if (not name.empty())
result = &m_validators.emplace_back(construct_validator(name, version));
}
// A new, merged dictionary
std::optional<validator> v;
for (const auto &[name, version] : audit_conform.rows<std::string, std::optional<std::string>>("dict_name", "dict_version"))
if (result == nullptr)
{
if (not v)
v = construct_validator(name, version);
else
// A new, merged dictionary
std::optional<validator> v;
for (const auto &[name, version] : audit_conform.rows<std::string, std::optional<std::string>>("dict_name", "dict_version"))
{
auto data = load_resource(name);
if (not data)
throw std::runtime_error("Could not load dictionary " + std::string{ name });
if (name.empty())
continue;
v->parse(*data);
if (not v) // first dict
v = construct_validator(name, version);
else // additional/extending dict
{
auto data = load_resource(name);
if (not data)
throw std::runtime_error("Could not load dictionary " + std::string{ name });
v->parse(*data);
}
}
if (v)
result = &m_validators.emplace_back(std::move(*v));
}
if (not v)
throw std::runtime_error("Missing dictionary information?");
return result;
}
return m_validators.emplace_back(std::move(*v));
/// Return the validator that get() yields for \a audit_conform as a
/// reference, throwing std::runtime_error when get() returns a null pointer.
const validator &validator_factory::operator[](const category &audit_conform)
{
	if (auto found = get(audit_conform); found != nullptr)
		return *found;

	throw std::runtime_error("Could not load dictionary for audit_conform");
}
/// Return the validator that get() yields for \a dictionary_name as a
/// reference, throwing std::runtime_error when get() returns a null pointer.
const validator &validator_factory::operator[](std::string_view dictionary_name)
{
	if (auto found = get(dictionary_name); found != nullptr)
		return *found;

	throw std::runtime_error("Could not load dictionary for " + std::string{ dictionary_name });
}
validator validator_factory::construct_validator(std::string_view name, std::optional<std::string> version)
@@ -565,7 +639,7 @@ validator validator_factory::construct_validator(std::string_view name, std::opt
not v.matches_audit_conform(category{ "audit_conform", //
{ { "dict_name", name }, { "dict_version", version } } }))
{
std::clog << "Invalid dictionary?\n";
std::clog << "Loaded dictionary does not match name=" << name << " and version=" << version.value_or("''") << "\n";
}
return v;

View File

@@ -1,19 +1,39 @@
# We're using the older version 2 of Catch2
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
if(NOT(Catch2_FOUND OR TARGET Catch2))
find_package(Catch2 QUIET)
if(NOT (Catch2_FOUND OR TARGET Catch2))
find_package(Catch2 3 QUIET)
if(NOT Catch2_FOUND)
include(FetchContent)
FetchContent_Declare(
Catch2
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
GIT_TAG v2.13.9)
GIT_TAG v3.4.0)
FetchContent_MakeAvailable(Catch2)
set(Catch2_VERSION "2.13.9")
target_compile_features(Catch2 PRIVATE cxx_std_20)
endif()
endif()
@@ -22,25 +42,21 @@ list(
CIFPP_tests
unit-v2
unit-3d
format
model
query
rename-compound
sugar
spinner
# reconstruction
reconstruction
validate-pdbx
)
cql
matrix
)
add_library(test-main OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/test-main.cpp")
target_link_libraries(test-main cifpp::cifpp Catch2::Catch2)
if("${Catch2_VERSION}" VERSION_LESS 3.0.0)
target_compile_definitions(test-main PUBLIC CATCH22=1)
else()
target_compile_definitions(test-main PUBLIC CATCH22=0)
endif()
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test")
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/${CIFPP_TEST}.cpp")
@@ -48,12 +64,6 @@ foreach(CIFPP_TEST IN LISTS CIFPP_tests)
add_executable(
${CIFPP_TEST} ${CIFPP_TEST_SOURCE} $<TARGET_OBJECTS:test-main>)
if(${Catch2_VERSION} VERSION_GREATER_EQUAL 3.0.0)
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=0)
else()
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=1)
endif()
target_link_libraries(${CIFPP_TEST} PRIVATE cifpp::cifpp Catch2::Catch2)
target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
@@ -62,15 +72,8 @@ foreach(CIFPP_TEST IN LISTS CIFPP_tests)
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
endif()
add_custom_target(
"run-${CIFPP_TEST}"
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR})
add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
${CMAKE_CURRENT_SOURCE_DIR})
endforeach()
if(NOT (CIFPP_TEST STREQUAL "spinner-test"))
add_test(NAME ${CIFPP_TEST}
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR})
endif()
endforeach()

537
test/cql-test.cpp Normal file
View File

@@ -0,0 +1,537 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <catch2/catch_test_macros.hpp>
#include <cif++.hpp>
#include <cif++/cql.hpp>
// --------------------------------------------------------------------
// User defined literal: read the characters of the literal through an
// std::istream and construct a cif::file from that stream.
cif::file operator""_cf(const char *text, std::size_t length)
{
	// Stream buffer whose get area is the literal itself
	struct literal_buffer : public std::streambuf
	{
		literal_buffer(char *first, std::size_t count)
		{
			setg(first, first, first + count);
		}
	};

	// setg wants non-const pointers, hence the cast
	literal_buffer source(const_cast<char *>(text), length);
	std::istream input(&source);

	return cif::file(input);
}
// --------------------------------------------------------------------
// Expected citation_author names for the 1cbs example file. The tests below
// compare the name of the row with ordinal N against kAuthors[N - 1].
const char *kAuthors[] = {
"Kleywegt, G.J.",
"Bergfors, T.",
"Senn, H.",
"Le Motte, P.",
"Gsell, B.",
"Shudo, K.",
"Jones, T.A.",
"Banaszak, L.",
"Winter, N.",
"Xu, Z.",
"Bernlohr, D.A.",
"Cowan, S.W.",
"Jones, T.A.",
"Bergfors, T.",
"Kleywegt, G.J.",
"Jones, T.A.",
"Cowan, S.W.",
"Newcomer, M.E.",
"Jones, T.A.",
"Jones, T.A.",
"Bergfors, T.",
"Sedzik, J.",
"Unge, T."
};
// Plain SELECT statements: explicit columns in both orders, then SELECT *
TEST_CASE("cql-1")
{
	constexpr std::size_t kAuthorCount = sizeof(kAuthors) / sizeof(char *);

	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	// name first, ordinal second
	auto result = txn.exec("SELECT name, ordinal FROM citation_author WHERE citation_id = 'primary';");
	CHECK(result.size() == 7);

	for (size_t n = 0; auto row : result)
	{
		REQUIRE(n < kAuthorCount);

		CHECK(row[0].as<std::string>() == kAuthors[n]);
		CHECK(row[1].as<size_t>() == n + 1);

		CHECK(row["name"].as<std::string>() == kAuthors[n]);
		CHECK(row["ordinal"].as<size_t>() == n + 1);

		++n;
	}

	// ordinal first, name second; lookup by name must be unaffected
	result = txn.exec("SELECT ordinal, name FROM citation_author WHERE citation_id = 'primary';");
	CHECK(result.size() == 7);

	for (size_t n = 0; auto row : result)
	{
		REQUIRE(n < kAuthorCount);

		CHECK(row[1].as<std::string>() == kAuthors[n]);
		CHECK(row[0].as<size_t>() == n + 1);

		CHECK(row["name"].as<std::string>() == kAuthors[n]);
		CHECK(row["ordinal"].as<size_t>() == n + 1);

		++n;
	}

	// all columns; walk the fields of each row and check name and value
	result = txn.exec("SELECT * FROM citation_author WHERE citation_id = 'primary';");
	CHECK(result.size() == 7);

	for (int n = 0; auto row : result)
	{
		REQUIRE(static_cast<size_t>(n) < kAuthorCount);

		for (auto field : row)
		{
			switch (field.num())
			{
				case 0:
					CHECK(field.name() == "citation_id");
					CHECK(field.as<std::string>() == "primary");
					break;

				case 1:
					CHECK(field.name() == "name");
					CHECK(field.as<std::string>() == kAuthors[n]);
					break;

				case 2:
					CHECK(field.name() == "ordinal");
					CHECK(field.as<int>() == n + 1);
					break;

				default:
					// a fourth field is unexpected
					REQUIRE(false);
					break;
			}
		}

		CHECK(row["name"].as<std::string>() == kAuthors[n]);
		CHECK(row["ordinal"].as<int>() == n + 1);
		CHECK(row["citation_id"].as<std::string>() == "primary");

		++n;
	}
}
// SELECT with AS: the aliases must be usable as column names in the result
TEST_CASE("cql-2")
{
	constexpr std::size_t kAuthorCount = sizeof(kAuthors) / sizeof(char *);

	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto result = txn.exec("SELECT name AS v1, ordinal AS v2 FROM citation_author WHERE citation_id = 'primary';");
	CHECK(result.size() == 7);

	for (size_t n = 0; auto row : result)
	{
		REQUIRE(n < kAuthorCount);

		// by position
		CHECK(row[0].as<std::string>() == kAuthors[n]);
		CHECK(row[1].as<size_t>() == n + 1);

		// by alias
		CHECK(row["v1"].as<std::string>() == kAuthors[n]);
		CHECK(row["v2"].as<size_t>() == n + 1);

		++n;
	}
}
// A query for a single ordinal yields exactly one field
TEST_CASE("cql-3")
{
	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto author = txn.exec("SELECT name FROM citation_author WHERE ordinal = 10").one_field();

	// ordinal 10 is the tenth entry of kAuthors
	CHECK(author.as<std::string>() == kAuthors[9]);
}
// BETWEEN is inclusive at both ends: ordinals 10 through 15 give six rows
TEST_CASE("cql-4")
{
	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto result = txn.exec("SELECT name FROM citation_author WHERE ordinal BETWEEN 10 AND 15");

	REQUIRE(result.size() == 6);
}
// Correlated sub-select with an alias for the resulting column
TEST_CASE("cql-5")
{
	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto year = txn.exec("SELECT (SELECT year FROM citation WHERE id = citation_id) AS jaar FROM citation_author WHERE ordinal IS 23").one_field();

	CHECK(year.name() == "jaar");
	CHECK(year.as<int>() == 1988);
}
// IS NULL and IS NOT NULL on citation.page_last
TEST_CASE("cql-6")
{
	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto counted = txn.exec("SELECT COUNT(*) FROM citation WHERE page_last IS NULL").one_field();
	CHECK(counted.as<int>() == 4);

	counted = txn.exec("SELECT COUNT(*) FROM citation WHERE page_last IS NOT NULL").one_field();
	CHECK(counted.as<int>() == 1);
}
// Typed streaming of query results into structured bindings
TEST_CASE("cql-stream-1")
{
	constexpr std::size_t kAuthorCount = sizeof(kAuthors) / sizeof(char *);

	cif::file file(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	auto &datablock = file.front();
	datablock.load_dictionary("mmcif_pdbx.dic");

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	for (size_t n = 0;
		 const auto &[name, ordinal] : txn.stream<std::string, size_t>(
			 "SELECT name, ordinal FROM citation_author WHERE citation_id = 'primary';"))
	{
		REQUIRE(n < kAuthorCount);

		CHECK(name == kAuthors[n]);
		CHECK(ordinal == (n + 1));

		++n;
	}
}
// --------------------------------------------------------------------
// INSERT, DELETE and UPDATE inside one transaction, followed by a rollback
// that must restore the original content.
TEST_CASE("cql-insert-1")
{
	auto modified = R"(
data_T1
loop_
_table1.id
_table1.name
1 aap
2 noot)"_cf;

	// untouched copy to compare against after the rollback
	auto pristine = modified;

	auto &datablock = modified.front();

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	auto rows = txn.exec("SELECT COUNT(*) FROM table1;").one_field().as<int>();
	CHECK(rows == 2);

	auto inserted = txn.exec("INSERT INTO table1 (id, name) VALUES (3, 'mies')");

	rows = txn.exec("SELECT COUNT(*) FROM table1").one_field().as<int>();
	CHECK(rows == 3);

	(void)txn.exec("DELETE FROM table1 WHERE CAST(id AS INTEGER) = 1;");

	rows = txn.exec("SELECT COUNT(*) FROM table1;").one_field().as<int>();
	CHECK(rows == 2);

	(void)txn.exec("UPDATE table1 SET name = 'amandel' WHERE CAST(id AS INTEGER) = 2");

	// the state expected after insert, delete and update
	auto expected = R"(
data_T1
loop_
_table1.id
_table1.name
2 amandel
3 mies)"_cf;

	CHECK(modified == expected);

	txn.rollback();

	CHECK(modified == pristine);
}
// --------------------------------------------------------------------
// ALTER TABLE ... RENAME TO changes the category name of all items
TEST_CASE("cql-rename")
{
	auto actual = R"(
data_T1
loop_
_table1.id
_table1.name
1 aap
2 noot)"_cf;

	auto &datablock = actual.front();

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	(void)txn.exec("ALTER TABLE table1 RENAME TO 'table2'");

	auto expected = R"(
data_T1
loop_
_table2.id
_table2.name
1 aap
2 noot)"_cf;

	CHECK(actual == expected);
}
// --------------------------------------------------------------------
// Load a small dictionary that links cat_2.parent_id to cat_1.id, then check
// a join over that link and that deleting a parent row cascades to its
// children, and that a rollback restores both categories.
TEST_CASE("cql-foreign-keys-1")
{
	const char dict[] = R"(
data_test_dict.dic
_datablock.id test_dict.dic
_datablock.description
;
A test dictionary
;
_dictionary.title test_dict.dic
_dictionary.datablock_id test_dict.dic
_dictionary.version 1.0
loop_
_item_type_list.code
_item_type_list.primitive_code
_item_type_list.construct
_item_type_list.detail
code char
'[][_,.;:"&<>()/\{}'`~!@#$%A-Za-z0-9*|+-]*'
; code item types/single words ...
;
text char
'[][ \n\t()_,.;:"&<>/\{}'`~!@#$%?+=*A-Za-z0-9|^-]*'
; text item types / multi-line text ...
;
int numb
'[+-]?[0-9]+'
; int item types are the subset of numbers that are the negative
or positive integers.
;
save_cat_1
_category.description 'A simple test category'
_category.id cat_1
_category.mandatory_code no
_category_key.name '_cat_1.id'
save_
save__cat_1.id
_item.name '_cat_1.id'
_item.category_id cat_1
_item.mandatory_code yes
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
_item_linked.child_name '_cat_2.parent_id'
_item_linked.parent_name '_cat_1.id'
_item_type.code code
save_
save__cat_1.name
_item.name '_cat_1.name'
_item.category_id cat_1
_item.mandatory_code yes
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
_item_type.code text
save_
save_cat_2
_category.description 'A second simple test category'
_category.id cat_2
_category.mandatory_code no
_category_key.name '_cat_2.id'
save_
save__cat_2.id
_item.name '_cat_2.id'
_item.category_id cat_2
_item.mandatory_code yes
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
_item_type.code int
save_
save__cat_2.parent_id
_item.name '_cat_2.parent_id'
_item.category_id cat_2
_item.mandatory_code yes
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
_item_type.code code
save_
save__cat_2.desc
_item.name '_cat_2.desc'
_item.category_id cat_2
_item.mandatory_code yes
_item_aliases.dictionary cif_core.dic
_item_aliases.version 2.0.1
_item_type.code text
save_
)";

	// Stream buffer reading straight from the dictionary text
	struct membuf : public std::streambuf
	{
		membuf(char *text, std::size_t length)
		{
			this->setg(text, text, text + length);
		}
	} buffer(const_cast<char *>(dict), sizeof(dict) - 1);

	std::istream is_dict(&buffer);

	cif::validator validator(is_dict);

	cif::file f;

	// --------------------------------------------------------------------

	const char data[] = R"(
data_test
loop_
_cat_1.id
_cat_1.name
1 Aap
2 Noot
3 Mies
loop_
_cat_2.id
_cat_2.parent_id
_cat_2.desc
1 1 'Een dier'
2 1 'Een andere aap'
3 2 'walnoot bijvoorbeeld'
)";

	// Same trick for the data file
	struct data_membuf : public std::streambuf
	{
		data_membuf(char *text, std::size_t length)
		{
			this->setg(text, text, text + length);
		}
	} data_buffer(const_cast<char *>(data), sizeof(data) - 1);

	std::istream is_data(&data_buffer);
	f.load(is_data, validator);

	auto &db = f.front();

	cif::cql::connection connection(db);
	cif::cql::transaction tx(connection);

	// Count the rows as well: without this the CHECK inside the loop passes
	// vacuously when the join yields nothing. Only cat_2 row 3 has 'Noot'
	// (id 2) as parent, so exactly one row is expected.
	std::size_t matches = 0;
	for (const auto &desc : tx.stream<std::string>(R"(SELECT b.desc FROM cat_1 a, cat_2 b WHERE a.id = b.parent_id AND a.name = 'Noot')"))
	{
		CHECK(desc == "walnoot bijvoorbeeld");
		++matches;
	}
	CHECK(matches == 1);

	// Check cascading delete: removing parent 1 also removes its two children
	(void)tx.exec("DELETE FROM cat_1 WHERE id = 1");

	CHECK(db["cat_1"].size() == 2);
	CHECK(db["cat_2"].size() == 1);

	tx.rollback();

	CHECK(db["cat_1"].size() == 3);
	CHECK(db["cat_2"].size() == 3);
}
// --------------------------------------------------------------------
// DROP TABLE followed by a commit leaves the datablock empty
TEST_CASE("drop-table")
{
	auto file = R"(
data_T1
loop_
_table1.id
_table1.name
1 aap
2 noot)"_cf;

	auto &datablock = file.front();

	cif::cql::connection conn(datablock);
	cif::cql::transaction txn(conn);

	SECTION("commit")
	{
		(void)txn.exec("DROP TABLE table1;");
		txn.commit();

		CHECK(datablock.empty());
	}

	// Rolling back a DROP TABLE does not work, so this section is disabled
	// SECTION("rollback")
	// {
	// (void)tx.exec("DROP TABLE table1;");
	// tx.rollback();
	// CHECK(not db.empty());
	// CHECK(db["table1"].size() == 2);
	// }
}

View File

@@ -1,61 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <stdexcept>
#include <cif++.hpp>
// --------------------------------------------------------------------
// cif::format must give the same text when streamed and when asked for
// its string directly
TEST_CASE("fmt_1")
{
	const std::string expected = "Hello, world , the magic number is 42 and pi is 3.14159";

	std::string world("world");

	// via operator<<
	std::ostringstream os;
	os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI);
	REQUIRE(os.str() == expected);

	// via str()
	REQUIRE(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI).str() == expected);
}
// --------------------------------------------------------------------
// Write a few coloured strings to stdout; there are no assertions, the
// output is for inspection only.
TEST_CASE("clr_1")
{
	using namespace cif::colour;

	std::cout << "Hello, " << cif::coloured("world!", white, red, cif::colour::regular) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", white, red, bold) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", black, red) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", white, green) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", white, blue) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", blue, white) << '\n';
	std::cout << "Hello, " << cif::coloured("world!", red, white, bold) << '\n';
}

103
test/matrix-test.cpp Normal file
View File

@@ -0,0 +1,103 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/matrix.hpp"
#include "test-main.hpp"
#include <catch2/catch_test_macros.hpp>
#include <cif++.hpp>
// The determinant of the 3x3 integer identity matrix must be 1.
TEST_CASE("m1")
{
	cif::matrix3x3<int> identity = cif::identity_matrix<int>(3);

	const auto det = cif::determinant(identity);
	CHECK(det == 1);
}
// A sub_matrix built from the 4x4 identity with arguments (1, 1) must compare
// equal to the 3x3 identity.
TEST_CASE("m2")
{
	cif::matrix4x4<int> identity4 = cif::identity_matrix<int>(4);

	cif::sub_matrix<decltype(identity4)> minor_view(identity4, 1, 1);

	CHECK(minor_view == cif::identity_matrix<int>(3));
}
// sub_matrix(m, 1, 1) on a 4x4 matrix holding 1..16 is expected to yield the
// 3x3 matrix that remains when the second row and second column are left out.
TEST_CASE("m3")
{
	cif::matrix4x4<int> source{
		{ 1, 2, 3, 4,    //
		  5, 6, 7, 8,    //
		  9, 10, 11, 12, //
		  13, 14, 15, 16 }
	};

	cif::matrix3x3<int> expected{
		{ 1, 3, 4,   //
		  9, 11, 12, //
		  13, 15, 16 }
	};

	cif::sub_matrix<decltype(source)> minor_view(source, 1, 1);

	CHECK(minor_view == expected);
}
// Determinant of a general 4x4 integer matrix; the expected value is 332.
TEST_CASE("m4")
{
	cif::matrix4x4<int> input{
		{ -2, 3, 1, 0,  //
		  4, 1, -3, 2,  //
		  0, -1, 2, 5,  //
		  3, 2, 0, -4 }
	};

	// Debugging aid: the individual first-row minors and their determinants can
	// be printed with, for column c in 0..3:
	//   std::cout << cif::matrix3x3<int>(cif::sub_matrix<decltype(input)>(input, 0, c)) << "\n\n";
	//   std::cout << cif::determinant(cif::matrix3x3<int>(cif::sub_matrix<decltype(input)>(input, 0, c))) << "\n\n";

	const auto det = cif::determinant(input);
	CHECK(det == 332);
}

View File

@@ -54,7 +54,7 @@ TEST_CASE("create_nonpoly_1")
cif::file file;
auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock
dbi->set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
dbi->load_dictionary("mmcif_pdbx.dic");
cif::mm::structure structure(file);
@@ -82,7 +82,7 @@ _atom_site.pdbx_formal_charge
# that's enough to test with
)"_cf;
atoms.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
atoms.front().load_dictionary("mmcif_pdbx.dic");
auto &hem_data = atoms["HEM"];
auto &atom_site = hem_data["atom_site"];
@@ -159,7 +159,7 @@ _struct_asym.details ?
_atom_type.symbol C
)"_cf;
expected.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
expected.front().load_dictionary("mmcif_pdbx.dic");
if (not(expected.front() == structure.get_datablock()))
{
@@ -178,7 +178,7 @@ TEST_CASE("create_nonpoly_2")
cif::file file;
auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock
dbi->set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
dbi->load_dictionary("mmcif_pdbx.dic");
cif::mm::structure structure(file);
@@ -270,7 +270,7 @@ _struct_asym.details ?
_atom_type.symbol C
)"_cf;
expected.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
expected.front().load_dictionary("mmcif_pdbx.dic");
REQUIRE(expected.front() == structure.get_datablock());
@@ -354,7 +354,7 @@ _struct_asym.details ?
#
)"_cf;
data.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
data.front().load_dictionary("mmcif_pdbx.dic");
cif::mm::structure s(data);
@@ -566,7 +566,7 @@ _struct_asym.details ?
#
)"_cf;
data.front().set_validator(&cif::validator_factory::instance().get("mmcif_pdbx.dic"));
data.front().load_dictionary("mmcif_pdbx.dic");
SECTION("max")
{

View File

@@ -1,7 +1,7 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
@@ -24,9 +24,29 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "test-main.hpp"
/// \file io.hpp
/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead
#include <cif++.hpp>
#warning "Use of this file is deprecated, please use "cif++/pdb.hpp"
#include <iostream>
#include <fstream>
// Query test on the pdbx_poly_seq_scheme category obtained by reading
// pdb1cbs.ent.gz: counts rows matching progressively narrower conditions.
TEST_CASE("q-1")
{
	using namespace cif::literals;

	// Register the REA compound dictionary before the PDB file is read.
	cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");

	cif::file pdb_file = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");

	auto &scheme = pdb_file.front()["pdbx_poly_seq_scheme"];
	REQUIRE_FALSE(scheme.empty());

	SECTION("s-11")
	{
		// All rows for asym A.
		CHECK(scheme.count("asym_id"_key == "A") == 137);

		// Exactly one row for the first residue, a PRO.
		CHECK(scheme.count(
				  "asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO") == 1);

		// The same row, additionally matched on the boolean 'hetero' item.
		CHECK(scheme.count(
				  "asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO" and "hetero"_key == false) == 1);
	}
}

View File

@@ -28,6 +28,7 @@
#include <cif++.hpp>
#include <filesystem>
#include <iostream>
#include <fstream>

View File

@@ -11,12 +11,7 @@ int main(int argc, char *argv[])
Catch::Session session; // There must be exactly one instance
// Build a new parser on top of Catch2's
#if CATCH22
using namespace Catch::clara;
#else
// Build a new parser on top of Catch2's
using namespace Catch::Clara;
#endif
auto cli = session.cli() // Get Catch2's command line parser
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,11 +26,7 @@
#pragma once
#if CATCH22
#include <catch2/catch.hpp>
#else
#include <catch2/catch_all.hpp>
#endif
#include <filesystem>

View File

@@ -24,11 +24,18 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/point.hpp"
#include "test-main.hpp"
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_floating_point.hpp>
#include <cif++.hpp>
#include <stdexcept>
#include <cif++.hpp>
#if defined(_MSC_VER)
# pragma warning(disable : 5054) // warning C5054: operator '&': deprecated between enumerations of different types
# pragma warning(disable : 4127) // conditional expression is constant
#endif
#include <Eigen/Eigenvalues>
@@ -296,7 +303,7 @@ TEST_CASE("m2q_0a")
auto d = cif::kSymopNrTable[i].symop().data();
Eigen::Matrix3f rot;
rot << d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8];
rot << static_cast<float>(d[0]), static_cast<float>(d[1]), static_cast<float>(d[2]), static_cast<float>(d[3]), static_cast<float>(d[4]), static_cast<float>(d[5]), static_cast<float>(d[6]), static_cast<float>(d[7]), static_cast<float>(d[8]);
// check to see if this matrix contains a true rotation
if (rot * rot.transpose() != Eigen::Matrix3f::Identity() or rot.determinant() != 1)
@@ -310,8 +317,7 @@ TEST_CASE("m2q_0a")
cif::point p2 = p1;
p2.rotate(q);
cif::matrix3x3<float> rot_c({
static_cast<float>(d[0]),
cif::matrix3x3<float> rot_c({ static_cast<float>(d[0]),
static_cast<float>(d[1]),
static_cast<float>(d[2]),
static_cast<float>(d[3]),
@@ -319,8 +325,7 @@ TEST_CASE("m2q_0a")
static_cast<float>(d[5]),
static_cast<float>(d[6]),
static_cast<float>(d[7]),
static_cast<float>(d[8])
});
static_cast<float>(d[8]) });
cif::point p3 = rot_c * p1;
@@ -435,11 +440,11 @@ TEST_CASE("symm_4")
// based on 2b8h
auto sg = cif::spacegroup(154); // p 32 2 1
auto c = cif::cell(107.516, 107.516, 338.487, 90.00, 90.00, 120.00);
auto c = cif::cell(107.516f, 107.516f, 338.487f, 90.00f, 90.00f, 120.00f);
cif::point a{ -8.688, 79.351, 10.439 }; // O6 NAG A 500
cif::point b{ -35.356, 33.693, -3.236 }; // CG2 THR D 400
cif::point sb(-6.916, 79.34, 3.236); // 4_565 copy of b
cif::point a{ -8.688f, 79.351f, 10.439f }; // O6 NAG A 500
cif::point b{ -35.356f, 33.693f, -3.236f }; // CG2 THR D 400
cif::point sb(-6.916f, 79.34f, 3.236f); // 4_565 copy of b
CHECK_THAT(distance(a, sg(a, c, "1_455"_symop)), Catch::Matchers::WithinRel(static_cast<float>(c.get_a()), 0.01f));
CHECK_THAT(distance(a, sg(a, c, "1_545"_symop)), Catch::Matchers::WithinRel(static_cast<float>(c.get_b()), 0.01f));
@@ -466,7 +471,7 @@ TEST_CASE("symm_4wvp_1")
cif::crystal c(db);
cif::point p{ -78.722, 98.528, 11.994 };
cif::point p{ -78.722f, 98.528f, 11.994f };
auto a = s.get_residue("A", 10, "").get_atom_by_atom_id("O");
auto sp1 = c.symmetry_copy(a.get_location(), "2_565"_symop);
@@ -605,3 +610,40 @@ TEST_CASE("volume_3bwh_1")
CHECK_THAT(c.get_cell().get_volume(), Catch::Matchers::WithinRel(741009.625f, 0.01f));
}
// --------------------------------------------------------------------
// Checks cif::smallest_sphere_around_points against a fixed set of 22 points:
// the returned centre must lie within 0.01 of the reference centre and the
// returned radius within 0.01 of the reference radius.
//
// Coordinates are written as float literals, matching the float literals used
// for cif::point elsewhere in this file and avoiding implicit double-to-float
// conversions.
TEST_CASE("smallest_sphere-1")
{
	std::vector<cif::point> pts{
		{ 0.9295f, 4.9006f, 46.9706f },
		{ -0.1215f, 5.5936f, 46.0726f },
		{ -0.7975f, 4.7046f, 45.0796f },
		{ -1.4875f, 3.5486f, 45.7196f },
		{ -0.6535f, 2.8816f, 46.8186f },
		{ 0.3825f, 3.5156f, 47.4496f },
		{ 1.1995f, 2.9206f, 48.5286f },
		{ 0.8255f, 2.0466f, 49.4716f },
		{ 1.6625f, 1.5036f, 50.5176f },
		{ 1.1165f, 0.6056f, 51.3626f },
		{ 1.8325f, -0.0064f, 52.4656f },
		{ 1.1945f, -0.9044f, 53.2216f },
		{ 1.8135f, -1.5534f, 54.3566f },
		{ 1.0925f, -2.4574f, 55.0656f },
		{ 1.5205f, -3.2204f, 56.2476f },
		{ 1.1955f, 5.8066f, 48.1796f },
		{ 2.2495f, 4.6896f, 46.1796f },
		{ -1.2515f, 1.5186f, 47.1786f },
		{ 3.1385f, 1.9106f, 50.6166f },
		{ 3.2605f, -1.1834f, 54.7206f },
		{ 2.5975f, -3.8554f, 56.2096f },
		{ 0.7975f, -3.2184f, 57.2686f }
	};

	// Reference centre; loop-invariant, so it is built once.
	const cif::point expected_centre{ 0.f, 0.743099928f, 51.1741028f };

	// NOTE(review): the call is repeated 1000 times on the same input,
	// presumably to catch run-to-run variation in the result; confirm.
	for (int i = 0; i < 1000; ++i)
	{
		auto [centre, radius] = cif::smallest_sphere_around_points(pts);

		CHECK_THAT(cif::distance(centre, expected_centre), Catch::Matchers::WithinAbs(0.f, 0.01f));
		CHECK_THAT(radius, Catch::Matchers::WithinAbs(7.31248331f, 0.01f));
	}
}

File diff suppressed because it is too large Load Diff

View File

@@ -5,6 +5,8 @@ IF NOT EXIST build_ci\libs (
MKDIR build_ci\libs
)
CD build_ci\libs
@REM Install ZLib
IF NOT EXIST zlib-%ZLIB_VERSION%.zip (
ECHO Downloading https://github.com/libarchive/zlib/archive/v%ZLIB_VERSION%.zip
curl -L -o zlib-%ZLIB_VERSION%.zip https://github.com/libarchive/zlib/archive/v%ZLIB_VERSION%.zip || EXIT /b 1
@@ -14,9 +16,9 @@ IF NOT EXIST zlib-%ZLIB_VERSION% (
C:\windows\system32\tar.exe -x -f zlib-%ZLIB_VERSION%.zip || EXIT /b 1
)
CD zlib-%ZLIB_VERSION%
cmake -G "Visual Studio 17 2022" . || EXIT /b 1
cmake --build . --target ALL_BUILD --config Release || EXIT /b 1
cmake --build . --target RUN_TESTS --config Release || EXIT /b 1
cmake --build . --target INSTALL --config Release || EXIT /b 1
cmake -B build || EXIT /b 1
cmake --build build --target ALL_BUILD --config Release || EXIT /b 1
cmake --build build --target RUN_TESTS --config Release || EXIT /b 1
cmake --build build --target INSTALL --config Release || EXIT /b 1
@EXIT /b 0

View File

@@ -63,7 +63,7 @@ update_dictionary() {
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic"
# notify subscribers, using find instead of run-parts to make it work on FreeBSD as well