Compare commits

...

1008 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
b7680dcb92 More cleaning up 2026-02-17 13:50:08 +01:00
Maarten L. Hekkelman
43999dfade fixes, cartn's and type of ndb_seq_num 2026-02-16 13:05:00 +01:00
Maarten L. Hekkelman
845b549128 fix parser for float like strings 2026-02-16 09:48:34 +01:00
Maarten L. Hekkelman
b019ba9b91 ouch... 2026-02-14 11:31:26 +01:00
Maarten L. Hekkelman
7a49d064a1 Added forward declaration of file 2026-02-14 10:56:09 +01:00
Maarten L. Hekkelman
fd0a95221e add missing include 2026-02-13 20:36:22 +01:00
Maarten L. Hekkelman
32acd48ce9 Fix 2026-02-13 20:21:44 +01:00
Maarten L. Hekkelman
14be0638c1 removed forward_decl 2026-02-13 20:08:18 +01:00
Maarten L. Hekkelman
2a0e52dff6 IWYU 2026-02-13 20:05:36 +01:00
Maarten L. Hekkelman
0fe1178bdd why was this included? 2026-02-13 19:04:47 +01:00
Maarten L. Hekkelman
2f11d6d642 All tests pass 2026-02-13 16:26:38 +01:00
Maarten L. Hekkelman
a46a31ac52 first merge 2026-02-13 13:53:45 +01:00
Maarten L. Hekkelman
5ad38b4e9c Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2026-02-07 15:25:39 +01:00
Maarten L. Hekkelman
702323d83a a specialisation for find1 with parameter std::optional 2026-02-07 15:25:20 +01:00
Maarten L. Hekkelman
721857629e Reconstruction optimised 2026-02-04 11:58:49 +01:00
Maarten L. Hekkelman
9200acf8f4 fix, take 2 2026-01-28 19:57:36 +01:00
Maarten L. Hekkelman
6c1d60f5a5 fix 2026-01-28 19:36:52 +01:00
Maarten L. Hekkelman
56cfa09976 Fix typo, add depedency to cmake config file 2026-01-28 18:02:56 +01:00
Maarten L. Hekkelman
1c8571f02c category is now a valid std::ranges::input_range 2026-01-28 16:42:52 +01:00
Maarten L. Hekkelman
098f8f032a Flush progress bar to avoid garbage 2026-01-27 11:55:40 +01:00
Maarten L. Hekkelman
a539603c4f iwyu acting up again 2026-01-27 09:24:46 +01:00
Maarten L. Hekkelman
0dc4a34071 New progress bar 2026-01-26 17:43:21 +01:00
Maarten L. Hekkelman
2ef82c794e Silence some more windows warnings 2026-01-13 09:55:03 +01:00
Maarten L. Hekkelman
e157eb7700 fixed swap 2026-01-13 09:41:39 +01:00
Maarten L. Hekkelman
a079543594 residue copy constructor and the like 2026-01-13 09:28:00 +01:00
Maarten L. Hekkelman
9f3a0b5c4c cleaning up mostly done now 2026-01-13 08:59:40 +01:00
Maarten L. Hekkelman
dd37d87a33 sqlite stuff 2026-01-08 09:41:50 +01:00
Maarten L. Hekkelman
03342caf13 cleaning up code 2026-01-08 08:26:48 +01:00
Maarten L. Hekkelman
1e5050b221 clean up bugprone and cert warnings 2026-01-05 16:00:59 +01:00
Maarten L. Hekkelman
b34eb21d96 Modernizing code 2026-01-05 15:21:36 +01:00
Maarten L. Hekkelman
656c82838a macOS and Windows fixes,
start using clang-tidy
2026-01-05 12:59:59 +01:00
Maarten L. Hekkelman
2a265bb5c8 Finish removing sqlite code 2026-01-05 11:00:49 +01:00
Maarten L. Hekkelman
7ac0717944 Merge branch 'using-sqlite-for-delete' into develop 2026-01-05 10:55:33 +01:00
Maarten L. Hekkelman
f190bdfd64 embedded and patched sqlite no longer needed 2026-01-05 10:55:03 +01:00
Maarten L. Hekkelman
1d430e8c47 Merge branch 'with-sqlite' into develop 2026-01-05 08:31:32 +01:00
Maarten L. Hekkelman
56aab89176 Refactored validator_factory interface 2026-01-05 08:30:30 +01:00
Maarten L. Hekkelman
b9bcf07f84 Implement backslashed wrapping of long strings according to the cif 1.1 specification. 2025-12-31 16:08:17 +01:00
Maarten L. Hekkelman
e3e7648c5c change test 2025-12-30 15:36:07 +01:00
Maarten L. Hekkelman
83ee1984d8 null remains a problem, conceptually...
Added drop_empty_items
2025-12-30 15:28:51 +01:00
Maarten L. Hekkelman
f9741a27cd Keep track of modification 2025-12-30 10:39:42 +01:00
Maarten L. Hekkelman
0b002afb9f Add exec methods to connection 2025-12-30 10:27:15 +01:00
Maarten L. Hekkelman
a58e5a1bfc Rename function 2025-12-29 17:59:49 +01:00
Maarten L. Hekkelman
1229652444 Remove recursion 2025-12-29 17:19:03 +01:00
Maarten L. Hekkelman
05197a85c6 Fixed transactions by patching sqlite3 2025-12-29 17:11:17 +01:00
Maarten L. Hekkelman
71cd4958bd check complete sql 2025-12-29 15:52:02 +01:00
Maarten L. Hekkelman
61965c2391 More output options 2025-12-29 14:41:35 +01:00
Maarten L. Hekkelman
0f60f42f9e dirty marking, mcql work, load_dictionary 2025-12-29 12:45:56 +01:00
Maarten L. Hekkelman
2e61b330c4 Merge branch 'with-sqlite' of https://github.com/PDB-REDO/libcifpp into with-sqlite 2025-12-27 16:46:11 +01:00
Maarten L. Hekkelman
964e7620eb format output 2025-12-27 16:46:04 +01:00
Maarten L. Hekkelman
8a329e7c2d foreign key test 2025-12-24 14:07:14 +01:00
Maarten L. Hekkelman
8485747377 Merge branch 'trunk' into with-sqlite 2025-12-22 11:25:56 +01:00
Maarten L. Hekkelman
34af410d5e Use FetchContent to find sqlite3, if needed 2025-12-22 11:25:00 +01:00
UENO, M.
8f5b9eb631 Use find_package for FastFloat prior to FetchContent_Declare (#73)
* Use `find_package` for FastFloat prior to `FetchContent_Declare`

* Convert space to tab
2025-12-22 07:26:41 +01:00
Maarten L. Hekkelman
388eae211e Merge branch 'trunk' into with-sqlite 2025-12-20 08:53:23 +01:00
Maarten L. Hekkelman
f19c6d078e Merge branch 'trunk' into develop 2025-12-20 08:40:04 +01:00
Maarten L. Hekkelman
73f18a4da2 PCRE2 is not thread safe, the way it is used in libcifpp type validator 2025-12-20 08:38:59 +01:00
Maarten L. Hekkelman
8e2494532e order and supress empty output 2025-12-19 15:47:43 +01:00
Maarten L. Hekkelman
2f7f62bdce Implemented drop table 2025-12-18 14:58:22 +01:00
Maarten L. Hekkelman
bf9551a994 Better error reporting in cql 2025-12-18 14:36:44 +01:00
Maarten L. Hekkelman
30ff5bea36 Better validation message 2025-12-18 14:36:27 +01:00
Maarten L. Hekkelman
b6568664ea Execute multiple statements and more 2025-12-17 16:02:53 +01:00
Maarten L. Hekkelman
863f010a7c Renaming cql files 2025-12-17 13:45:00 +01:00
Maarten L. Hekkelman
da76bbae7c With transactions 2025-12-17 12:29:31 +01:00
Maarten L. Hekkelman
e2454a2e79 Insert, Delete and Update 2025-12-17 11:05:15 +01:00
Maarten L. Hekkelman
9503a7e9b4 Transaction stream working 2025-12-16 19:33:31 +01:00
Maarten L. Hekkelman
20784bdaf5 Working around NULL problem, for now 2025-12-16 16:14:10 +01:00
Maarten L. Hekkelman
9558085105 Working, but NULL is a problem 2025-12-16 15:40:50 +01:00
Maarten L. Hekkelman
429c31ae42 First working version 2025-12-16 12:56:29 +01:00
Maarten L. Hekkelman
58ac1ce033 backup 2025-12-15 18:43:02 +01:00
Maarten L. Hekkelman
23e575858c we can create tables now 2025-12-15 16:39:39 +01:00
Maarten L. Hekkelman
4a31878975 first steps with sqlite 2025-12-09 16:47:11 +01:00
Maarten L. Hekkelman
9f6e1e245b start value expression 2025-12-09 13:42:46 +01:00
Maarten L. Hekkelman
8a8ca9599d several simple select statements added 2025-12-09 11:28:39 +01:00
Maarten L. Hekkelman
00b0473438 backup 2025-12-03 16:22:44 +01:00
Maarten L. Hekkelman
7a9d94bc57 private linking to fast_float 2025-11-27 15:34:20 +01:00
Maarten L. Hekkelman
a3ba760ab5 Merge branch 'develop' into trunk 2025-11-27 15:29:46 +01:00
Maarten L. Hekkelman
913abcd1b3 Fixes in makefile 2025-11-27 11:26:47 +01:00
Maarten L. Hekkelman
510e336306 exclude from all for fast_float 2025-11-27 09:06:22 +01:00
Maarten L. Hekkelman
f15a76e29b exclude from all for fast_float 2025-11-27 09:05:51 +01:00
Maarten L. Hekkelman
915a147449 backup 2025-11-26 16:35:30 +01:00
Maarten L. Hekkelman
edf24ca9ff work 2025-11-26 13:46:55 +01:00
Maarten L. Hekkelman
ffff2479d2 revert version-string code 2025-11-26 13:17:14 +01:00
Maarten L. Hekkelman
46a9318aa5 revert version string generator 2025-11-26 13:11:54 +01:00
Maarten L. Hekkelman
4a7f48eed8 some initial work 2025-11-19 16:36:44 +01:00
Maarten L. Hekkelman
42e66afd92 Merge branch 'develop' into cql 2025-11-19 13:29:35 +01:00
Maarten L. Hekkelman
b550e9b027 re-enable tests 2025-11-19 13:28:47 +01:00
Maarten L. Hekkelman
452bb83ce7 Remove revision.hpp file when making clean 2025-11-19 11:39:27 +01:00
Maarten L. Hekkelman
6eda9aaf36 better center_and_radius for residue 2025-11-18 16:44:53 +01:00
Maarten L. Hekkelman
251fb55d6a fixing smallest sphere 2025-11-05 13:18:58 +01:00
Maarten L. Hekkelman
f94e9aece9 create_non_poly, another 2025-11-05 11:03:56 +01:00
Maarten L. Hekkelman
c565bb96be Do not run the spinner test 2025-10-30 09:19:50 +01:00
Maarten L. Hekkelman
e51f31dc4c Remove libfmt, fix instantiating templates for fast_float usage 2025-10-30 09:08:44 +01:00
Maarten L. Hekkelman
4e128885d6 Added missing include 2025-10-29 18:21:47 +01:00
Maarten L. Hekkelman
b37054228d Added smalles sphere function 2025-10-29 17:09:13 +01:00
Maarten L. Hekkelman
815b33fee0 Matrix determinant for 4x4 2025-10-28 15:55:05 +01:00
Maarten L. Hekkelman
97f55c1639 Version bump 2025-10-22 10:05:55 +02:00
Maarten L. Hekkelman
89de73eb6f Added exists to compound_factory 2025-10-21 13:06:09 +02:00
Maarten L. Hekkelman
75f2ec3792 Remove warning 2025-10-13 14:22:57 +02:00
Maarten L. Hekkelman
f4d29e8da9 re-enable test to see if fast_float is required 2025-10-01 17:09:06 +02:00
Maarten L. Hekkelman
b97b2638b8 More supported float types 2025-10-01 17:08:33 +02:00
Maarten L. Hekkelman
ea8dea8cbd Merge branch 'develop' into cql 2025-10-01 16:46:27 +02:00
Maarten L. Hekkelman
bc0222dc0e attempt two 2025-10-01 16:42:05 +02:00
Maarten L. Hekkelman
10a6b5649b Using fast float instead of home baked version 2025-10-01 16:14:07 +02:00
Maarten L. Hekkelman
ff2a233156 stap 1, een test 2025-10-01 15:48:16 +02:00
Maarten L. Hekkelman
743e2800f8 update changelog 2025-09-30 11:21:03 +02:00
Maarten L. Hekkelman
32ac884127 Do not stop on empty audit_conform fields 2025-09-29 10:34:29 +02:00
Maarten L. Hekkelman
bec69f7d07 Fix reconstruction when entity ID's are missing 2025-09-29 09:59:04 +02:00
Maarten L. Hekkelman
a99215ad6a version bump 2025-09-24 16:45:05 +02:00
Maarten L. Hekkelman
e3d2cbd044 Lower required catch2 version 2025-09-24 16:42:45 +02:00
Maarten L. Hekkelman
5fc965789d messages updated 2025-09-24 15:11:33 +02:00
Maarten L. Hekkelman
b4596902aa Add compile features for Catch2, required on Windows 2025-09-24 14:15:19 +02:00
Maarten L. Hekkelman
cbf8b52f62 Update catch2 usage 2025-09-24 13:25:56 +02:00
Maarten L. Hekkelman
4e0fa1c916 No complete jthread on macOS/CLang 2025-09-24 13:00:50 +02:00
Maarten L. Hekkelman
95b007d38f Merge branch 'trunk' into develop 2025-09-24 11:38:01 +02:00
Maarten L. Hekkelman
b66f7a30ce Progress bar using WriteConsole on Windows 2025-09-24 11:36:37 +02:00
Maarten L. Hekkelman
ec7287c503 remove warning 2025-09-24 11:32:12 +02:00
Maarten L. Hekkelman
a41c591f0c Restore order of imports, avoid reordering by clang-format 2025-09-24 10:51:53 +02:00
Maarten L. Hekkelman
3a6527cdd5 yet another update on progress bar 2025-09-24 10:23:58 +02:00
Maarten L. Hekkelman
5f21a094c0 added flush to progress bar 2025-09-24 10:14:03 +02:00
Maarten L. Hekkelman
2203a1855d improved progress bar 2025-09-24 09:49:28 +02:00
Maarten L. Hekkelman
7edd2ecc18 new progress bar 2025-09-23 15:53:55 +02:00
Maarten L. Hekkelman
1d2953c850 Fix reconstruction, version bump 2025-09-22 13:51:18 +02:00
Maarten L. Hekkelman
dbf59ce622 reconstruct better when entity ID's are missing 2025-09-22 12:59:16 +02:00
Maarten L. Hekkelman
1596db8499 Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2025-09-16 13:29:57 +02:00
Maarten L. Hekkelman
bd1fb5c5cd Added model::create_link 2025-09-16 13:29:51 +02:00
Maarten L. Hekkelman
da500025c3 swap atoms should swap type_symbol as well 2025-09-10 17:16:22 +02:00
Maarten L. Hekkelman
60eeea9a93 more resilient loading of dictionary data 2025-09-10 15:06:06 +02:00
Maarten L. Hekkelman
1220f01f1e change location of mmcif_ma.dic 2025-09-10 14:22:52 +02:00
Maarten L. Hekkelman
ad0a34fe98 Update changelog 2025-09-10 12:49:43 +02:00
Maarten L. Hekkelman
a7425ff1a0 Optimise validation code 2025-09-10 12:40:01 +02:00
Maarten L. Hekkelman
1ce25f86ae better check anisotrop atoms 2025-09-10 12:19:56 +02:00
Maarten L. Hekkelman
cd93f72b96 Merge branch 'develop' into better-create-entity-ids 2025-09-10 09:28:50 +02:00
Maarten L. Hekkelman
23500bd303 Fix reconstruction of really bare files 2025-09-10 09:25:49 +02:00
Maarten L. Hekkelman
14b4753b4f test for null 2025-09-09 19:55:13 +02:00
Maarten L. Hekkelman
4c37d5db5f use rowhandles 2025-09-09 19:52:39 +02:00
Maarten L. Hekkelman
fc2c4b4172 fix map::at in reconstruct sequences 2025-09-09 10:51:31 +02:00
Maarten L. Hekkelman
3ac64de16b Version bump, update changelog 2025-09-03 14:55:20 +02:00
Maarten L. Hekkelman
45eecd72b0 using pkg-config, when available 2025-09-03 14:24:29 +02:00
Maarten L. Hekkelman
d1dd558cda as object lib 2025-09-03 14:00:22 +02:00
Maarten L. Hekkelman
d19e2c2196 Update pcre2s 2025-09-03 13:11:45 +02:00
Maarten L. Hekkelman
72c7aca074 revert to catch2 version 2, due to linker errors on Windows? 2025-09-03 13:04:13 +02:00
Maarten L. Hekkelman
683a1087d0 Install catch2 before testing 2025-09-03 12:28:06 +02:00
Maarten L. Hekkelman
35bc139deb Update pcre2s 2025-09-03 11:39:00 +02:00
Maarten L. Hekkelman
45ece2fa0d pcre2 is no longer a depends on Windows 2025-09-03 11:18:04 +02:00
Maarten L. Hekkelman
11c98f553f Update Catch2 to version 3
Updated pcre2s
2025-09-03 11:15:31 +02:00
Maarten L. Hekkelman
28aa9b1036 Using pcre2s 2025-09-03 11:07:02 +02:00
Maarten L. Hekkelman
d7b5c0a748 remove message 2025-09-02 15:21:04 +02:00
Maarten L. Hekkelman
065e7f5f18 added clang format file 2025-09-02 14:59:28 +02:00
Maarten L. Hekkelman
4b1623cfdc pcre2 again 2025-09-02 14:57:07 +02:00
Maarten L. Hekkelman
1de973ddcb Fix findpcre2 cmake file 2025-09-02 13:12:20 +02:00
Maarten L. Hekkelman
eecc801203 last remaining warning 2025-09-02 12:57:08 +02:00
Maarten L. Hekkelman
5c50154ea4 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-09-02 12:54:03 +02:00
Maarten L. Hekkelman
0fa3d6aa94 Removing warning using MSVC 2025-09-02 12:54:07 +02:00
Maarten L. Hekkelman
01f5242bfb Revert "Add formatting file"
This reverts commit af6d8d4f71.
2025-09-02 11:57:21 +02:00
Maarten L. Hekkelman
af6d8d4f71 Add formatting file 2025-09-02 11:47:50 +02:00
Maarten L. Hekkelman
fa8285fc0f use std::format anyway, even if __cpp_lib_format is not defined. 2025-09-02 10:23:00 +02:00
Maarten L. Hekkelman
2e7f6b8337 cross platform check for lib format 2025-09-02 10:03:36 +02:00
Maarten L. Hekkelman
a6a55020eb Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-09-01 10:54:09 +02:00
Maarten L. Hekkelman
0e84ea454d fmt fix 2025-09-01 10:53:29 +02:00
Maarten L. Hekkelman
f3bf211d45 pdb formatting 2025-09-01 09:34:07 +02:00
Maarten L. Hekkelman
f5ef44836c pdb formatting 2025-09-01 09:31:22 +02:00
Maarten L. Hekkelman
070124b6e1 fix cif2pdb 2025-08-29 14:00:27 +02:00
Maarten L. Hekkelman
c8a46fcdd9 Stop when element is missing in reading PDB input 2025-08-29 13:17:17 +02:00
Maarten L. Hekkelman
5306b59fd8 Do not write zero seq ID's in PDB files 2025-08-29 10:08:56 +02:00
Maarten L. Hekkelman
90c5df832a Check only first datablock 2025-08-29 09:49:27 +02:00
Maarten L. Hekkelman
2aa439d51f test for fmt 2025-08-28 08:48:20 +02:00
Maarten L. Hekkelman
ac2b68517c conditional fmt 2025-08-27 16:08:51 +02:00
Maarten L. Hekkelman
e56b568c42 use cif::format... sigh 2025-08-27 15:46:41 +02:00
Maarten L. Hekkelman
63c49b2e04 Fix writing pdb files 2025-08-27 15:20:20 +02:00
Maarten L. Hekkelman
559fd18a20 Fix std::format usage 2025-08-27 08:57:40 +02:00
Maarten L. Hekkelman
beb7585261 fix std::format usage 2025-08-27 08:24:07 +02:00
Maarten L. Hekkelman
8b0f92aa9a Merge remote-tracking branch 'github/using-fmt' into develop 2025-08-27 07:43:22 +02:00
Maarten L. Hekkelman
0d8beeae5b No longer needed 2025-08-26 16:04:31 +02:00
Maarten L. Hekkelman
e3da654e67 Speed up build when eigen3 is not available 2025-08-26 15:54:55 +02:00
Maarten L. Hekkelman
dc9e151d89 remove warning 2025-08-26 15:37:22 +02:00
Maarten L. Hekkelman
7cfaf051ba should now work on windows 2025-08-26 15:16:01 +02:00
Maarten L. Hekkelman
7920491309 hope to find pcre2.h 2025-08-26 14:07:47 +02:00
Maarten L. Hekkelman
0ee493a3fb implib? 2025-08-26 13:55:25 +02:00
Maarten L. Hekkelman
7e23bc0c0b finding pcre2 on windows 2025-08-26 13:46:03 +02:00
Maarten L. Hekkelman
579f859562 no find_packge for pcre2 2025-08-26 13:42:14 +02:00
Maarten L. Hekkelman
752938ca44 using find_package? 2025-08-26 13:38:37 +02:00
Maarten L. Hekkelman
fce58c02fe no pcre2grep please, no tests either 2025-08-26 13:34:43 +02:00
Maarten L. Hekkelman
924f7c1505 finding pcre2, yet another attempt 2025-08-26 13:31:12 +02:00
Maarten L. Hekkelman
8944906fd2 fix warning, pcre2 2025-08-26 12:41:08 +02:00
Maarten L. Hekkelman
cb02969604 Using std::format 2025-08-25 16:31:00 +02:00
Maarten L. Hekkelman
31090c6ec5 attempt 2 2025-08-25 11:25:10 +02:00
Maarten L. Hekkelman
9e30d2bc1a finding pcre2 2025-08-25 10:39:48 +02:00
Maarten L. Hekkelman
93d703f7a1 Do not buld pcre tests 2025-08-20 20:40:08 +02:00
Maarten L. Hekkelman
3c241048a5 do not install pcre 2025-08-20 17:02:40 +02:00
Maarten L. Hekkelman
2788536799 this should work 2025-08-20 16:58:52 +02:00
Maarten L. Hekkelman
314d435a18 Another way of importing pcre 2025-08-20 16:49:12 +02:00
Maarten L. Hekkelman
37edcd8666 Finding and optionally building pcre 2025-08-20 15:47:49 +02:00
Maarten L. Hekkelman
10e290fbdf pcre2 in github actions? 2025-08-20 14:40:50 +02:00
Maarten L. Hekkelman
58cda1241e cleanup 2025-08-20 13:41:45 +02:00
Maarten L. Hekkelman
3659aaabff remove unneeded allocations 2025-08-20 13:35:15 +02:00
Maarten L. Hekkelman
727a39cc54 Finishing up replacing boost with pcre 2025-08-20 13:28:24 +02:00
Maarten L. Hekkelman
fd9ccdfff9 Using pcre instead of boost::regex 2025-08-19 16:16:43 +02:00
Maarten L. Hekkelman
aabee270b3 update .gitignore 2025-08-19 14:28:51 +02:00
Maarten L. Hekkelman
647c58f8ec allow code to be built with older compilers... 2025-08-19 12:44:41 +02:00
Maarten L. Hekkelman
0b8024d19c Optimise query processing 2025-08-19 12:24:33 +02:00
Maarten L. Hekkelman
d59b0bf27f Remove wrong warnings 2025-08-13 11:30:53 +02:00
Maarten L. Hekkelman
398c16eac2 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-08-13 09:02:10 +02:00
Maarten L. Hekkelman
fa869bdc7d lightweight fixup 2025-08-13 09:01:50 +02:00
Maarten L. Hekkelman
c20d0d2a30 lightweight fixup 2025-08-12 16:45:19 +02:00
Maarten L. Hekkelman
000f2736c2 Merge branch 'develop' into clebreto-feature/enrich_structure_options 2025-08-11 10:13:20 +02:00
Maarten L. Hekkelman
cfcc81bb62 verbose messages 2025-08-11 10:13:07 +02:00
Maarten L. Hekkelman
82eae05868 changed b-factor options for structure loading 2025-06-11 14:17:50 +02:00
Maarten L. Hekkelman
e8fb53c49b Alternate implementation of structure_open_options 2025-06-11 13:35:58 +02:00
Maarten L. Hekkelman
604c97afe1 Merge branch 'develop' into clebreto-feature/enrich_structure_options 2025-06-11 11:43:01 +02:00
Maarten L. Hekkelman
7e60cdf272 remove redundant statement 2025-06-11 11:42:26 +02:00
Maarten L. Hekkelman
9ea7cfcc80 Remove test 2025-06-11 09:56:26 +02:00
Maarten L. Hekkelman
a7a4a16f79 remove debug code 2025-06-11 09:45:45 +02:00
Maarten L. Hekkelman
6717059934 Revert renaming compound_id to mon_id in residue 2025-06-11 09:41:49 +02:00
Maarten L. Hekkelman
714747c280 version bump 2025-06-11 09:32:36 +02:00
Maarten L. Hekkelman
81cd305c80 rename mm::polymer fields and methods to better match mmcif_pdbx naming.
fix building mm::structure using pdb_seq_num instead of auth_seq_num
2025-06-11 09:30:54 +02:00
Maarten L. Hekkelman
5de872bbb3 Version bump, update mmcif_pdbx.dic 2025-06-10 09:17:30 +02:00
Maarten L. Hekkelman
ce6a75a920 right... 2025-06-10 09:11:26 +02:00
Maarten L. Hekkelman
874a5cb2f2 missing code added 2025-06-02 15:09:49 +02:00
Maarten L. Hekkelman
6e2202d4f1 More verbose strip 2025-06-02 09:10:58 +02:00
Maarten L. Hekkelman
bcf33df701 Added strip, removed dangerous datablock::is_valid (non-const version) 2025-06-02 08:52:58 +02:00
Maarten L. Hekkelman
3bdcf21c69 Merge commit '4b36bdc' into develop 2025-05-29 16:14:15 +02:00
Maarten L. Hekkelman
4b36bdc58c work around incorrect mmcif_pdbx name 2025-05-29 16:13:28 +02:00
Maarten L. Hekkelman
6d9008ee8c Merge branch 'trunk' into develop 2025-05-29 15:15:41 +02:00
Maarten L. Hekkelman
ee93692707 comment formatting 2025-05-29 14:15:13 +02:00
Maarten L. Hekkelman
2bcc368bce reconstruct when audit_conform is missing 2025-05-29 14:07:45 +02:00
Maarten L. Hekkelman
6cc4467d53 options 2025-05-27 13:55:13 +02:00
Maarten L. Hekkelman
425f98dc07 No longer fail pdb conversion when missing compound info 2025-05-12 12:45:21 +02:00
Maarten L. Hekkelman
cf34a9f3ad Updated test file 2025-05-12 11:56:42 +02:00
Maarten L. Hekkelman
3b2f347428 Remove reconstruct test again 2025-05-12 11:46:16 +02:00
Maarten L. Hekkelman
bd82c3cc4f Only reconstruct when needed 2025-05-12 10:24:00 +02:00
Maarten L. Hekkelman
af319866c7 added get_atom_by_atom_id 2025-05-06 14:11:05 +02:00
Maarten L. Hekkelman
b6ab29398e Fix parsing PDB files 2025-04-15 09:56:51 +02:00
Maarten L. Hekkelman
a5bb1797c0 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2025-04-15 09:17:44 +02:00
Maarten L. Hekkelman
a9647671c4 entity_poly.type is now by default 'other' 2025-04-15 09:17:37 +02:00
Maarten L. Hekkelman
63f784e7da Reconstruct branches, somewhat 2025-04-14 12:44:27 +02:00
Maarten L. Hekkelman
5da3379e0b Don't reconstruct invalid files, non-polymer should not have auth_seq_id 2025-04-14 11:06:36 +02:00
Maarten L. Hekkelman
2f3514689d Default value for b_iso_or_equiv, better sorting of atoms 2025-04-09 15:12:11 +02:00
Maarten L. Hekkelman
89a3ea4e24 update mmcif_pdbx.dic 2025-04-09 15:11:37 +02:00
Maarten L. Hekkelman
467e9555f4 remove duplicate test 2025-04-09 13:11:05 +02:00
Maarten L. Hekkelman
5b32ca15f7 Fix cleanup_empty_categories 2025-04-09 13:06:45 +02:00
Maarten L. Hekkelman
92402817d2 Merge branch 'trunk' into develop 2025-04-09 13:05:12 +02:00
Maarten L. Hekkelman
60ad3031d5 remove warning, hopefully fix docs action 2025-04-09 10:15:15 +02:00
Maarten L. Hekkelman
724cddb481 Merged develop manually 2025-04-09 09:20:53 +02:00
Maarten L. Hekkelman
41c0521480 Merge branch 'feature/enrich_structure_options' of github.com:clebreto/libcifpp into clebreto-feature/enrich_structure_options 2025-04-02 13:58:01 +02:00
Maarten L. Hekkelman
85ac2b1f63 Fix parsing bogus PDB files (with REMARK 001) 2025-04-02 11:29:25 +02:00
Maarten L. Hekkelman
13a97353aa fix test 2025-04-02 11:18:20 +02:00
Maarten L. Hekkelman
f49c166b9b Fix for MSVC 2025-03-31 12:52:26 +02:00
Maarten L. Hekkelman
fffa326f80 validator for multiple dictionaries 2025-03-31 12:38:28 +02:00
Maarten L. Hekkelman
da446adbb2 non compiling code 2025-03-28 16:15:58 +01:00
Maarten L. Hekkelman
617fec5c69 Close, but no cigar 2025-03-28 15:06:02 +01:00
Maarten L. Hekkelman
cfefa69c9c Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2025-03-27 11:40:42 +01:00
Maarten L. Hekkelman
00638a9e23 Fix loading coordinates from converted restraint files 2025-03-27 11:36:46 +01:00
Maarten L. Hekkelman
e241e03a15 Fix loading coordinates from converted restraint files 2025-03-27 11:26:30 +01:00
Maarten L. Hekkelman
b1faa3bd48 version bump 2025-03-10 13:58:34 +01:00
Maarten L. Hekkelman
6d28f487ec fix readme 2025-03-10 13:56:33 +01:00
Maarten L. Hekkelman
b231f92f76 Construct nonpoly_scheme 2025-03-10 13:55:25 +01:00
LE BRETON Come
7d33d56c0e Update docs 2025-03-07 15:59:19 +01:00
LE BRETON Come
f86f34e5e1 WIP Enrich StructureOpenOptions 2025-03-07 15:54:30 +01:00
Maarten L. Hekkelman
5e7b52b7de loading dictionaries 2025-02-17 12:57:08 +01:00
Maarten L. Hekkelman
0459d344e9 Fixes in error reporting 2025-02-17 12:32:14 +01:00
Maarten L. Hekkelman
71e525cd76 Refactored dictionary loading 2025-02-17 09:40:36 +01:00
Maarten L. Hekkelman
1480706d8b change for mingw 2025-02-05 16:05:08 +01:00
Maarten L. Hekkelman
96655b6d80 revert 2025-01-29 17:12:59 +01:00
Maarten L. Hekkelman
eed2aa0d0d better way to include eigen3 2025-01-29 17:01:44 +01:00
Maarten L. Hekkelman
de0c078a23 Update changelog 2025-01-29 16:08:55 +01:00
Maarten L. Hekkelman
321e995a54 Add some comments 2025-01-29 16:07:03 +01:00
Maarten L. Hekkelman
da9f1f81d7 Fix eigen3 problems on github? 2025-01-29 15:57:16 +01:00
Maarten L. Hekkelman
c6d4477a24 Using eigen quaternions 2025-01-29 15:37:57 +01:00
Maarten L. Hekkelman
523b073cdc own eigen 2025-01-29 14:25:28 +01:00
Maarten L. Hekkelman
2591bee21b test for github actions, own eigen library 2025-01-29 13:54:20 +01:00
Maarten L. Hekkelman
d881ca00c9 cleanup 2025-01-29 13:54:00 +01:00
Maarten L. Hekkelman
329dbff474 replace deprecated call 2025-01-29 13:42:37 +01:00
Maarten L. Hekkelman
d84a9fe6dc Deal with missing entity.type 2025-01-29 13:41:11 +01:00
Maarten L. Hekkelman
dcd812a996 Optimise text comparison routines 2025-01-29 11:39:08 +01:00
Maarten L. Hekkelman
6750194d9b Fixes for dictionary loading 2025-01-28 16:03:46 +01:00
Maarten L. Hekkelman
05865c3d9b Fixes for dictionary loading 2025-01-28 15:51:40 +01:00
Maarten L. Hekkelman
21e224bf00 Merge branch 'trunk' into develop 2025-01-15 14:25:57 +01:00
Maarten L. Hekkelman
f401d3fd0c Add way to load dictionary extensions along with main dictionary 2025-01-15 14:25:29 +01:00
Maarten L. Hekkelman
fd436871f1 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2024-12-24 12:55:48 +01:00
Maarten L. Hekkelman
fcf7864a4b Remove dead code 2024-12-24 12:55:44 +01:00
Maarten L. Hekkelman
c4003956d9 Only build tests when not included as subdirectory 2024-12-24 11:32:07 +01:00
Maarten L. Hekkelman
de622b6162 cmake policy for Boost 2024-12-16 08:54:47 +01:00
Maarten L. Hekkelman
41b4bdb90e update changelog 2024-11-19 08:57:00 +01:00
Maarten L. Hekkelman
af73cb3ad3 Version bump 2024-11-18 08:09:01 +01:00
Maarten L. Hekkelman
240b631963 Merge branch 'trunk' into develop 2024-11-18 08:08:30 +01:00
Maarten L. Hekkelman
c2a747af8c Fix remark 3 parser 2024-11-18 08:07:15 +01:00
Maarten L. Hekkelman
5959647826 three way comparison for point 2024-11-04 09:25:51 +01:00
Maarten L. Hekkelman
9542e211bc avoid cmake errors in windows? 2024-10-15 08:56:34 +02:00
Maarten L. Hekkelman
d07890db7f Set target property CIFPP_DATA_DIR 2024-10-07 11:24:10 +02:00
Maarten L. Hekkelman
ca241bd8f2 Fix linking to std::atomic 2024-09-23 09:14:04 +02:00
Charles Beattie
e444092711 and_condition_impl::combine_equal - Remove UB container modification. (#63)
* and_condition_impl::combine_equal - Remove UB container modification.

The container is modified while iterating it.
Switched to indexed based iteration to avoid UB.

* Update condition.cpp

Sorry missed this line.
2024-09-13 17:08:18 +02:00
Maarten L. Hekkelman
a96b1e07f4 Merge remote-tracking branch 'origin/develop' into trunk 2024-09-10 11:34:00 +02:00
Maarten L. Hekkelman
f48c31bcb5 Proposed fix for comparing floating points (needed on macOS) (#62)
* Proposed fix for comparing floating points (needed on macOS)

* Work around weird behaviour in testing floats using catch2

* Better from_chars implementation
2024-09-10 11:31:59 +02:00
Maarten L. Hekkelman
d85ab93a35 Fix swap for incomplete rows 2024-08-27 15:48:20 +02:00
Maarten L. Hekkelman
a6804b5aca test on macOS 2024-07-22 12:31:17 +02:00
Maarten L. Hekkelman
e4dcb211ee modify tests for macOS? 2024-07-22 11:41:05 +02:00
Maarten L. Hekkelman
a5a5f47f7a Add missing file 2024-07-16 12:44:43 +02:00
Maarten L. Hekkelman
25c900c387 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2024-07-16 12:08:59 +02:00
Maarten L. Hekkelman
4e95f7b83e restored find atomic 2024-07-16 12:08:43 +02:00
IdaDeVries
66ad3b0cee Use c++20 2024-07-15 09:54:17 +02:00
Maarten L. Hekkelman
e853cd1ca0 set_validator changes 2024-07-01 12:27:31 +02:00
Maarten L. Hekkelman
b9544033c6 Fix reconstruction 2024-07-01 12:16:11 +02:00
Maarten L. Hekkelman
17840cb8cc New dictionary 2024-07-01 12:15:39 +02:00
Maarten L. Hekkelman
f85b6d94b8 Attempt to get code compiling on msvc/macos 2024-06-12 10:25:23 +02:00
Maarten L. Hekkelman
6c32a9f198 Add operator != for optional type 2024-06-12 09:56:04 +02:00
Maarten L. Hekkelman
cefeebbfb8 fix rename in comment 2024-06-12 09:55:46 +02:00
Maarten L. Hekkelman
941a015b43 Do not stop when compound is missing 2024-05-15 09:05:01 +02:00
Maarten L. Hekkelman
ae0e9fbe77 fix cif::item constructor
fix ordering atoms
2024-04-29 12:06:50 +02:00
Maarten L. Hekkelman
3484c3dd2e Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2024-04-29 08:34:19 +02:00
Maarten L. Hekkelman
5be8f749bd Fix validation error 2024-04-29 08:34:15 +02:00
Maarten L. Hekkelman
cf484707a0 Fix validation error 2024-04-29 08:30:11 +02:00
Maarten L. Hekkelman
f12e529c0b default alt id for new atoms 2024-04-24 13:21:32 +02:00
Maarten L. Hekkelman
01b90a2ba5 set occupancy of newly added atom 2024-04-23 15:42:55 +02:00
Maarten L. Hekkelman
cd1e952812 residue::create_new_atom 2024-04-23 13:50:25 +02:00
Maarten L. Hekkelman
996f1e4277 check existence of alternate for specific atoms 2024-04-17 16:49:37 +02:00
Maarten L. Hekkelman
2d84694f86 remove debug statement 2024-04-17 14:14:19 +02:00
Maarten L. Hekkelman
65718c64cc fix updating index for change in value in category 2024-04-17 12:54:58 +02:00
Maarten L. Hekkelman
6e30365f55 endl 2024-04-16 11:15:22 +02:00
Maarten L. Hekkelman
c0555b6d86 Export CIFPP_SHARE_DIR variable when included as sub directory 2024-04-10 11:38:23 +02:00
Maarten L. Hekkelman
1ff9b6c071 fix makefile 2024-04-08 10:32:35 +02:00
Maarten L. Hekkelman
c1a51a1dfa fix makefile 2024-04-08 10:31:08 +02:00
Maarten L. Hekkelman
bfbbeb90e7 fix makefile to create shared libraries when requested to do so
example should work when there is no components.cif file yet
2024-04-03 17:21:48 +02:00
Maarten L. Hekkelman
588e075325 Fix query generation when using constant numeric values
replace size_t with std::size_t to silence warnings
2024-04-03 14:01:51 +02:00
Maarten L. Hekkelman
66717fee68 take DESTDIR into account 2024-03-28 20:31:19 +01:00
Maarten L. Hekkelman
844f52c955 Merge branch 'develop' into trunk 2024-03-28 19:04:39 +01:00
Maarten L. Hekkelman
e679cd05c1 update changelog 2024-03-27 16:19:54 +01:00
Maarten L. Hekkelman
1e72ce4830 add missing header 2024-03-19 19:53:00 +01:00
Maarten L. Hekkelman
3bb21c5403 Try one more location to locate resources 2024-03-14 11:34:36 +01:00
Maarten L. Hekkelman
6d1be23ad0 oops 2024-03-14 11:12:39 +01:00
Maarten L. Hekkelman
0472b9a4a4 Merge branch 'trunk' into develop 2024-03-14 10:55:22 +01:00
Maarten L. Hekkelman
c9acff49f9 version bump, clean up makefile 2024-03-14 10:54:54 +01:00
Maarten L. Hekkelman
7cab560595 minimum requires cmake version 2024-03-14 07:37:50 +01:00
Maarten L. Hekkelman
ac98531a2f Fix writing exports file, version bump 2024-03-12 12:55:59 +01:00
Maarten L. Hekkelman
917e0ba79c Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2024-03-12 10:12:55 +01:00
Maarten L. Hekkelman
3ebceb7522 do not number non-polymer residues 2024-03-12 10:12:50 +01:00
Maarten L. Hekkelman
92bd52da12 get that code compiling 2024-03-09 14:13:37 +01:00
Maarten L. Hekkelman
fb56a9cd6e version bump 2024-03-09 14:03:45 +01:00
Maarten L. Hekkelman
a4680f7d38 error_code should be checked differently? 2024-03-09 13:54:53 +01:00
Maarten L. Hekkelman
da8a72a8aa Merge branch 'develop' into trunk 2024-03-06 16:50:33 +01:00
Maarten L. Hekkelman
ac497932b5 Fix loading embedded restraint data 2024-03-06 16:50:12 +01:00
Maarten L. Hekkelman
9927b5061a Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2024-03-06 16:21:15 +01:00
Maarten L. Hekkelman
cedaab9642 load compound info from appended restraint info 2024-03-06 16:20:43 +01:00
Maarten L. Hekkelman
50bf2145ec Merge branch 'develop' into trunk 2024-03-06 15:54:56 +01:00
Maarten L. Hekkelman
dc77729f50 update changelog 2024-03-06 15:48:17 +01:00
Maarten L. Hekkelman
e3330d667a formatting 2024-03-06 13:22:00 +01:00
Maarten L. Hekkelman
9822f397a1 update libcifpp makefile 2024-03-06 12:58:50 +01:00
Maarten L. Hekkelman
a3b5ce9959 loading extra compound info changed
cif::file constructors
2024-03-06 12:33:58 +01:00
Maarten L. Hekkelman
9eb06e929a fixes in iterators 2024-03-05 15:52:36 +01:00
Maarten L. Hekkelman
629e06d647 write uncoloured text 2024-03-05 15:22:05 +01:00
Maarten L. Hekkelman
51ccb92184 small changes in constructors 2024-03-05 13:41:05 +01:00
Maarten L. Hekkelman
3cd27f13fd calculate formula_weight when missing 2024-03-05 13:02:31 +01:00
Maarten L. Hekkelman
ae668530c0 do not install files if they are not downloaded 2024-03-05 11:40:33 +01:00
Maarten L. Hekkelman
4a8b1c056c do not install files if they are not downloaded 2024-03-05 11:39:45 +01:00
Maarten L. Hekkelman
d7a5e598bc Merge branch 'develop' of github.com:pdb-redo/libcifpp into develop 2024-03-05 11:33:25 +01:00
Maarten L. Hekkelman
3f1ee32cc6 better trim 2024-03-05 11:33:19 +01:00
Maarten L. Hekkelman
725d6ead98 stupid typo 2024-02-29 14:56:45 +01:00
Maarten L. Hekkelman
baf70579de version bump 2024-02-27 15:23:51 +01:00
Maarten L. Hekkelman
cd28ab58a3 typo in error message 2024-02-27 14:40:56 +01:00
Maarten L. Hekkelman
a78fa0a81d reconstruction fixes 2024-02-27 13:36:45 +01:00
Maarten L. Hekkelman
82130be5f5 oops 2024-02-27 13:29:38 +01:00
Maarten L. Hekkelman
510ce62dfb reconstruction fixes 2024-02-27 13:24:53 +01:00
Maarten L. Hekkelman
93375a5087 Develop (#54)
* - renamed exists to contains
- fix compare for ints where item is empty

* - checking and optionally dropping ndb_poly_seq_scheme
- fix in iterator_proxy

* formatting data in reconstruction

* Version bump

* Attempt to get code compiling on macOS

* attempt 2 to build on macOS

* Added remove column

* Added rename_column
Added item_alias
Rename columns in reconstruct

* macOS...

* Fixed serious bug in emplace of both datablock and file.

* renaming field and column to item

* replace tag with item or item_name

* Fix validate pdbx

* version bump

* atom_site_anisotrop check

* - changed compound::is_known_peptide/is_know_base
- Add audit_conform only if file is really valid
- Added reconstruction code for PDBx

* pdb2cif work

* gcc diagnostics and clipper

* Fixing pdb2cif, and sequence checking

* work around bug in old gcc

* fix reconstruct sequence

* formatting

* some small optimisations

* Fix url in compound message

* Fix operator= for item_handle

* Fix operator= for item_handle

* new update_value in category

* test builds faster now

* Use Catch2 version 3 if installed

* catch22
2024-02-17 16:03:14 +01:00
Maarten L. Hekkelman
be738e7fb1 catch22 2024-02-17 15:37:53 +01:00
Maarten L. Hekkelman
9c78131df3 Use Catch2 version 3 if installed 2024-02-17 15:20:11 +01:00
Maarten L. Hekkelman
d94f6f4d19 test builds faster now 2024-02-05 13:01:28 +01:00
Maarten L. Hekkelman
9a3eced350 new update_value in category 2024-01-31 16:20:49 +01:00
Maarten L. Hekkelman
2fed7a76fb Fix operator= for item_handle 2024-01-30 16:51:12 +01:00
Maarten L. Hekkelman
f02e59df1b Fix operator= for item_handle 2024-01-30 16:40:02 +01:00
Maarten L. Hekkelman
04147a2fe9 Fix url in compound message 2024-01-30 16:39:32 +01:00
Maarten L. Hekkelman
0e83bc31dc some small optimisations 2024-01-30 16:29:22 +01:00
Maarten L. Hekkelman
75a5f7960f formatting 2024-01-30 11:12:29 +01:00
Maarten L. Hekkelman
3f93c27b07 fix reconstruct sequence 2024-01-29 17:40:24 +01:00
Maarten L. Hekkelman
ab781d4516 work around bug in old gcc 2024-01-29 16:33:53 +01:00
Maarten L. Hekkelman
446438bf8c Fixing pdb2cif, and sequence checking 2024-01-29 16:12:01 +01:00
Maarten L. Hekkelman
4e012cbd48 gcc diagnostics and clipper 2024-01-29 16:11:08 +01:00
Maarten L. Hekkelman
12ee4a792c pdb2cif work 2024-01-29 13:00:50 +01:00
Maarten L. Hekkelman
e59750386f - changed compound::is_known_peptide/is_know_base
- Add audit_conform only if file is really valid
- Added reconstruction code for PDBx
2024-01-24 16:14:08 +01:00
Maarten L. Hekkelman
4e19d54867 atom_site_anisotrop check 2024-01-23 14:08:25 +01:00
Maarten L. Hekkelman
db603e5438 version bump 2024-01-23 11:57:29 +01:00
Maarten L. Hekkelman
5320cb123a Fix validate pdbx 2024-01-23 11:57:21 +01:00
Maarten L. Hekkelman
30a2ebdbb4 Merge remote-tracking branch 'github/develop-cif2fasta' into develop 2024-01-23 11:42:12 +01:00
Maarten L. Hekkelman
a5d43998a3 replace tag with item or item_name 2024-01-23 11:41:13 +01:00
Maarten L. Hekkelman
2792caec70 renaming field and column to item 2024-01-23 11:23:20 +01:00
Maarten L. Hekkelman
fb2b1e984c Fixed serious bug in emplace of both datablock and file. 2024-01-22 16:04:57 +01:00
Maarten L. Hekkelman
13ab1caf95 macOS... 2024-01-22 15:33:43 +01:00
Maarten L. Hekkelman
5d4534fac4 Added rename_column
Added item_alias
Rename columns in reconstruct
2024-01-22 15:15:11 +01:00
Maarten L. Hekkelman
f450643861 Added remove column 2024-01-22 14:03:22 +01:00
Maarten L. Hekkelman
fc14a65511 attempt 2 to build on macOS 2024-01-22 13:08:25 +01:00
Maarten L. Hekkelman
bbd1e27c5e Attempt to get code compiling on macOS 2024-01-22 12:49:11 +01:00
Maarten L. Hekkelman
369a83b718 Version bump 2024-01-22 12:00:38 +01:00
Maarten L. Hekkelman
afc541b956 Merge remote-tracking branch 'github/develop-cif2fasta' into develop 2024-01-22 12:00:11 +01:00
Maarten L. Hekkelman
7e4d2ffb4d formatting data in reconstruction 2024-01-22 11:43:56 +01:00
Maarten L. Hekkelman
e09913a94f - checking and optionally dropping ndb_poly_seq_scheme
- fix in iterator_proxy
2024-01-22 11:18:00 +01:00
Maarten L. Hekkelman
b4d1c4cc04 - renamed exists to contains
- fix compare for ints where item is empty
2024-01-17 16:31:18 +01:00
Maarten L. Hekkelman
22537c0e7e Merge branch 'develop' into trunk 2024-01-09 16:45:46 +01:00
Maarten L. Hekkelman
39c0db8d6a Do not validate on copy category 2024-01-09 16:29:53 +01:00
Maarten L. Hekkelman
9db12761f7 Increase version to reflect changes 2024-01-04 13:14:36 +01:00
Maarten L. Hekkelman
0f8a7c4817 Fix design flaw in category_index (when moving categories) 2024-01-04 13:10:09 +01:00
Maarten L. Hekkelman
47e59a55c5 for compatibility with gcc 9.4 2024-01-03 12:35:22 +01:00
Maarten L. Hekkelman
b3496f4e5d fixes in pdbx validation, compound one letter code 2024-01-03 12:31:42 +01:00
Maarten L. Hekkelman
e866228afd In reconstruct, use embedded compound info 2024-01-03 09:43:13 +01:00
Maarten L. Hekkelman
4aeaa5251e revert change to string_view, not working on macOS 2024-01-03 09:26:32 +01:00
Maarten L. Hekkelman
b36988e64a update changelog 2024-01-03 09:18:08 +01:00
Maarten L. Hekkelman
393aefce8f small optimisation 2024-01-03 08:51:06 +01:00
Maarten L. Hekkelman
227ff1b8be sequence recovery and validation 2024-01-02 16:54:04 +01:00
Maarten L. Hekkelman
82086a93b0 PDBx validation and reconstruction code, take 1 2024-01-02 15:27:26 +01:00
Maarten L. Hekkelman
abd97cc1c9 Merge 2024-01-02 10:52:09 +01:00
Maarten L. Hekkelman
3315fae83e Merge branch 'cif2fasta-develop' into develop-cif2fasta 2024-01-02 10:51:11 +01:00
Maarten L. Hekkelman
d8c3c3f7f0 formatting 2024-01-02 10:44:17 +01:00
Maarten L. Hekkelman
23459879f8 export CIFPP_SHARE_DIR to parent scope in case we're included using sub_directory 2024-01-02 09:43:33 +01:00
Maarten L. Hekkelman
f1ca916d58 Merge remote-tracking branch 'origin/develop' into develop-cif2fasta 2023-12-27 14:10:25 +01:00
Maarten L. Hekkelman
6aae012ae5 Fix url in missing compound message 2023-12-27 09:55:35 +01:00
Maarten L. Hekkelman
516983427a Load inline compound info as restraints, not CCD info 2023-12-20 13:50:27 +01:00
Maarten L. Hekkelman
05d78c92f9 Update changelog 2023-12-20 11:52:49 +01:00
Maarten L. Hekkelman
dc57144472 delete garbage 2023-12-20 11:51:27 +01:00
Maarten L. Hekkelman
dd260ca45e Change order of categories in input and in output
- input matches order in file from now on
- output is ordered by parent/child relations
2023-12-20 11:51:15 +01:00
Maarten L. Hekkelman
3bc2fc4151 add missing get 2023-12-20 11:50:29 +01:00
Maarten L. Hekkelman
6c58eaa7e8 Update changelog 2023-12-13 16:28:37 +01:00
Maarten L. Hekkelman
e1a1c11a01 Add formula_weight to entity in pdb2cif 2023-12-13 16:27:41 +01:00
Maarten L. Hekkelman
95a6b4264d typo in doc 2023-12-12 09:50:04 +01:00
Maarten L. Hekkelman
4782a4e07d Merge branch 'develop' into trunk 2023-12-12 09:31:30 +01:00
Maarten L. Hekkelman
dbc14206dc New readme 2023-12-12 09:21:59 +01:00
Maarten L. Hekkelman
f4296d8858 docs ci 2023-12-12 08:26:44 +01:00
Maarten L. Hekkelman
75c4c2286d readme, docs ci 2023-12-12 08:08:40 +01:00
Maarten L. Hekkelman
b14237e8e6 Not nice 2023-12-05 16:01:20 +01:00
Maarten L. Hekkelman
df3263e4bd test 2023-12-05 15:57:51 +01:00
Maarten L. Hekkelman
ff7b413abf windows gunzip workaround 2023-12-05 15:43:52 +01:00
Maarten L. Hekkelman
07224779e6 find_program works a bit different than expected 2023-12-05 15:26:26 +01:00
Maarten L. Hekkelman
c031a3c24e Moved data file components.cif to rsrc 2023-12-05 11:53:39 +01:00
Maarten L. Hekkelman
1e74f7912c using eigen3 2023-12-05 08:59:10 +01:00
Maarten L. Hekkelman
d91f8d0876 Use fetchcontent for eigen, again 2023-12-04 18:59:03 +01:00
Maarten L. Hekkelman
43d4644fba fix license badge 2023-12-03 15:32:55 +01:00
Maarten L. Hekkelman
61a924d208 attempt to build documentation in github 2023-12-03 15:30:40 +01:00
Maarten L. Hekkelman
2692f2c1bf no update script for macOS
install with sudo in CI
2023-11-29 15:22:59 +01:00
Maarten L. Hekkelman
88b3c87bae fixes for automatic update of data 2023-11-29 14:37:28 +01:00
Maarten L. Hekkelman
7fd7dfd937 Work on WIN32 with or without CCP4 2023-11-29 13:11:13 +01:00
Maarten L. Hekkelman
9a76f02709 using gzip, now correct 2023-11-28 16:45:56 +01:00
Maarten L. Hekkelman
81cc1c73c1 use gzip if found 2023-11-28 16:24:18 +01:00
Maarten L. Hekkelman
eabda8f0f7 Fix downloading CCD file 2023-11-28 16:11:02 +01:00
Maarten L. Hekkelman
cb3917457b cleaning up ci file 2023-11-28 13:58:13 +01:00
Maarten L. Hekkelman
2c4f36a8f9 run CI on macOS 2023-11-28 13:47:53 +01:00
Maarten L. Hekkelman
d458be99bc finally read the documentation on cmake set 2023-11-28 11:41:52 +01:00
Maarten L. Hekkelman
a7f4531767 switched last test to Catch2 2023-11-28 11:14:00 +01:00
Maarten L. Hekkelman
08f5a1ad06 link to zlib::zlib
cache_dir may be undefined
2023-11-28 09:58:44 +01:00
Maarten L. Hekkelman
f2de24c851 for CI: install zlib in windows 2023-11-28 09:15:11 +01:00
Maarten L. Hekkelman
ed1eca8fb5 Build zlib if needed 2023-11-28 08:25:23 +01:00
Maarten L. Hekkelman
f792e6f13d correctly use boost::regex 2023-11-27 17:22:59 +01:00
Maarten L. Hekkelman
b1782b7a97 Merge branch 'with-catch2' into develop 2023-11-27 17:02:04 +01:00
Maarten L. Hekkelman
f3c3966e00 Merge branch 'with-catch2' of s4.hekkelman.net:git-repo/libcifpp into with-catch2 2023-11-27 17:01:43 +01:00
Maarten L. Hekkelman
8a9f72c96b boost regex standalone 2023-11-27 17:01:34 +01:00
Maarten L. Hekkelman
402d3a16ae option takes a default value, not type 2023-11-27 13:01:29 +01:00
Maarten L. Hekkelman
458e228b91 Start using postfix d for debug libraries. Rename cmake config files 2023-11-27 12:47:11 +01:00
Maarten L. Hekkelman
a817f09441 Stricter tests 2023-11-25 11:13:40 +01:00
Maarten L. Hekkelman
413ba85538 CI added 2023-11-23 09:42:12 +01:00
Maarten L. Hekkelman
c1807bf1c3 catch2 work 2023-11-23 09:41:49 +01:00
Maarten L. Hekkelman
09dd65499a Change testing framework to Catch2 2023-11-22 17:06:07 +01:00
Maarten L. Hekkelman
640552abad Remove dependency on libzeep 2023-11-22 14:48:23 +01:00
Maarten L. Hekkelman
faee1848e5 cleaner makefile 2023-11-22 14:35:32 +01:00
Maarten L. Hekkelman
c7a217fea7 cleaner makefile 2023-11-22 14:32:33 +01:00
Maarten L. Hekkelman
e4ae19a625 cleaner makefile 2023-11-22 14:30:21 +01:00
Maarten L. Hekkelman
24fe6ee583 Unused comparison result... tssk 2023-11-22 14:15:51 +01:00
Maarten L. Hekkelman
9cba754eb6 Unused comparison result... tssk 2023-11-22 14:12:45 +01:00
Maarten L. Hekkelman
9e6bb5e844 cleaner makefile 2023-11-22 14:08:06 +01:00
Maarten L. Hekkelman
a74ee255fb More info on missing compounds 2023-11-22 14:07:53 +01:00
Maarten L. Hekkelman
b9fc0c0365 Using FetchContent 2023-11-22 10:26:21 +01:00
Maarten L. Hekkelman
2ce16c74f7 Updating CMakeLists.txt 2023-11-22 09:49:09 +01:00
Maarten L. Hekkelman
6d0ea5c6f9 Drop using CCP4 monomers library
version bump
2023-11-21 14:48:51 +01:00
Maarten L. Hekkelman
b83ef112a9 Fix in structure::change_residue 2023-11-13 10:50:27 +01:00
Maarten L. Hekkelman
dd9110a3a8 Add option to not write data files 2023-11-07 16:48:44 +01:00
Maarten L. Hekkelman
88c23e1b0f Add option to not write data files 2023-11-07 16:38:47 +01:00
Maarten L. Hekkelman
dbe40e3ad6 No longer use mon_lib_list as check for existence of compound 2023-11-06 08:39:17 +01:00
Maarten L. Hekkelman
958d3b05c8 Correctly import Eigen3 2023-11-06 08:21:59 +01:00
Maarten L. Hekkelman
4e7b0baafb version bump 2023-11-02 14:29:05 +01:00
Maarten L. Hekkelman
b3dc38f2d8 update for WIN32 2023-11-02 14:12:17 +01:00
Maarten L. Hekkelman
6044d3dce3 Added cif::cell::get_volume() 2023-10-19 11:58:21 +02:00
Maarten L. Hekkelman
29446f2122 new cif::item constructors
version bump
2023-10-19 09:51:10 +02:00
Maarten L. Hekkelman
abb8673549 Better support for older cmake versions 2023-10-17 15:24:21 +02:00
Maarten L. Hekkelman
ffc8f9dcdf Better support for older cmake versions 2023-10-17 15:22:35 +02:00
Maarten L. Hekkelman
288b2bb720 update changelog 2023-10-17 12:23:59 +02:00
Maarten L. Hekkelman
fb3b7bda68 made data dir options more visible in cmake config
better error reporting in file::load
2023-10-10 13:39:13 +02:00
Maarten L. Hekkelman
6d5efe1cbd Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2023-09-26 14:40:44 +02:00
Maarten L. Hekkelman
1ceec22184 Better conversion from string to int 2023-09-26 14:40:38 +02:00
Maarten L. Hekkelman
951ff9b953 Better conversion from string to int 2023-09-26 14:39:26 +02:00
Maarten L. Hekkelman
641f06a7e7 sqrt is not constexpr on macOS 2023-09-22 09:37:15 +02:00
Maarten L. Hekkelman
915ba4ac21 describe download CCD 2023-09-18 10:49:08 +02:00
Maarten L. Hekkelman
824637d83f Update README.md, add link to documentation 2023-09-15 08:56:34 +02:00
Maarten L. Hekkelman
0871406fe3 Eigen dependency removed for clients
Typos fixed
Version bump
2023-09-14 16:03:00 +02:00
Maarten L. Hekkelman
1ad7e47b2e version bump, changelog 2023-09-14 12:53:07 +02:00
Maarten L. Hekkelman
f72a2c69d0 rename doxygen target 2023-09-14 11:14:38 +02:00
Maarten L. Hekkelman
84d9275cb8 update versionstring 2023-09-14 11:12:49 +02:00
Maarten L. Hekkelman
1b7c387c8b reintroduced get_terminal_width as an exported function 2023-09-14 09:52:52 +02:00
Maarten L. Hekkelman
2f1adbd22c clean up 2023-09-14 09:45:13 +02:00
Maarten L. Hekkelman
65031523a6 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2023-09-14 09:15:07 +02:00
Maarten L. Hekkelman
02cc0fa0f6 Using CTest
Create docs/xml directory
2023-09-14 09:13:16 +02:00
Maarten L. Hekkelman
1e1afa023f Using CTest
Create docs/xml directory
2023-09-14 09:12:28 +02:00
Maarten L. Hekkelman
41f343c2cd Create the docs/xml directory in configure step 2023-09-13 17:01:43 +02:00
Maarten L. Hekkelman
a73c4deaca Revert changes in CMakeLists.txt for docs 2023-09-13 16:45:15 +02:00
Maarten L. Hekkelman
01a21aebc4 Fix install rules 2023-09-13 16:16:16 +02:00
Maarten L. Hekkelman
49434043f2 typo 2023-09-13 16:04:30 +02:00
Maarten L. Hekkelman
2e23877912 cleaning up a readthedocs.yaml file 2023-09-13 15:23:35 +02:00
Maarten L. Hekkelman
b737dd7df4 Even more documentation 2023-09-13 14:59:57 +02:00
Maarten L. Hekkelman
137ffaf768 Merge remote-tracking branch 'github/new-docs' into develop 2023-09-13 12:15:13 +02:00
Maarten L. Hekkelman
747c6d30d2 Added better support for std::optional in conditions 2023-09-13 12:14:04 +02:00
Maarten L. Hekkelman
4585968b11 fix point_type 2023-09-13 10:23:31 +02:00
Maarten L. Hekkelman
84af564aee More documentation
Version bump
2023-09-13 10:22:28 +02:00
Maarten L. Hekkelman
86d957675e documented last code 2023-09-12 10:25:18 +02:00
Maarten L. Hekkelman
bc33e608db better docs 2023-09-12 10:04:34 +02:00
Maarten L. Hekkelman
0b5d28338e replacing std::endl where appropriate
more docs
2023-09-12 09:06:45 +02:00
Maarten L. Hekkelman
bdbf22e705 more docs 2023-09-11 15:12:37 +02:00
Maarten L. Hekkelman
160f6016ee more docs 2023-09-11 14:31:13 +02:00
Maarten L. Hekkelman
0855965edf Documenting more
Fixed colouring output manipulators
2023-09-11 12:37:51 +02:00
Maarten L. Hekkelman
fe3cbdab78 refactoring PDB interface 2023-09-11 08:37:18 +02:00
Maarten L. Hekkelman
a8c25f910d new colouring of output 2023-09-08 10:22:08 +02:00
Maarten L. Hekkelman
cb82ec9b01 new colouring of output, first attempt 2023-09-08 09:23:01 +02:00
Maarten L. Hekkelman
e84282cb9a documenting symmetry and text 2023-09-06 17:01:53 +02:00
Maarten L. Hekkelman
8b2e02e1b0 documented row 2023-09-06 14:29:14 +02:00
Maarten L. Hekkelman
1addd2be89 documented point 2023-09-06 13:49:13 +02:00
Maarten L. Hekkelman
2aebfc29ac documenting parser 2023-09-06 11:22:17 +02:00
Maarten L. Hekkelman
26a5410b38 documenting model 2023-09-06 10:59:48 +02:00
Maarten L. Hekkelman
f44e6d0948 backup of documentation 2023-09-05 16:43:24 +02:00
Maarten L. Hekkelman
d496ebf6dd backup of documentation 2023-09-05 15:40:53 +02:00
Maarten L. Hekkelman
1719ed6979 backup of documentation 2023-09-05 15:07:32 +02:00
Maarten L. Hekkelman
821895bb1b backup of documentation 2023-09-05 13:28:09 +02:00
Maarten L. Hekkelman
3f437277d1 accidentally deleted 2023-09-05 11:00:14 +02:00
Maarten L. Hekkelman
39fc56084a documentation backup 2023-09-05 10:42:11 +02:00
Maarten L. Hekkelman
e2fca07fad documenting more 2023-09-04 16:04:41 +02:00
Maarten L. Hekkelman
ec0d75ce95 documenting more 2023-09-04 13:41:21 +02:00
Maarten L. Hekkelman
877a64adaa documenting more 2023-09-04 11:28:33 +02:00
Maarten L. Hekkelman
0fcf9ed5ad documenting more 2023-09-04 09:44:36 +02:00
Maarten L. Hekkelman
c99de817fa documentation 2023-09-01 21:52:46 +02:00
Maarten L. Hekkelman
600c86a185 attempt to get some documentation 2023-09-01 17:12:54 +02:00
Maarten L. Hekkelman
1ae755b0a5 Start adding documentation 2023-08-31 14:31:41 +02:00
Maarten L. Hekkelman
7186057dd3 version bump 2023-08-22 13:45:08 +02:00
Maarten L. Hekkelman
768fec9c58 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-08-22 13:36:52 +02:00
Maarten L. Hekkelman
7197dd877b remove pkgconfig support 2023-08-22 13:36:43 +02:00
Maarten L. Hekkelman
b7aa7eac9f remove pkgconfig support 2023-08-22 13:33:34 +02:00
Maarten L. Hekkelman
818dc2f952 Remove Dart 2023-08-22 13:32:11 +02:00
Maarten L. Hekkelman
9dc5d11829 newer version string code, this should be final 2023-08-16 11:26:20 +02:00
Maarten L. Hekkelman
8565e1b408 Better version string 2023-08-15 13:27:10 +02:00
Maarten L. Hekkelman
bfc7133786 fix config
better version string implementation
2023-08-15 09:49:04 +02:00
Maarten L. Hekkelman
15a49f1bb4 Fix uncompressing concatenated gzip files 2023-08-04 09:51:11 +02:00
Maarten L. Hekkelman
db1dff16fe update changelog 2023-08-03 10:20:40 +02:00
Maarten L. Hekkelman
8d7d9d3a31 Fix for PDB files that do not terminate their last line with a new line character 2023-08-03 10:19:12 +02:00
Maarten L. Hekkelman
078bf8a559 stricter code 2023-07-31 10:48:47 +02:00
Maarten L. Hekkelman
1f314a5e9b removing compiler warnings on MSVC 2023-07-19 15:49:26 +02:00
Maarten L. Hekkelman
0adb50ac01 Add dependency on Eigen3 in config 2023-07-19 14:21:43 +02:00
Maarten L. Hekkelman
d91707cd06 Link to Eigen3 2023-07-19 14:11:22 +02:00
Maarten L. Hekkelman
c0e7ee4eeb small stuff 2023-07-18 15:25:44 +02:00
Pino Toscano
c143a7223e build: fix installation of cron script on GNU/Hurd (#46)
Use the same Linux paths, as the cron implementations available on
the Hurd as usually the same as Linux.
2023-07-16 12:01:17 +02:00
Maarten L. Hekkelman
2c951ba146 prevent downloading components.cif if it already exists 2023-06-20 13:47:13 +02:00
Maarten L. Hekkelman
660aadcd9c conditional include <compare> 2023-06-20 11:11:00 +02:00
Maarten L. Hekkelman
91d6adb980 Version bump 2023-06-20 09:48:27 +02:00
Maarten L. Hekkelman
b79ddd55c5 Update readme 2023-06-20 09:45:46 +02:00
Maarten L. Hekkelman
0ca645c634 right align number, if there was a dictionary loaded containing the required information 2023-06-20 08:48:40 +02:00
Maarten L. Hekkelman
676c0c8dc8 Added include compare for spaceship operator 2023-06-20 08:48:08 +02:00
Maarten L. Hekkelman
5c366ad9b1 - remove three_letter_code for CCP4 dictionaries
- fix test for equality of compound ID's, they are case insensitive you know
2023-06-13 11:42:39 +02:00
Maarten L. Hekkelman
836aed6ea9 Fix includes to contain <cstdint> 2023-06-08 13:15:43 +02:00
Maarten L. Hekkelman
50df250415 Merge branch 'develop' into trunk 2023-06-08 10:12:03 +02:00
Maarten L. Hekkelman
2409fc5b7b update changelog, version bump 2023-06-08 10:10:49 +02:00
Maarten L. Hekkelman
8a1184a24c Fix cif_id_for_number 2023-06-07 19:11:20 +02:00
Maarten L. Hekkelman
d2fbc54765 New cache location 2023-06-07 14:07:27 +02:00
Maarten L. Hekkelman
1bcb26ba75 extend validator
faster unique_id
2023-06-07 13:08:36 +02:00
Maarten L. Hekkelman
32f4749d84 faster cif parser 2023-06-07 11:19:35 +02:00
Maarten L. Hekkelman
da12be879a progress_bar consuming too much time 2023-06-07 09:15:17 +02:00
Maarten L. Hekkelman
94a38ad4e8 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-06-06 14:31:26 +02:00
Maarten L. Hekkelman
20ef79a172 for c++17, limited version of std::string_view 2023-06-06 14:30:11 +02:00
Maarten L. Hekkelman
92bf25476e Speed improvements 2023-06-06 14:12:21 +02:00
Maarten L. Hekkelman
b55e074dd7 reserve some token buffer space 2023-06-06 09:33:31 +02:00
Maarten L. Hekkelman
7b654a837d with reserved words automaton 2023-06-06 09:22:55 +02:00
Maarten L. Hekkelman
ae9d247d22 optimised the parser a bit 2023-06-05 13:43:31 +02:00
Maarten L. Hekkelman
16b7deafe8 Better is_unquoted_string test 2023-06-02 17:09:57 +02:00
Maarten L. Hekkelman
f2cfe28458 Update README 2023-05-31 15:56:50 +02:00
Maarten L. Hekkelman
2e8a52949e Update example and README 2023-05-31 15:54:53 +02:00
Maarten L. Hekkelman
441e142767 Update readme 2023-05-31 15:42:54 +02:00
Maarten L. Hekkelman
bf9bdd2aae Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2023-05-31 15:17:00 +02:00
Maarten L. Hekkelman
ce14593f0b improved loading resources from absolute path
better error reporting when loading dictionary
2023-05-31 15:16:10 +02:00
Maarten L. Hekkelman
1c02a451e1 improved has_atom_id
added couple of comparison operators to sym_op class
2023-05-16 13:55:07 +02:00
Maarten L. Hekkelman
448855a2d3 catch error in create entity for branch 2023-05-09 11:46:37 +02:00
Maarten L. Hekkelman
8ac8e89f2b Fix progress bar by removing conditional variable 2023-05-02 13:45:02 +02:00
Maarten L. Hekkelman
2281f59401 Remove struct_conn records as well in remove_branch 2023-05-02 13:44:36 +02:00
Maarten L. Hekkelman
4cb0673370 small change to matrix 2023-04-25 10:13:30 +02:00
Maarten L. Hekkelman
76c5706f7c moving to eigen3 eigensolver, fixing include and dependencies 2023-04-22 14:14:48 +02:00
Maarten L. Hekkelman
2bf4284ff4 cleanup 2023-04-21 14:52:12 +02:00
Maarten L. Hekkelman
d9e2fc97f3 Added missing spinner test 2023-04-21 14:50:22 +02:00
Maarten L. Hekkelman
85dfdf4174 Better progress bar 2023-04-21 14:49:54 +02:00
Martin Salinas
1bede3efda Removed unused argument warning (#36)
As argument rhs is not being used in that equals (should the equals function always return false), I added that flag so the compiler skips that warning.
2023-04-21 09:18:47 +02:00
Maarten L. Hekkelman
505f0fdd31 oops 2023-04-20 16:33:24 +02:00
Maarten L. Hekkelman
eed7ec3a4a Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-04-20 13:38:48 +02:00
Maarten L. Hekkelman
fdb057e0e2 for now, require eigen3
add inverse_symmetry_copy to crystal
2023-04-20 13:14:52 +02:00
Maarten L. Hekkelman
3fddd1a628 Using quaternions, when possible 2023-04-20 11:37:36 +02:00
Maarten L. Hekkelman
2440706b87 backup 2023-04-19 18:51:41 +02:00
Maarten L. Hekkelman
cf628fa95c backup 2023-04-19 18:36:33 +02:00
Maarten L. Hekkelman
2b0b47d20d Fix special case 2023-04-19 16:04:59 +02:00
Maarten L. Hekkelman
a8abf2804f attempt to use quaternions 2023-04-19 16:01:52 +02:00
Maarten L. Hekkelman
22d7757949 Introduced cif::crystal 2023-04-19 10:17:38 +02:00
Maarten L. Hekkelman
0b0d170c96 a bit of documentation 2023-04-19 09:57:49 +02:00
Maarten L. Hekkelman
1e8e9adf62 Merge branch 'trunk' into develop 2023-04-19 09:22:49 +02:00
Maarten L. Hekkelman
0f03fc31e0 added required include 2023-04-19 09:22:32 +02:00
Maarten L. Hekkelman
518432e0fb test data 2023-04-17 20:54:57 +02:00
Maarten L. Hekkelman
10ef3464ef Fix symmetry issue 2023-04-17 20:52:10 +02:00
Maarten L. Hekkelman
226abbd577 Merge branch 'develop' of s4.hekkelman.net:git-repo/libcifpp into develop 2023-04-17 18:56:46 +02:00
Maarten L. Hekkelman
8d66f42ab1 more test cases 2023-04-17 18:56:02 +02:00
Maarten L. Hekkelman
0f14d06f9a Added inverse symmetry operation 2023-04-14 19:38:39 +02:00
Maarten L. Hekkelman
c53be78496 symmetry fixes 2023-04-14 19:04:16 +02:00
Maarten L. Hekkelman
a38f31ce48 fix closest_symmetry_copy 2023-04-14 17:56:59 +02:00
Maarten L. Hekkelman
1258bd5047 eigen, fixed 2023-04-14 14:08:52 +02:00
Maarten L. Hekkelman
d25cbeb14c matrix eigen value work 2023-04-14 11:47:18 +02:00
Maarten L. Hekkelman
9b60a07fb6 calculating eigen values 2023-04-13 19:55:32 +02:00
Maarten L. Hekkelman
c0dd41ce50 added inverse symmetry operation 2023-04-13 15:49:15 +02:00
Maarten L. Hekkelman
4cff92bbcc symmetry operations now working correctly 2023-04-13 11:42:59 +02:00
Maarten L. Hekkelman
9aa8a223c7 symmetry work 2023-04-12 17:00:09 +02:00
Maarten L. Hekkelman
fb59adcfdd Fix symmetry rotational numbers 2023-04-12 10:59:23 +02:00
Maarten L. Hekkelman
4acca8a3e3 Merge branch 'trunk' into develop 2023-04-07 09:31:11 +02:00
Maarten L. Hekkelman
c1030d2b08 Merge branch 'MartinSalinas98-trunk' into trunk 2023-04-07 09:26:00 +02:00
Maarten L. Hekkelman
16a185c6c0 More include changes 2023-04-07 09:16:38 +02:00
Maarten L. Hekkelman
174e818bd0 Merge branch 'trunk' of github.com:MartinSalinas98/libcifpp into MartinSalinas98-trunk 2023-04-07 08:43:44 +02:00
Maarten L. Hekkelman
7f829bf5df Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2023-04-07 08:43:01 +02:00
Maarten L. Hekkelman
71908282bb merged from trunk 2023-04-07 08:42:46 +02:00
MartinSalinas98
db3ae446af Imported local files with relative path 2023-04-07 03:39:02 +02:00
Martin Salinas
bc7d291307 Merge branch 'PDB-REDO:trunk' into trunk 2023-04-07 03:33:57 +02:00
Maarten L. Hekkelman
cfd4702279 Fix memory leak 2023-04-05 20:46:18 +02:00
Maarten L. Hekkelman
54eefb546d Fix memory leak 2023-04-05 20:44:47 +02:00
Maarten L. Hekkelman
6af0d96a4e Fix memory leak in category 2023-04-05 20:28:47 +02:00
Maarten L. Hekkelman
eb50bee4a3 atom_type_traits changes 2023-04-04 19:19:30 +02:00
Maarten L. Hekkelman
b6143f3652 optimise load atom data 2023-03-30 20:49:03 +02:00
Maarten L. Hekkelman
348aa7afb6 fix test (use gTestDir) 2023-03-30 20:48:41 +02:00
Martin Salinas
66912b68cc Commented unused variable
```
/home/msalinas/Documents/standalone-installations/cifParsers/libcifpp/src/pdb/cif2pdb.cpp: In function ‘std::tuple<int, int> cif::pdb::WriteCoordinatesForModel(std::ostream&, const cif::datablock&, const std::map<std::__cxx11::basic_string<char>, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >&, std::set<std::__cxx11::basic_string<char> >&, int)’:
/home/msalinas/Documents/standalone-installations/cifParsers/libcifpp/src/pdb/cif2pdb.cpp:3362:15: warning: unused variable ‘pdbx_nonpoly_scheme’ [-Wunused-variable]
 3362 |         auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
      |         
```
This warning appeared while compiling the library. The use of that variable below has been commented, so I think it's appropiate to do the same thing with the unused variable.
2023-03-28 13:54:45 +02:00
Maarten L. Hekkelman
84dd218758 Merge branch 'MartinSalinas98-patch-1' into trunk 2023-03-28 11:33:05 +02:00
Maarten L. Hekkelman
106ae38976 Update readme 2023-03-28 11:32:11 +02:00
Maarten L. Hekkelman
f1a52245ea Merge branch 'patch-1' of github.com:MartinSalinas98/libcifpp into MartinSalinas98-patch-1 2023-03-28 11:27:48 +02:00
Maarten L. Hekkelman
cea38e5bb2 Merge branch 'trunk' into develop 2023-03-28 10:31:24 +02:00
Maarten L. Hekkelman
ed5aac358c libcifpp really requires zlib, not only private. 2023-03-28 10:27:58 +02:00
Maarten L. Hekkelman
5eb128251e Added category::find1<std::optional> 2023-03-27 10:36:47 +02:00
Maarten L. Hekkelman
cfa46ec954 Added model::has_atom_id 2023-03-23 14:32:21 +01:00
Martin Salinas
07cc60e264 Fixed installation commands
Installation commands in the readme cause an error when running last command `cmake --install .` because of the lack of sudo privileges.
The following commands don't require sudo to run successfully and install the library.
2023-03-23 11:42:10 +01:00
Maarten L. Hekkelman
90973dc547 version bump, update changelog 2023-03-22 12:39:16 +01:00
Maarten L. Hekkelman
12e3d71b00 fix construct_from_angle_axis 2023-03-21 19:49:21 +01:00
Maarten L. Hekkelman
9addc8f873 fix remove_atom
add create_water
2023-03-21 19:49:11 +01:00
Maarten L. Hekkelman
343465cef0 Added test for create_non_poly with initializers 2023-03-08 16:00:40 +01:00
Maarten L. Hekkelman
bec5159415 residue numbering in pdb, again... 2023-02-14 08:28:40 +01:00
Maarten L. Hekkelman
f8da8360e6 write twin info in pdb format 2023-02-14 08:27:13 +01:00
Maarten L. Hekkelman
fb2ad7b75d Fix in REMARK3 parser for more strict mmcif_pdbx dictionary 2023-02-10 16:24:53 +01:00
Maarten L. Hekkelman
24aa7a70e5 Fix writing pdbx_ens_id 2023-02-07 11:38:15 +01:00
Maarten L. Hekkelman
5ade3d6cdd Fixes in update data script 2023-02-06 16:10:41 +01:00
Maarten L. Hekkelman
0d8e548ffc pdb2cif and vv 2023-02-06 14:19:56 +01:00
Maarten L. Hekkelman
b09650812f oops 2023-02-06 06:48:57 +01:00
Maarten L. Hekkelman
acc9ad5c08 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-02-05 11:28:03 +01:00
Maarten L. Hekkelman
67b6c4bd27 update downloaded files only when needed and clean up afterwards 2023-02-05 11:27:54 +01:00
Maarten L. Hekkelman
7a1d3dbdfa Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-02-03 19:21:08 +01:00
Maarten L. Hekkelman
4bf10df0c5 include <array> 2023-02-03 19:20:40 +01:00
Maarten L. Hekkelman
d84faad109 Accept X as alias for atom symbol Nn 2023-02-02 11:23:40 +01:00
Maarten L. Hekkelman
e01ace7ea4 write auth_seq_num as well as pdb_seq_num for nonpolies 2023-02-02 09:49:21 +01:00
Maarten L. Hekkelman
e004e1591e fix cron script 2023-02-02 09:35:44 +01:00
Maarten L. Hekkelman
4613084e1b find_first, find_min, find_max, count added
PDB writing changed for auth_seq_num
version bump
2023-02-01 13:46:08 +01:00
Maarten L. Hekkelman
637b795a8f Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-02-01 09:00:51 +01:00
Maarten L. Hekkelman
4de981a3c0 better handling of missing residues in pdb2cif 2023-01-31 20:32:57 +01:00
Maarten L. Hekkelman
15db026e27 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-01-31 19:10:44 +01:00
Maarten L. Hekkelman
d88d520553 Use std::experimental::is_detected from libzeep, if needed
version bump
2023-01-25 17:06:47 +01:00
Maarten L. Hekkelman
46cd98ea1d Dependencies and share location in Win 2023-01-25 17:01:39 +01:00
Maarten L. Hekkelman
d10328d891 Use the zeep implementation of std::experimental::is_detected 2023-01-25 16:31:13 +01:00
Maarten L. Hekkelman
e418a17256 Version bump 2023-01-25 15:01:28 +01:00
Maarten L. Hekkelman
627d3b9df2 export by default, fixes for MSVC 2023-01-25 11:23:08 +01:00
Maarten L. Hekkelman
ba28ade414 clean up 2023-01-25 10:45:08 +01:00
Maarten L. Hekkelman
7c11130357 explicitly export what needs to be exported 2023-01-25 10:07:54 +01:00
Maarten L. Hekkelman
151915beea Merge branch 'exports' into develop 2023-01-25 09:44:04 +01:00
Maarten L. Hekkelman
4f9aacb338 Merge branch 'trunk' into develop 2023-01-25 09:41:59 +01:00
Maarten L. Hekkelman
1f8e491ddc generate exports header 2023-01-25 09:41:43 +01:00
Maarten L. Hekkelman
05cfa92182 revert version number 2023-01-17 14:16:36 +01:00
Maarten L. Hekkelman
e8031aeb49 Fix is_cis 2023-01-17 14:12:33 +01:00
Maarten L. Hekkelman
85885406aa Fix sugar test 2023-01-17 14:10:50 +01:00
Maarten L. Hekkelman
636f17d78d Update changelog 2023-01-17 14:10:38 +01:00
Maarten L. Hekkelman
29559a5339 Fix is_cis 2023-01-17 13:51:46 +01:00
Maarten L. Hekkelman
19f2fd75c9 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-01-04 17:14:33 +01:00
Maarten L. Hekkelman
8a60bae335 non-throwing remove_residue
add chem_comp for sugars
2023-01-04 17:05:48 +01:00
Maarten L. Hekkelman
fa5ff60550 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2023-01-03 21:31:09 +01:00
Maarten L. Hekkelman
f6e0568964 start reconstructing 2023-01-03 21:30:17 +01:00
Maarten L. Hekkelman
fa27a11fea some quaternion additions 2023-01-03 16:45:53 +01:00
Maarten L. Hekkelman
19706559cb Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2023-01-02 14:06:16 +01:00
Maarten L. Hekkelman
0a06a0a51d Fix cron script 2023-01-02 14:06:09 +01:00
Maarten L. Hekkelman
b045177734 less nervous progress bar 2023-01-02 14:05:47 +01:00
Maarten L. Hekkelman
7ee5fa8765 some quaternion tests added 2023-01-02 14:05:31 +01:00
Maarten L. Hekkelman
3e690048a6 sugar work 2023-01-02 14:04:56 +01:00
Maarten L. Hekkelman
7ec3bfea9f 3d work 2023-01-02 14:04:41 +01:00
Maarten L. Hekkelman
098f3fd496 FIx compound factory stacking 2022-12-21 21:14:41 +01:00
Maarten L. Hekkelman
5476eef049 some extensions for sugar tree building 2022-12-21 16:33:33 +01:00
Maarten L. Hekkelman
33c1eea9a1 Fix copy construction, do not copy the links 2022-12-19 17:40:43 +01:00
Maarten L. Hekkelman
d3432ed87c merging in fixes from develop branch 2022-12-15 09:05:51 +01:00
Maarten L. Hekkelman
f05363ea93 duh 2022-12-15 08:51:05 +01:00
Maarten L. Hekkelman
77389c20a4 Fix equals in condition_impl, columns might be unknown 2022-12-15 08:49:49 +01:00
Maarten L. Hekkelman
7c5f1ba85e Merge branch 'trunk' into develop 2022-12-14 10:49:56 +01:00
Maarten L. Hekkelman
e7c34cc15c Merge changes 2022-12-14 10:47:20 +01:00
Maarten L. Hekkelman
72fd03a6b2 formatting, fix in is_unquoted test 2022-12-13 16:48:09 +01:00
Maarten L. Hekkelman
61f464ae4d Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-12-13 16:08:16 +01:00
Maarten L. Hekkelman
19cdf66f10 fix operator or for conditions (equals or empty) 2022-12-13 16:05:23 +01:00
Maarten L. Hekkelman
0c036df6a8 fix operator or for conditions (equals or empty) 2022-12-13 16:02:55 +01:00
Maarten L. Hekkelman
4c1b9d83d1 Fix memory leak 2022-12-13 15:49:11 +01:00
Maarten L. Hekkelman
b976b4657b Optimised erase III 2022-12-13 15:41:57 +01:00
Maarten L. Hekkelman
eba04950d5 Optimised erase II 2022-12-13 11:31:56 +01:00
Maarten L. Hekkelman
0c70df27ec Optimised erase 2022-12-13 10:38:33 +01:00
Maarten L. Hekkelman
d83f34722b Accept and fix incorrect SEQRES 2022-12-13 09:50:40 +01:00
Maarten L. Hekkelman
652b6021d3 improved parser. is_non_quoted string 2022-12-07 16:58:54 +01:00
Maarten L. Hekkelman
7fe9c87b6e Some small fixes for windows 2022-12-07 15:39:26 +01:00
Maarten L. Hekkelman
9b2ae6d7fd Fix update script, order is important 2022-11-28 08:48:13 +01:00
Maarten L. Hekkelman
8fd5b9a34b Remove PATH_MAX to enable compilation on Debian/hurd 2022-11-20 13:30:44 +01:00
Maarten L. Hekkelman
dffbf52d04 removed erronous static-assert 2022-11-19 11:17:50 +01:00
Maarten L. Hekkelman
57ac5f0112 Fix for 32bit, version bump 2022-11-19 10:21:36 +01:00
Maarten L. Hekkelman
d5a71b0b24 Fix verbose checking
faster aniso_row retrieval
report missing header line for pdb
2022-11-16 17:07:28 +01:00
Maarten L. Hekkelman
d95b7be2e4 removed remaining lzma references 2022-11-15 12:11:41 +01:00
Maarten L. Hekkelman
2f0a23f56a debian dislikes tweaks 2022-11-14 09:28:19 +01:00
Maarten L. Hekkelman
13b218f643 revert export of CIFPP_SHARE_DIR variable 2022-11-14 08:53:52 +01:00
Maarten L. Hekkelman
92a836ecdc More location fixes 2022-11-13 11:43:35 +01:00
Maarten L. Hekkelman
c88a46f155 Fix installation issues, version bump 2022-11-13 11:16:51 +01:00
Maarten L. Hekkelman
8882a34984 installation dirs cleanup 2022-11-11 20:40:42 +01:00
Maarten L. Hekkelman
2d4a1731d9 Merge branch 'develop' into trunk 2022-11-11 09:04:30 +01:00
Maarten L. Hekkelman
be1e3073f1 Fixes based on upstream changes 2022-11-10 17:06:59 +01:00
Maarten L. Hekkelman
6fe5a04cc0 static zlib on windows 2022-11-09 19:52:54 +01:00
Maarten L. Hekkelman
426432885e back on linux 2022-11-09 18:57:32 +01:00
Maarten L. Hekkelman
9bc3381814 First steps to compile libcifpp 5 in Windows 2022-11-09 17:23:47 +01:00
Maarten L. Hekkelman
f3a492fd67 Fix regex include 2022-11-09 15:07:04 +01:00
Maarten L. Hekkelman
8cd2aa46b6 simply include boost-regex 2022-11-09 14:58:25 +01:00
Maarten L. Hekkelman
d638d634ba Remove gxrio and replace it with stripped down version 2022-11-09 14:23:51 +01:00
Maarten L. Hekkelman
35196789e0 updated makefile, use system regex if good enough 2022-11-09 10:33:44 +01:00
Maarten L. Hekkelman
e907ce6c29 exclude regex from tar 2022-11-09 09:19:39 +01:00
Maarten L. Hekkelman
b80bc20d17 Use system installed boost headers for regex, when available 2022-11-09 08:54:56 +01:00
Maarten L. Hekkelman
3a87eaa435 include utility
exclude from all for gxrio
2022-11-08 15:46:03 +01:00
Maarten L. Hekkelman
143eb57f04 Removed remaining mrc_add_resources call 2022-11-08 15:38:06 +01:00
Maarten L. Hekkelman
6cc550bf18 oops 2022-11-08 15:20:29 +01:00
Maarten L. Hekkelman
7f5336661b Error reporting 2022-11-08 14:53:15 +01:00
Maarten L. Hekkelman
e44539ef2c Checks before building indices. Better error reporting 2022-11-08 13:45:06 +01:00
Maarten L. Hekkelman
a2f5850173 avoid crash on empty branches 2022-11-08 09:29:15 +01:00
Maarten L. Hekkelman
283f4883f7 Fix makefile 2022-11-08 08:43:49 +01:00
Maarten L. Hekkelman
ce9842f671 Fix in PDB export
Better add_git_submodule
2022-11-08 08:28:02 +01:00
Maarten L. Hekkelman
b784433fd7 less verbose parser 2022-11-07 17:00:49 +01:00
Maarten L. Hekkelman
8c064e7c0a version bump 2022-11-07 12:37:01 +01:00
Maarten L. Hekkelman
c15a8bd127 export source tarballs 2022-11-07 12:32:14 +01:00
Maarten L. Hekkelman
64e40e7b31 Fix writing PDB CISPEP records
Better checking for open files
More verbose parser
2022-11-07 11:06:06 +01:00
Maarten L. Hekkelman
06d254e0de Revert "Use system version of boost regex, when available"
This reverts commit eaa342ca32.
2022-11-03 16:52:09 +01:00
Maarten L. Hekkelman
eaa342ca32 Use system version of boost regex, when available 2022-11-03 16:36:14 +01:00
Maarten L. Hekkelman
782f7c467b Support for cifv1.0 (empty category names) 2022-11-03 15:48:58 +01:00
Maarten L. Hekkelman
c45d02cb70 sigh 2022-11-03 13:09:04 +01:00
Maarten L. Hekkelman
5b4c131eea More verbose 2022-11-03 13:00:11 +01:00
Maarten L. Hekkelman
bbe71af99e Accept invalid CCD component files, for now 2022-11-03 12:18:26 +01:00
Maarten L. Hekkelman
49912d019f Better error reporting 2022-11-03 11:43:57 +01:00
Maarten L. Hekkelman
d4758e09d7 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-03 09:37:38 +01:00
Maarten L. Hekkelman
f0a913cc07 Fixes for deuterium...
Fixes for sugar branches
2022-11-03 09:37:31 +01:00
Maarten L. Hekkelman
8b0b8e3688 removed submodule 2022-11-01 16:52:42 +01:00
Maarten L. Hekkelman
de4fc8a015 Added necessary include 2022-11-01 16:50:54 +01:00
Maarten L. Hekkelman
bf1e56ec53 Another attempt 2022-11-01 16:29:59 +01:00
Maarten L. Hekkelman
040b4e4ff9 clean up 2022-11-01 14:54:26 +01:00
Maarten L. Hekkelman
4666ee3145 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 14:54:10 +01:00
Maarten L. Hekkelman
2958c56a92 change parser to use streambuf directly 2022-11-01 14:53:17 +01:00
Maarten L. Hekkelman
9cff8768ab Merge branch 'potential-performance-gain' into trunk 2022-11-01 13:41:06 +01:00
Maarten L. Hekkelman
cc671b8006 fixes in numeric conversions 2022-11-01 13:41:01 +01:00
Maarten L. Hekkelman
728abe6d0e less verbose pdb2cif 2022-11-01 12:11:04 +01:00
Maarten L. Hekkelman
7b8f3f2538 optimise retract buffer 2022-11-01 11:56:18 +01:00
Maarten L. Hekkelman
98db98f916 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 09:54:20 +01:00
Maarten L. Hekkelman
96a67b23ca Fix loading dictionaries 2022-11-01 09:53:36 +01:00
Maarten L. Hekkelman
2c3d7542e5 no submodule for gxrio 2022-11-01 08:45:52 +01:00
Maarten L. Hekkelman
f84d83b723 Add gxrio dependency again 2022-10-31 10:50:53 +01:00
Maarten L. Hekkelman
b1837ba029 for freebsd 2022-10-31 10:35:28 +01:00
Maarten L. Hekkelman
260438fa44 fix for meta project 2022-10-30 19:51:29 +01:00
Maarten L. Hekkelman
23d82beb04 Fix version string 2022-10-30 13:02:53 +01:00
Maarten L. Hekkelman
19db5d736b Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:14:16 +01:00
Maarten L. Hekkelman
6946c40657 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:12:30 +01:00
Maarten L. Hekkelman
bd3723ee20 Do not crash on empty rows (find result) 2022-10-30 11:12:19 +01:00
Maarten L. Hekkelman
1f078d4827 update for meta project 2022-10-30 10:28:01 +01:00
Maarten L. Hekkelman
3c62a38667 write out PDB files 2022-10-28 16:13:33 +02:00
Maarten L. Hekkelman
7ffda74e3d pdb2cif, avoid duplicate key violation on REMARK 350 2022-10-26 16:43:21 +02:00
Maarten L. Hekkelman
560f6debc6 updating dictionaries and default to downloading components.cif 2022-10-26 15:46:50 +02:00
Maarten L. Hekkelman
ea1ac33de8 Update README.md
No more boost requirement
2022-10-26 15:30:56 +02:00
Maarten L. Hekkelman
7ea30237ae Merge branch 'trunk' into new-develop 2022-10-26 15:23:35 +02:00
Maarten L. Hekkelman
bc668487e2 Fix installation of dictionary files 2022-10-26 14:29:40 +02:00
Maarten L. Hekkelman
1769f9864b Fixed TLS parser, and more 2022-10-26 13:23:23 +02:00
Maarten L. Hekkelman
75ffd97802 use gxrio internally for reading pdb files 2022-10-25 12:41:14 +02:00
Maarten L. Hekkelman
cfd5b7da0f Reintroduce more atom members 2022-10-25 11:51:35 +02:00
Maarten L. Hekkelman
26b7d1df26 Write chem_comp for polymer residues in pdb2cif 2022-10-21 16:01:12 +02:00
Maarten L. Hekkelman
0747929cd6 Allow missing CRYST1 record in PDB files... finally 2022-10-21 15:42:19 +02:00
Maarten L. Hekkelman
5bcfb102f4 change_residue fix 2022-10-20 19:48:26 +02:00
Maarten L. Hekkelman
908fb1ccea gxrio dependency 2022-10-20 13:36:18 +02:00
Maarten L. Hekkelman
af8389baa4 fix dependencies 2022-10-11 15:36:53 +02:00
Maarten L. Hekkelman
24ca1017cd Fix mm::polymer to have its own auth_asym_id 2022-10-11 12:17:42 +02:00
Maarten L. Hekkelman
85c21aeb01 row as vector 2022-10-11 09:05:03 +02:00
Maarten L. Hekkelman
2f249048d9 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-10-10 09:03:34 +02:00
Maarten L. Hekkelman
974cb40ab3 for gcc 9.4 2022-10-10 09:01:40 +02:00
Maarten L. Hekkelman
c01c16ea60 more accessors in cif::mm::atom 2022-10-05 17:04:39 +02:00
Maarten L. Hekkelman
bd157c249c Reintroduced PDB conversion code 2022-10-05 14:18:08 +02:00
Maarten L. Hekkelman
b0ac33c1b1 optimisation in query
fix memory problem in cif::item, making it safer
2022-10-05 13:21:57 +02:00
Maarten L. Hekkelman
82e73a9525 All tests pass 2022-10-05 10:13:53 +02:00
Maarten L. Hekkelman
adc316d671 Use index in lookup, when available
start branch code in model::structure (not done yet)
2022-10-04 13:57:38 +02:00
Maarten L. Hekkelman
6a0b6b99ac using index in category 2022-10-03 16:22:32 +02:00
Maarten L. Hekkelman
08dd9dd5b4 missing methods 2022-10-03 10:01:42 +02:00
Maarten L. Hekkelman
557a1c2d00 Reactivated several structure methods 2022-10-03 09:32:20 +02:00
Maarten L. Hekkelman
f77bbfedda updates 2022-10-03 08:41:50 +02:00
Maarten L. Hekkelman
3aa3fe19e2 Better validate diagnostic output 2022-09-30 10:25:55 +02:00
Maarten L. Hekkelman
35fcc0493e Moving cif::Structure back in as model 2022-09-28 17:11:04 +02:00
Maarten L. Hekkelman
9485bec2fa Moved symmetry back into libcifpp 2022-09-27 16:09:24 +02:00
Maarten L. Hekkelman
4b759e731c Moved code back into libcifpp, 3d stuff mainly 2022-09-27 14:34:31 +02:00
Maarten L. Hekkelman
7dd6a8a1aa fixes and updated submodules 2022-08-24 16:20:16 +02:00
Maarten L. Hekkelman
96725ae8b9 Clean up makefile 2022-08-19 13:55:35 +02:00
Maarten L. Hekkelman
b3a0ded9a8 add git submodule 2022-08-19 13:38:59 +02:00
Maarten L. Hekkelman
184c491803 changed find1 a bit more
reverted to returning empty results in case nothing is found
2022-08-18 17:20:41 +02:00
Maarten L. Hekkelman
f944b3ce00 changed find1 logic for only row_handles, now returns empty row_handle instead of throwing when not found
various condition fixes
2022-08-18 11:16:28 +02:00
Maarten L. Hekkelman
2557f41863 some documentation and cleanup of cif::item 2022-08-17 20:40:31 +02:00
Maarten L. Hekkelman
2b92cee3f7 some documentation and cleanup of cif::item 2022-08-17 20:14:48 +02:00
Maarten L. Hekkelman
8071768579 better row_handle::get 2022-08-17 19:39:14 +02:00
Maarten L. Hekkelman
71c8541b68 validate links
fix get_parents_condition
2022-08-17 17:08:38 +02:00
Maarten L. Hekkelman
3d66c77188 writing order 2022-08-17 16:02:57 +02:00
Maarten L. Hekkelman
8701512961 Remove cif++/Cif++.hpp
Implemented reorder by index
2022-08-17 15:29:14 +02:00
Maarten L. Hekkelman
b317c780ba Fixes for pre c++20
file constructor from raw data
2022-08-17 15:02:56 +02:00
Maarten L. Hekkelman
681aa3bf8b clean up 2022-08-17 11:24:29 +02:00
Maarten L. Hekkelman
a68e053471 Remove tests 2022-08-17 11:17:09 +02:00
Maarten L. Hekkelman
25a90e3b32 split out pdbx code
fix dangling memory reference
2022-08-17 11:14:06 +02:00
Maarten L. Hekkelman
2f62759dfe Before split-out of libpdbxpp 2022-08-17 08:50:56 +02:00
Maarten L. Hekkelman
cf9ec46ab8 Removed DSSP code, moved to dssp project 2022-08-16 16:57:25 +02:00
Maarten L. Hekkelman
ecbef51b10 - fix category::clear
- fix dssp TCO value
2022-08-16 16:34:15 +02:00
Maarten L. Hekkelman
dfff8c9587 condition work (children, parents) 2022-08-16 16:18:11 +02:00
Maarten L. Hekkelman
cc5d52bbf9 boost::regex usage 2022-08-16 16:17:56 +02:00
Maarten L. Hekkelman
a9e9f86c93 Renaming internal use of mmcif_pdbx dictionary. It was wrong to use the name mmcif_pdbx_v50 2022-08-16 16:17:30 +02:00
Maarten L. Hekkelman
a2c52713b2 Refactored dssp to be standalone 2022-08-16 11:50:43 +02:00
Maarten L. Hekkelman
545aca88d8 More info 2022-08-15 17:17:56 +02:00
Maarten L. Hekkelman
ac27248784 Started moving DSSP code 2022-08-15 15:37:07 +02:00
Maarten L. Hekkelman
5758bfbaea Required changes for FreeBSD 2022-08-15 10:11:11 +02:00
Maarten L. Hekkelman
8d3a079774 Fix regex for item_validator 2022-08-15 10:09:18 +02:00
Maarten L. Hekkelman
718c138510 Fix constructor for item 2022-08-15 09:49:23 +02:00
Maarten L. Hekkelman
29aac70e67 Changed boost::regex dependency to git submodule, in case it is needed 2022-08-15 08:59:47 +02:00
Maarten L. Hekkelman
700575adfe Merged 2022-08-15 07:42:51 +02:00
Maarten L. Hekkelman
9fe6e5df85 remove dependency on boost::program_options 2022-08-14 20:08:36 +02:00
Maarten L. Hekkelman
ce7434a463 Make boost::regex optional 2022-08-14 19:31:31 +02:00
Maarten L. Hekkelman
ad7d876d07 new dependency 2022-08-13 21:55:19 +02:00
Maarten L. Hekkelman
0dc19e86fa fixed example 2022-08-11 20:53:54 +02:00
Maarten L. Hekkelman
a12acaa5c7 moving from namespace cif::v2 to cif 2022-08-11 20:39:34 +02:00
Maarten L. Hekkelman
ff62efe720 More tests 2022-08-11 16:49:59 +02:00
Maarten L. Hekkelman
2407877184 Reordering all files 2022-08-11 15:44:59 +02:00
Maarten L. Hekkelman
5fde050738 debugging 2022-08-10 17:09:53 +02:00
Maarten L. Hekkelman
a855f88073 Getting rid of boost/algorithm/string 2022-08-10 16:46:48 +02:00
Maarten L. Hekkelman
cfa2acd61d backup 2022-08-08 08:10:22 +02:00
Maarten L. Hekkelman
d9db2fe2e7 insert 2022-08-07 20:48:59 +02:00
Maarten L. Hekkelman
15d62cd3b6 Fix mapping between pdbx_poly_seq_scheme and atom_site while reading an mmcif::Structure 2022-08-07 12:51:16 +02:00
Maarten L. Hekkelman
19a89aeb7e - start row_initializer 2022-08-07 11:49:09 +02:00
Maarten L. Hekkelman
677c61c32f moving insert_impl, index work 2022-08-06 16:08:34 +02:00
Maarten L. Hekkelman
4dd4f66397 backup 2022-08-04 16:29:55 +02:00
Maarten L. Hekkelman
04b7828abc validator work 2022-08-04 13:57:12 +02:00
Maarten L. Hekkelman
9c621ecab8 more condition work 2022-08-03 16:40:48 +02:00
Maarten L. Hekkelman
ab9c4d9416 compiling again 2022-08-03 16:09:27 +02:00
Maarten L. Hekkelman
e5eb62255a started with validator, running into the ground 2022-08-03 12:44:58 +02:00
Maarten L. Hekkelman
98ff79432b backup 2022-08-02 16:56:55 +02:00
Maarten L. Hekkelman
24fa80ba2a parser just started working again, a bit 2022-08-02 16:42:52 +02:00
Maarten L. Hekkelman
3999d792ef const iterator construction from non-const iterator 2022-08-02 11:59:29 +02:00
Maarten L. Hekkelman
4db3732749 move construction and operators = 2022-08-02 11:42:11 +02:00
Maarten L. Hekkelman
07131e8b40 copy constructor for category 2022-08-02 11:24:14 +02:00
Maarten L. Hekkelman
39b91e74c9 - new item storage
- formatting of numbers using to_chars
2022-08-02 10:35:14 +02:00
Maarten L. Hekkelman
d4bb7ec3bc Fixed includes based on feedback from Patricia Herrera. 2022-08-01 19:00:42 +02:00
Maarten L. Hekkelman
6175b7e359 backup 2022-08-01 16:58:52 +02:00
Maarten L. Hekkelman
10442d506a structured binding, start 2022-08-01 15:08:42 +02:00
Maarten L. Hekkelman
573a695c3d small steps 2022-08-01 12:57:00 +02:00
Maarten L. Hekkelman
a76bef0d01 backup 2022-08-01 08:33:49 +02:00
Maarten L. Hekkelman
e20111b566 small optimization 2022-07-31 16:19:24 +02:00
Maarten L. Hekkelman
4a1d9c8f75 New storage layout for item_value 2022-07-31 15:53:18 +02:00
Maarten L. Hekkelman
26c86282e3 before refactoring item_value based on statistics 2022-07-27 16:46:33 +02:00
Maarten L. Hekkelman
0eaeb1650d split out item 2022-07-27 12:41:25 +02:00
Maarten L. Hekkelman
f4a6533f6b eerste fröbelwerk 2022-07-26 17:00:14 +02:00
Maarten L. Hekkelman
df1b6a13e1 updated git ignore 2022-07-25 16:21:55 +02:00
Maarten L. Hekkelman
e8f24f617c - Fixed copy/paste error in Category interface
- message in progress finished
2022-07-25 10:40:03 +02:00
Maarten L. Hekkelman
9454fdc217 Fix for parsing empty quoted strings. 2022-07-21 13:03:23 +02:00
Maarten L. Hekkelman
22543d8fe5 Update changelog 2022-07-12 16:55:55 +02:00
Maarten L. Hekkelman
60d1dc82e6 Improved remark 3 parser for large molecules 2022-07-12 16:54:38 +02:00
Maarten L. Hekkelman
87486f87ef revert loading compressed dictionaries 2022-07-12 14:20:01 +02:00
Maarten L. Hekkelman
80e7da0f13 Locating dictionaries updated 2022-07-12 14:13:59 +02:00
Maarten L. Hekkelman
3745beae66 Fix search order for resources 2022-07-12 13:55:19 +02:00
Maarten L. Hekkelman
3965840bfa New way of locating resources 2022-07-12 13:49:02 +02:00
Maarten L. Hekkelman
a88c6f3d32 Fix for older clang (on MacOS?). 2022-07-05 11:07:12 +02:00
Maarten L. Hekkelman
ed6c6f0026 Move assignment of Structure is not possible due to reference to datablock 2022-07-05 09:46:59 +02:00
Maarten L. Hekkelman
bdda9d72b5 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-07-05 09:36:36 +02:00
Maarten L. Hekkelman
fd080e778e Changes for MacOS/MSVC 2022-07-05 09:36:26 +02:00
Maarten L. Hekkelman
9f72df2ecd Update LICENSE
Format changed, now recognized
2022-07-01 08:21:22 +02:00
ojcharles
617db012f0 Update Cif++.cpp (#15)
add mutex library for std::unique_lock, not included in std lib
2022-07-01 08:07:24 +02:00
Maarten L. Hekkelman
9d15541237 include cstring for gnu c++ 12 2022-06-22 08:53:05 +02:00
Maarten L. Hekkelman
35c99564c6 Fix importing sugars from PDB files 2022-06-01 15:17:54 +02:00
Maarten L. Hekkelman
1d8fe334d6 Fix writing sugar branches 2022-06-01 13:22:34 +02:00
Maarten L. Hekkelman
d86bb314ac Better handling of missing residues/mismatch seqres 2022-06-01 11:27:41 +02:00
Maarten L. Hekkelman
0ef8eb59f8 Fix scattering factors error 2022-05-18 13:04:42 +02:00
Maarten L. Hekkelman
b5fe4a9a87 locating resources that might be protected 2022-05-18 11:53:13 +02:00
Maarten L. Hekkelman
11fea31b98 more loading resources 2022-05-18 11:37:26 +02:00
Maarten L. Hekkelman
f629275ed5 locating resources that might be protected 2022-05-18 11:25:47 +02:00
Maarten L. Hekkelman
a5f6166469 locating resources that might be protected 2022-05-18 11:14:14 +02:00
Maarten L. Hekkelman
501050e591 Add move constructor to mmcif::Structure 2022-05-10 17:11:04 +02:00
Maarten L. Hekkelman
e1b240b2b2 sugar work 2022-05-04 16:48:28 +02:00
Maarten L. Hekkelman
3d79278ed7 Merge branch 'trunk' into develop 2022-05-04 09:51:15 +02:00
Maarten L. Hekkelman
5e0b197a43 mmcif::Atom::compound() revision 2022-05-04 09:50:24 +02:00
Maarten L. Hekkelman
9c4170d9e2 - Added more const members
- change PDB writing interface
2022-05-03 11:52:15 +02:00
Maarten L. Hekkelman
af721eb196 Make having no compound less fatal 2022-05-02 14:40:22 +02:00
Maarten L. Hekkelman
788e315f5e Fix entity_branch_link entry 2022-05-02 12:24:35 +02:00
Maarten L. Hekkelman
4a82a8d5a8 Fixed all tests 2022-05-02 11:09:36 +02:00
Maarten L. Hekkelman
11019a26f8 Merge branch 'sugar-tests' into develop 2022-05-02 10:03:44 +02:00
Maarten L. Hekkelman
6f8909dce9 Fixed tests 2022-05-02 10:01:10 +02:00
Maarten L. Hekkelman
5525103aaf backup 2022-05-02 09:26:59 +02:00
Maarten L. Hekkelman
291ef737b1 - Fix removing atoms
- Optimize isUnquotedString
2022-05-01 14:09:06 +02:00
Maarten L. Hekkelman
af125bdd57 backup 2022-04-26 16:04:13 +02:00
Maarten L. Hekkelman
79089bbb8c removed incorrect assert 2022-04-20 16:32:47 +02:00
Maarten L. Hekkelman
1f08498d00 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-04-20 11:18:50 +02:00
Maarten L. Hekkelman
49ba714a03 - structure id stuff
- added cif::null
- more tests
2022-04-20 11:17:11 +02:00
Maarten L. Hekkelman
85fd9296b2 Add test for loading 2022-04-19 17:03:09 +02:00
Maarten L. Hekkelman
1cda14867f More interface changes in mmcif::Structure 2022-04-19 13:40:38 +02:00
Maarten L. Hekkelman
2d2b26f7dc Fix regression in bondmap calculation 2022-04-19 09:10:54 +02:00
Maarten L. Hekkelman
93b33af44a oops, wrong field name 2022-04-13 10:57:50 +02:00
Maarten L. Hekkelman
eb80490bcd getPolymerByAsymID 2022-04-13 09:47:18 +02:00
Maarten L. Hekkelman
ba2b06f5af reduce complexity 2022-04-13 09:39:43 +02:00
Maarten L. Hekkelman
fecc762db1 - better link validation
- better output (quote reserved strings)
2022-04-12 17:00:47 +02:00
Maarten L. Hekkelman
1e406253ab loading unknown atoms 2022-04-12 12:41:25 +02:00
Maarten L. Hekkelman
6e3b85f43d getResidue, again 2022-04-11 16:36:40 +02:00
Maarten L. Hekkelman
58f1b626e2 change getResidue 2022-04-06 12:49:03 +02:00
Maarten L. Hekkelman
c104a08e16 fixed Atom::charge to pick more sensible default 2022-03-30 11:14:11 +02:00
Maarten L. Hekkelman
dd0f6ca1e6 accept more invalid characters, sigh 2022-03-29 11:45:33 +02:00
Maarten L. Hekkelman
f02ea91b51 label and auth seq id, some improvements 2022-03-28 09:50:37 +02:00
Maarten L. Hekkelman
6768a501a3 access to atoms 2022-03-21 09:58:16 +01:00
Maarten L. Hekkelman
879e15c759 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-03-14 16:28:55 +01:00
Maarten L. Hekkelman
89285b4abc construct quaternion from angle/axis 2022-03-14 16:28:41 +01:00
Maarten L. Hekkelman
c584714f91 ion radii 2022-03-09 15:40:32 +01:00
Maarten L. Hekkelman
f5016403b7 refactored mmcif::File 2022-03-02 15:26:29 +01:00
Maarten L. Hekkelman
c8f66ae6bb start remove residue 2022-02-23 08:24:26 +01:00
Maarten L. Hekkelman
858c967e71 Locate mmcif dictionary in CCP4 space 2022-02-15 08:08:01 +01:00
Maarten L. Hekkelman
f9ca5de5bf Add missing include for gcc 8.2 2022-02-09 16:04:24 +01:00
Maarten L. Hekkelman
252c3476a1 Slightly better handling of hetero residues 2022-02-09 14:53:05 +01:00
Maarten L. Hekkelman
19210df6db Fix parsing mmCIF files with an unquoted string ?? 2022-02-08 11:22:10 +01:00
Maarten L. Hekkelman
15c5730749 Remove redundant FindFilesystem include 2022-02-03 10:35:34 +01:00
Maarten L. Hekkelman
3764adb7ef update changelog 2022-02-02 13:44:32 +01:00
Maarten L. Hekkelman
9160adb1cf Merge branch 'develop' into trunk 2022-02-02 13:40:47 +01:00
Maarten L. Hekkelman
3ebf4338ab Do not crash on uninitialized Atoms 2022-02-02 12:41:32 +01:00
Maarten L. Hekkelman
2eb4b7b39b Fix building in Windows 2022-01-25 15:27:15 +01:00
Maarten L. Hekkelman
c241e49b48 fix makefile 2022-01-25 15:13:15 +01:00
Maarten L. Hekkelman
238c881132 Update dependencies, version string 2022-01-25 13:27:58 +01:00
Maarten L. Hekkelman
49dc733536 Create non poly from described atoms 2022-01-25 13:27:19 +01:00
Maarten L. Hekkelman
755bd78f60 Fix declaration for mmcif::Nudge 2022-01-19 13:21:29 +01:00
Maarten L. Hekkelman
77f80cd51f Fix atomic test (apparently, libatomic is only needed for std::atomic<long long>) 2022-01-19 08:25:25 +01:00
Maarten L. Hekkelman
3df6000635 cleaning up code 2022-01-18 16:06:28 +01:00
Maarten L. Hekkelman
5efee2b40d comment adjusted 2022-01-18 13:28:42 +01:00
Maarten L. Hekkelman
f3c2e59184 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-01-18 11:26:23 +01:00
Maarten L. Hekkelman
24ab660e6e Change logic for testing std::filesystem and libatomic 2022-01-18 11:24:31 +01:00
Maarten L. Hekkelman
6c0a418068 Revert "Check atomic"
This reverts commit 07a180991e.
2022-01-18 11:12:56 +01:00
Maarten L. Hekkelman
07a180991e Check atomic 2022-01-17 11:40:46 +01:00
Maarten L. Hekkelman
4732004b67 Merge branch 'develop' into trunk 2022-01-12 16:41:18 +01:00
Maarten L. Hekkelman
faa9cd0431 Added another rotate/translate method to mmcif::Structure 2022-01-12 14:06:32 +01:00
Maarten L. Hekkelman
e0c3c2394d Fix Structure::createNonPoly to add atoms... 2022-01-11 11:21:56 +01:00
Maarten L. Hekkelman
2dec584f54 clean up code 2022-01-05 15:54:23 +01:00
Maarten L. Hekkelman
5ab2ccae40 avoid calling cif::Category::size() too often 2022-01-05 15:45:27 +01:00
Maarten L. Hekkelman
1017d08626 skip updating links when changing atom location 2022-01-05 15:24:22 +01:00
Maarten L. Hekkelman
32b1bbd943 combine translate and rotate in a single call 2022-01-05 14:27:16 +01:00
Maarten L. Hekkelman
1abf31ffa5 no-validate option in cif::Row::assign 2022-01-05 14:04:28 +01:00
Maarten L. Hekkelman
aec60829d2 more quiet code 2022-01-05 11:29:10 +01:00
Maarten L. Hekkelman
888c3c38c2 Add a 'quiet' mode (cif::VERBOSE < 0) 2022-01-05 10:36:39 +01:00
Maarten L. Hekkelman
e2c4648037 clean up 2022-01-05 10:24:37 +01:00
Maarten L. Hekkelman
f7b98c0530 refactored AtomImpl 2022-01-05 10:23:15 +01:00
Maarten L. Hekkelman
d4bd3faa16 Merge branch 'profiling-structure' into trunk 2022-01-04 10:29:23 +01:00
Maarten L. Hekkelman
c4f3b1cd7b delay loading atoms in residues 2022-01-04 09:48:41 +01:00
Maarten L. Hekkelman
74add69a83 Finish removing bzip2 support 2022-01-03 15:51:01 +01:00
Maarten L. Hekkelman
a490b19d24 version bump 2022-01-03 15:45:48 +01:00
Maarten L. Hekkelman
44cfa2c1a2 further optimisation 2022-01-03 15:19:50 +01:00
Maarten L. Hekkelman
6dd9522b3f optimized mmcif::Atom 2022-01-03 14:32:42 +01:00
Maarten L. Hekkelman
5e352cb8e4 Removed erronous dependency in config.cmake.in 2021-12-20 13:33:06 +01:00
Maarten L. Hekkelman
2fad7315b8 make DSSP::iterator bidirectional 2021-12-15 15:12:23 +01:00
Maarten L. Hekkelman
520759dfe8 update changelog 2021-12-14 15:17:50 +01:00
Maarten L. Hekkelman
577b44ae11 Fix in processing CCP4 monomers, proline is a peptide 2021-12-14 15:16:23 +01:00
Maarten L. Hekkelman
66f742d6c0 code to facilitate DSSP 2021-12-14 15:14:45 +01:00
Maarten L. Hekkelman
7ba9f688c7 Merge branch 'develop' into trunk 2021-12-10 10:39:34 +01:00
Maarten L. Hekkelman
883f0307a2 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-12-10 10:38:43 +01:00
Maarten L. Hekkelman
c9719f873f Merge branch 'develop' into trunk 2021-12-10 10:37:04 +01:00
Maarten L. Hekkelman
123d25f853 formatting of floating points in cif files
better verbose info for differences
2021-12-10 10:35:02 +01:00
Maarten L. Hekkelman
56da42db84 formatting of floating points in cif files
better verbose info for differences
2021-12-10 10:32:21 +01:00
Maarten L. Hekkelman
7f820449ca formatting 2021-12-08 09:06:09 +01:00
Maarten L. Hekkelman
ecb2cf5f11 Fix for compiling with gcc 11.2 2021-12-08 09:03:21 +01:00
Maarten L. Hekkelman
7f27da9b3b Fixed rename-compound-test to work when not using resources 2021-11-25 16:27:45 +01:00
Maarten L. Hekkelman
01eb499c69 attempt to fix running tests in different directory 2021-11-25 16:27:42 +01:00
Maarten L. Hekkelman
1ff6f70682 changelog update 2021-11-25 16:25:53 +01:00
Maarten L. Hekkelman
ddde996e10 strip newlines from compound names read from CCD 2021-11-25 16:24:55 +01:00
Maarten L. Hekkelman
1c9212c7e0 Fixed rename-compound-test to work when not using resources 2021-11-25 16:09:15 +01:00
Maarten L. Hekkelman
a568143991 unneeded loading of resource removed from test 2021-11-24 13:52:23 +01:00
Maarten L. Hekkelman
2b6f1bd9ee attempt to fix running tests in different directory 2021-11-23 10:51:22 +01:00
Maarten L. Hekkelman
2527aa5ea6 correct version in cmakefile, fix structure-test to no longer load resource 2021-11-24 13:57:36 +01:00
Maarten L. Hekkelman
4c28091ecd Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-18 09:56:11 +01:00
Maarten L. Hekkelman
d49725423e re-added group to Compound. Seems to be important 2021-11-18 09:56:01 +01:00
Maarten L. Hekkelman
fcb4dc61b5 fixed writing PDB files (= remove to_upper for all header lines...) 2021-11-17 08:01:24 +01:00
Maarten L. Hekkelman
b7330c074f Fixed Structure::changeResidue to actually change the residue itself as well. 2021-11-16 08:38:14 +01:00
Maarten L. Hekkelman
e8f4123030 strip newlines from names in Compound 2021-11-15 12:37:45 +01:00
Maarten L. Hekkelman
975057c4c4 Fixed bug in structure::changeresidue when removing atoms 2021-11-15 11:32:50 +01:00
Maarten L. Hekkelman
a0e01668d1 take largest in value for best quaternion 2021-11-15 10:22:56 +01:00
Maarten L. Hekkelman
2c77491416 cleaner implementation of matrices 2021-11-12 10:36:38 +01:00
Maarten L. Hekkelman
be19e4a9cb Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-12 09:20:01 +01:00
Maarten L. Hekkelman
61ce91a9d7 using expression templates for matrices 2021-11-12 09:17:21 +01:00
Maarten L. Hekkelman
18f1d07e85 clean up code 2021-11-12 08:28:10 +01:00
Maarten L. Hekkelman
b596976194 correct implementation of alignpoints 2021-11-12 08:15:33 +01:00
Maarten L. Hekkelman
1f6b86d516 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-11 21:41:05 +01:00
Maarten L. Hekkelman
31499b977d Fix the 3d alignment code 2021-11-11 21:39:52 +01:00
Maarten L. Hekkelman
f83850e380 git and revisions 2021-11-11 09:42:22 +01:00
Maarten L. Hekkelman
1a4ccd86fe changelog update 2021-11-03 09:28:14 +01:00
Maarten L. Hekkelman
5c3c6fec09 strip newlines from compound names read from CCD 2021-10-29 10:48:09 +02:00
Maarten L. Hekkelman
f97e742daa removed a too strict test in loading structures 2021-10-21 08:42:42 +02:00
Maarten L. Hekkelman
7f39d401e2 Optimised assigning data 2021-10-20 14:55:30 +02:00
Maarten L. Hekkelman
af412c284d clean up 2021-10-20 12:27:27 +02:00
Maarten L. Hekkelman
874cd3bae5 Fix symmetry lookup 2021-10-20 11:36:31 +02:00
Maarten L. Hekkelman
ea28ebdd13 optimized caching of items in mmcif::Atom 2021-10-19 16:10:59 +02:00
Maarten L. Hekkelman
3ba468933f Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-10-18 17:17:20 +02:00
Maarten L. Hekkelman
45f33e4bea Merge branch 'trunk' into develop 2021-10-18 11:12:56 +02:00
Maarten L. Hekkelman
021487ed16 Fix reading mmCIF file where model is defined but model 1 is missing. Version bump. 2021-10-18 11:11:03 +02:00
Maarten L. Hekkelman
cb3443ffb1 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-10-18 09:42:30 +02:00
Maarten L. Hekkelman
6b2c9dc3e3 order of compound info, load CCD first 2021-10-18 09:40:33 +02:00
Maarten L. Hekkelman
7513cc1947 Merge branch 'trunk' into develop 2021-10-14 09:30:19 +02:00
Maarten L. Hekkelman
ab2dd4b75f Merge branch 'trunk' into develop 2021-10-13 14:03:29 +02:00
Maarten L. Hekkelman
8bbcba76cf Performance increase by using std::string_view
Updated Structure::changeResidue
2021-10-12 16:04:27 +02:00
164 changed files with 405569 additions and 186860 deletions

22
.clang-format Normal file
View File

@@ -0,0 +1,22 @@
BasedOnStyle: LLVM
UseTab: AlignWithSpaces
IndentWidth: 4
TabWidth: 4
BreakBeforeBraces: Allman
ColumnLimit: 0
NamespaceIndentation: Inner
FixNamespaceComments: true
AccessModifierOffset: -2
AllowShortCaseLabelsOnASingleLine: true
IndentCaseLabels: true
BreakConstructorInitializers: BeforeComma
BraceWrapping:
BeforeLambdaBody: false
AlignAfterOpenBracket: DontAlign
Cpp11BracedListStyle: false
IncludeBlocks: Regroup
LambdaBodyIndentation: Signature
AllowShortLambdasOnASingleLine: Inline
EmptyLineBeforeAccessModifier: LogicalBlock
IndentPPDirectives: AfterHash
PPIndentWidth: 1

18
.clang-tidy Normal file
View File

@@ -0,0 +1,18 @@
Checks: '-*,
bugprone-*,
-bugprone-easily-swappable-parameters,
cert-*,
modernize*,
-modernize-use-trailing-return-type,
-modernize-avoid-c-arrays,
-modernize-use-designated-initializers,
performance
'
# HeaderFilterRegex: '.*'
ExcludeHeaderFilterRegex: 'Eigen|Eigen/Eigenvalues|eigen3/Eigen/Eigenvalues|sqlite3.h'
CheckOptions:
- key: bugprone-narrowing-conversions.WarnOnIntegerNarrowingConversion
value: false
- key: bugprone-narrowing-conversions.WarnOnIntegerToFloatingPointNarrowingConversion
value: false

View File

@@ -0,0 +1,65 @@
# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
name: publish docs
on:
push:
branches: [ "trunk" ]
permissions:
contents: read
pages: write
id-token: write
concurrency:
group: "pages"
cancel-in-progress: false
jobs:
docs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Set reusable strings
# Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
id: strings
shell: bash
run: |
echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
- name: Install dependencies Ubuntu
run: sudo apt-get update && sudo apt-get install cmake doxygen
- uses: actions/setup-python@v4
with:
python-version: '3.9'
cache: 'pip' # caching pip dependencies
- run: pip install -r docs/requirements.txt
- name: Configure CMake
run: cmake -S . -B ${{ steps.strings.outputs.build-output-dir }} -DBUILD_DOCUMENTATION=ON -DBUILD_TESTING=OFF
- name: Run Sphinx
run: |
cmake --build ${{ steps.strings.outputs.build-output-dir }} --target Sphinx-libcifpp
ls -l ${{ steps.strings.outputs.build-output-dir }}
ls -l ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
- name: Upload artifact
uses: actions/upload-pages-artifact@v3
with:
path: ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
deploy:
environment:
name: github-pages
url: ${{ steps.deployment.outputs.page_url }}
runs-on: ubuntu-latest
needs: docs
steps:
- name: Deploy to GitHub Pages
id: deployment
uses: actions/deploy-pages@v4

View File

@@ -0,0 +1,64 @@
name: multi platform test
on:
push:
branches: [ "trunk", "develop" ]
pull_request:
branches: [ "trunk" ]
jobs:
build:
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, windows-latest, macos-latest]
include:
- os: windows-latest
cpp_compiler: cl
- os: ubuntu-latest
cpp_compiler: g++
- os: macos-latest
cpp_compiler: clang++
steps:
- uses: actions/checkout@v3
- name: Set reusable strings
id: strings
shell: bash
run: echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
- name: Install dependencies Ubuntu
if: matrix.os == 'ubuntu-latest'
run: sudo apt-get update && sudo apt-get install mrc catch2 libsqlite3-dev
- name: Install dependencies Window
if: matrix.os == 'windows-latest'
run: ./tools/depends.cmd
shell: cmd
- name: Install Catch2 macOS
if: matrix.os == 'macos-latest'
run: >
brew install catch2
- name: Configure CMake
run: >
cmake -B ${{ steps.strings.outputs.build-output-dir }}
-DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
-DCMAKE_BUILD_TYPE=Release
-S ${{ github.workspace }}
- name: Build
run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config Release
- name: Test
working-directory: ${{ steps.strings.outputs.build-output-dir }}
run: ctest --build-config Release --output-on-failure
- name: Install
if: matrix.os != 'windows-latest'
run: sudo cmake --install ${{ steps.strings.outputs.build-output-dir }} --config Release

16
.gitignore vendored
View File

@@ -1,14 +1,18 @@
build/
.vscode/
.vs/
.pc/
tools/symop-map-generator
test/unit-test
test/pdb2cif-test
test/rename-compound-test
tools/update-libcifpp-data
data/components.cif*
rsrc/components.cif*
CMakeSettings.json
msvc/
src/revision.hpp
test/test-create_sugar_?.cif
Testing/
include/cif++/exports.hpp
docs/api
docs/conf.py
build_ci/
data/components.cif
perf.data*
.cache/

22
.readthedocs.yaml Normal file
View File

@@ -0,0 +1,22 @@
version: 2
build:
os: ubuntu-22.04
tools:
python: "3.11"
apt_packages:
- doxygen
- cmake
jobs:
pre_build:
- cmake -S . -B build -DBUILD_DOCUMENTATION=ON
- cmake --build build --target Doxygen
# Build from the docs/ directory with Sphinx
sphinx:
configuration: docs/conf.py
# Explicitly set the version of Python and its requirements
python:
install:
- requirements: docs/requirements.txt

View File

@@ -1,33 +0,0 @@
language: cpp
os:
- linux
- osx
dist: focal
osx_image:
- xcode12
compiler:
- gcc
- clang
addons:
apt:
packages:
- libboost-all-dev
before_install:
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then brew install make; fi
script:
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then ./configure --disable-shared --disable-revision --disable-download-ccd ; else ./configure --disable-revision --disable-download-ccd ; fi
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake ; else make ; fi
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then gmake test ; else make test ; fi
- if [ "$TRAVIS_OS_NAME" = "osx" ]; then sudo gmake install ; else sudo make install; fi
# jobs:
# allow_failures:
# - os: osx

View File

@@ -6,483 +6,580 @@
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.16)
cmake_minimum_required(VERSION 3.23)
cmake_policy(SET CMP0135 NEW)
if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
# set the project name
project(cifpp VERSION 2.0.2 LANGUAGES CXX)
project(
libcifpp
VERSION 10.0.0
LANGUAGES CXX C)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
enable_testing()
include(GNUInstallDirs)
include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(FindAtomic)
include(CMakePackageConfigHelpers)
include(Dart)
include(FindFilesystem)
include(GenerateExportHeader)
include(CTest)
include(ExternalProject)
include(FetchContent)
include(VersionString)
set(CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
find_package(Filesystem REQUIRED)
# When building with ninja-multiconfig, build both debug and release by default
if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
set(CMAKE_CROSS_CONFIGS "Debug;Release")
set(CMAKE_DEFAULT_CONFIGS "Debug;Release")
endif()
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
# https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
set(CMAKE_CXX_FLAGS
"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
)
elseif(MSVC)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
# Building shared libraries?
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
# We do not want to write an export file for all our symbols...
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
# Build documentation?
set(BUILD_DOCUMENTATION OFF CACHE BOOL "Build the documentation")
# Optionally build a version to be installed inside CCP4
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)
if(BUILD_FOR_CCP4)
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
else()
list(APPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
list(APPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
set(BUILD_FOR_CCP4 OFF CACHE BOOL "Build a version to be installed in CCP4")
# This is the only option:
if(WIN32)
set(BUILD_SHARED_LIBS ON)
endif()
endif("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
# Create the cql/sqlite interface
set(BUILD_SQLITE_INTERFACE ON CACHE BOOL "Build the sqlite interface")
# Building shared libraries?
if(NOT (BUILD_FOR_CCP4 AND WIN32))
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
endif()
# Check if CCP4 is available
if(EXISTS "$ENV{CCP4}")
set(CCP4 $ENV{CCP4})
set(CLIBD ${CCP4}/lib/data)
endif()
if(CCP4 AND NOT CLIBD)
set(CLIBD ${CCP4}/lib/data)
endif()
if(PROJECT_IS_TOP_LEVEL AND NOT BUILD_FOR_CCP4)
# Lots of code depend on the availability of the components.cif file
set(CIFPP_DOWNLOAD_CCD ON CACHE BOOL "Download the CCD file components.cif during installation")
# When CCP4 is sourced in the environment, we can recreate the symmetry operations table
if(EXISTS "${CCP4}")
if(CIFPP_RECREATE_SYMOP_DATA AND NOT EXISTS "${CLIBD}/syminfo.lib")
message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in ${CLIBD}")
set(CIFPP_RECREATE_SYMOP_DATA OFF)
else()
option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
endif()
# An optional cron script can be installed to keep the data files up-to-date
if(UNIX AND NOT APPLE)
set(CIFPP_INSTALL_UPDATE_SCRIPT ON CACHE BOOL "Install the script to update CCD and dictionary files")
endif()
else()
set(CIFPP_RECREATE_SYMOP_DATA OFF)
message("Not trying to recreate SymOpTable_data.hpp since CCP4 is not defined")
unset(CIFPP_DOWNLOAD_CCD)
unset(CIFPP_INSTALL_UPDATE_SCRIPT)
endif()
# set(CMAKE_DEBUG_POSTFIX d)
# When CCP4 is sourced in the environment, we can recreate the symmetry
# operations table
if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
set(CIFPP_RECREATE_SYMOP_DATA ON CACHE BOOL "Recreate SymOp data table in case it is out of date")
endif()
# CCP4 build
if(BUILD_FOR_CCP4)
if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
message(FATAL_ERROR "cifpp: A CCP4 built was requested but CCP4 was not sourced")
else()
list(PREPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")
if(WIN32)
set(BUILD_SHARED_LIBS ON)
endif()
endif()
endif()
# Now include the GNUInstallDirs module
include(GNUInstallDirs)
if(WIN32)
if(${CMAKE_SYSTEM_VERSION} GREATER_EQUAL 10) # Windows 10
add_definitions(-D _WIN32_WINNT=0x0A00)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.3) # Windows 8.1
add_definitions(-D _WIN32_WINNT=0x0603)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.2) # Windows 8
add_definitions(-D _WIN32_WINNT=0x0602)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.1) # Windows 7
add_definitions(-D _WIN32_WINNT=0x0601)
elseif(${CMAKE_SYSTEM_VERSION} EQUAL 6.0) # Windows Vista
add_definitions(-D _WIN32_WINNT=0x0600)
else() # Windows XP (5.1)
add_definitions(-D _WIN32_WINNT=0x0501)
endif()
# We do not want to write an export file for all our symbols...
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
endif()
if(MSVC)
# make msvc standards compliant...
add_compile_options(/permissive-)
add_compile_options(/permissive- /bigobj)
add_link_options(/NODEFAULTLIB:library)
macro(get_WIN32_WINNT version)
if (WIN32 AND CMAKE_SYSTEM_VERSION)
set(ver ${CMAKE_SYSTEM_VERSION})
string(REPLACE "." "" ver ${ver})
string(REGEX REPLACE "([0-9])" "0\\1" ver ${ver})
set(${version} "0x${ver}")
endif()
endmacro()
get_WIN32_WINNT(ver)
add_definitions(-D_WIN32_WINNT=${ver})
# On Windows, do not install in the system location
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT AND NOT BUILD_FOR_CCP4)
message(STATUS "The library and auxiliary files will be installed in $ENV{LOCALAPPDATA}/${PROJECT_NAME}")
set(CMAKE_INSTALL_PREFIX "$ENV{LOCALAPPDATA}/${PROJECT_NAME}" CACHE PATH "..." FORCE)
endif()
# Find out the processor type for the target
if(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
set(COFF_TYPE "x64")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "i386")
set(COFF_TYPE "x86")
elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ARM64")
set(COFF_TYPE "arm64")
else()
message(FATAL_ERROR "Unsupported or unknown processor type ${CMAKE_SYSTEM_PROCESSOR}")
endif()
set(COFF_SPEC "--coff=${COFF_TYPE}")
# for mrc, just in case
list(APPEND CMAKE_PREFIX_PATH "$ENV{LOCALAPPDATA}/mrc")
endif()
if(UNIX AND NOT APPLE AND NOT BUILD_FOR_CCP4 AND CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
# On Linux, install in the $HOME/.local folder by default
message(STATUS "The library and auxiliary files will be installed in $ENV{HOME}/.local")
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "..." FORCE)
endif()
# Optionally use mrc to create resources
if(WIN32 AND BUILD_SHARED_LIBS)
message("Not using resources when building shared libraries for Windows")
else()
find_program(MRC mrc)
if(MRC)
option(CIFPP_USE_RSRC "Use mrc to create resources" ON)
else()
message("Using resources not possible since mrc was not found")
endif()
if(CIFPP_USE_RSRC STREQUAL "ON")
set(CIFPP_USE_RSRC 1)
message("Using resources compiled with ${MRC}")
add_compile_definitions(CIFPP_USE_RSRC)
endif()
# This is dubious...
if(BUILD_SHARED_LIBS)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
else()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
endif()
# Libraries
set(CMAKE_THREAD_PREFER_PTHREAD)
set(THREADS_PREFER_PTHREAD_FLAG)
if(MSVC)
# Avoid linking the shared library of zlib. Search ZLIB_ROOT first if it is
# set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
endif()
# Normal search.
set(_ZLIB_x86 "(x86)")
set(_ZLIB_SEARCH_NORMAL
PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
unset(_ZLIB_x86)
list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)
if(BUILD_FOR_CCP4)
list(PREPEND _ZLIB_SEARCHES "$ENV{CCP4}/lib")
endif()
foreach(search ${_ZLIB_SEARCHES})
find_library(
ZLIB_LIBRARY
NAMES zlibstatic NAMES_PER_DIR ${${search}}
PATH_SUFFIXES lib)
endforeach()
endif()
set(CMAKE_CXX_STANDARD 20)
# Using fast_float for float parsing, but only if needed
try_compile(STD_CHARCONV_COMPILING
SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-charconv.cpp
CXX_STANDARD 20
CXX_STANDARD_REQUIRED ON)
if(NOT STD_CHARCONV_COMPILING)
message(NOTICE "libcifpp: Using fast_float for std::from_chars")
find_package(FastFloat 8.0 QUIET CONFIG)
if(NOT FastFloat_FOUND)
message(STATUS "FastFloat not found in system, fetching from GitHub")
FetchContent_Declare(fastfloat
GIT_REPOSITORY "https://github.com/fastfloat/fast_float"
GIT_TAG v8.0.2
EXCLUDE_FROM_ALL)
FetchContent_MakeAvailable(fastfloat)
endif()
endif()
find_package(Threads)
find_package(ZLIB QUIET)
set(Boost_DETAILED_FAILURE_MSG ON)
if(NOT BUILD_SHARED_LIBS)
set(Boost_USE_STATIC_LIBS ON)
endif()
find_package(Boost 1.70.0 REQUIRED COMPONENTS system iostreams regex program_options)
if(NOT MSVC AND Boost_USE_STATIC_LIBS)
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED)
if(NOT ZLIB_FOUND)
message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
endif()
# Create a revision file, containing the current git version info
include(FindPkgConfig)
find_package(Git)
if(GIT_FOUND AND EXISTS "${CMAKE_SOURCE_DIR}/.git")
include(GetGitRevisionDescription)
get_git_head_revision(REFSPEC COMMITHASH)
if(PKG_CONFIG_FOUND)
pkg_check_modules(PCRE2 IMPORTED_TARGET libpcre2-8)
endif()
# Generate our own version string
git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
if(NOT PCRE2_FOUND)
add_subdirectory(pcre2-simple)
endif()
# Using Eigen3 is a bit of a thing. We don't want to build it completely since
# we only need a couple of header files. Nothing special. But often, eigen3 is
# already installed and then we prefer that.
find_package(Eigen3 3.4 QUIET)
if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
INTERFACE_INCLUDE_DIRECTORIES)
else()
message(WARNING "Git not found, cannot set version info")
# Use ExternalProject since FetchContent always tries to install the result...
ExternalProject_Add(my-eigen3
URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
DOWNLOAD_EXTRACT_TIMESTAMP TRUE
CONFIGURE_COMMAND ""
BUILD_COMMAND ""
INSTALL_COMMAND "")
SET(BUILD_VERSION_STRING ${PROJECT_VERSION})
ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
endif()
# generate version.h
string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
configure_file("${CMAKE_SOURCE_DIR}/src/revision.hpp.in" "${CMAKE_BINARY_DIR}/revision.hpp" @ONLY)
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
# The tool to create the table
# The tool to create the table
add_executable(symop-map-generator
"${CMAKE_CURRENT_SOURCE_DIR}/src/symop-map-generator.cpp")
add_executable(symop-map-generator "${CMAKE_SOURCE_DIR}/tools/symop-map-generator.cpp")
target_compile_features(symop-map-generator PUBLIC cxx_std_20)
target_link_libraries(symop-map-generator Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
if(Boost_INCLUDE_DIR)
target_include_directories(symop-map-generator PUBLIC ${Boost_INCLUDE_DIR})
endif()
add_custom_command(
OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
COMMAND
$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
$ENV{CLIBD}/symop.lib ${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp)
set($ENV{CLIBD} ${CLIBD})
add_custom_command(
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
COMMAND $<TARGET_FILE:symop-map-generator> ${CLIBD}/syminfo.lib ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
)
add_custom_target(
OUTPUT ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
)
add_custom_target(
OUTPUT
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
"$ENV{CLIBD}/symop.lib")
endif()
# Create a revision file, containing the current git version info
write_version_header("${CMAKE_CURRENT_SOURCE_DIR}/src/" LIB_NAME "LibCIFPP")
add_library(cifpp)
add_library(cifpp::cifpp ALIAS cifpp)
# Sources
set(project_sources
${PROJECT_SOURCE_DIR}/src/AtomType.cpp
${PROJECT_SOURCE_DIR}/src/BondMap.cpp
${PROJECT_SOURCE_DIR}/src/Cif++.cpp
${PROJECT_SOURCE_DIR}/src/Cif2PDB.cpp
${PROJECT_SOURCE_DIR}/src/CifParser.cpp
${PROJECT_SOURCE_DIR}/src/CifUtils.cpp
${PROJECT_SOURCE_DIR}/src/CifValidator.cpp
${PROJECT_SOURCE_DIR}/src/Compound.cpp
${PROJECT_SOURCE_DIR}/src/PDB2Cif.cpp
${PROJECT_SOURCE_DIR}/src/PDB2CifRemark3.cpp
${PROJECT_SOURCE_DIR}/src/Point.cpp
${PROJECT_SOURCE_DIR}/src/Secondary.cpp
${PROJECT_SOURCE_DIR}/src/Structure.cpp
${PROJECT_SOURCE_DIR}/src/Symmetry.cpp
${PROJECT_SOURCE_DIR}/src/TlsParser.cpp
list(APPEND project_sources
${CMAKE_CURRENT_SOURCE_DIR}/src/category.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/condition.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/datablock.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/dictionary_parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/file.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/item.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/parser.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/row.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/validate.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/text.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/utilities.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/atom_type.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/compound.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/point.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/symmetry.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/model.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb_record.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/reconstruct.cpp
${CMAKE_CURRENT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
)
set(project_headers
${PROJECT_SOURCE_DIR}/include/cif++/AtomType.hpp
${PROJECT_SOURCE_DIR}/include/cif++/BondMap.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Cif++.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Cif2PDB.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifParser.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifUtils.hpp
${PROJECT_SOURCE_DIR}/include/cif++/CifValidator.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Compound.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Matrix.hpp
${PROJECT_SOURCE_DIR}/include/cif++/PDB2Cif.hpp
${PROJECT_SOURCE_DIR}/include/cif++/PDB2CifRemark3.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Point.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Secondary.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Structure.hpp
${PROJECT_SOURCE_DIR}/include/cif++/Symmetry.hpp
${PROJECT_SOURCE_DIR}/include/cif++/TlsParser.hpp
list(APPEND project_headers
include/cif++.hpp
include/cif++/atom_type.hpp
include/cif++/category.hpp
include/cif++/compound.hpp
include/cif++/condition.hpp
include/cif++/datablock.hpp
include/cif++/dictionary_parser.hpp
include/cif++/exports.hpp
include/cif++/file.hpp
include/cif++/format.hpp
include/cif++/gzio.hpp
include/cif++/item.hpp
include/cif++/iterator.hpp
include/cif++/matrix.hpp
include/cif++/model.hpp
include/cif++/parser.hpp
include/cif++/pdb.hpp
include/cif++/point.hpp
include/cif++/row.hpp
include/cif++/symmetry.hpp
include/cif++/text.hpp
include/cif++/utilities.hpp
include/cif++/validate.hpp
)
add_library(cifpp ${project_sources} ${project_headers} ${CMAKE_SOURCE_DIR}/src/SymOpTable_data.hpp)
if(BUILD_SQLITE_INTERFACE)
find_package(SQLite3 QUIET)
if(SQLite3_FOUND)
target_link_libraries(cifpp PRIVATE SQLite::SQLite3)
else()
FetchContent_Populate(SQLite3
URL https://sqlite.org/2025/sqlite-amalgamation-3510100.zip
URL_HASH SHA3_256=856b52ffe7383d779bb86a0ed1ddc19c41b0e5751fa14ce6312f27534e629b64
EXCLUDE_FROM_ALL)
list(APPEND project_sources $<BUILD_INTERFACE:${sqlite3_SOURCE_DIR}>/sqlite3.c)
target_include_directories(cifpp PRIVATE $<BUILD_INTERFACE:${sqlite3_SOURCE_DIR}>)
endif()
list(APPEND project_sources ${CMAKE_CURRENT_SOURCE_DIR}/src/cql.cpp)
list(APPEND project_headers include/cif++/cql.hpp)
endif()
if(TARGET my-eigen3)
add_dependencies(cifpp my-eigen3)
endif()
target_sources(cifpp
PRIVATE ${project_sources}
${CMAKE_CURRENT_SOURCE_DIR}/src/symop_table_data.hpp
PUBLIC
FILE_SET cifpp_headers TYPE HEADERS
BASE_DIRS ${CMAKE_CURRENT_SOURCE_DIR}/include
FILES ${project_headers}
)
# The code now really requires C++20
target_compile_features(cifpp PUBLIC cxx_std_20)
generate_export_header(cifpp EXPORT_FILE_NAME
${CMAKE_CURRENT_SOURCE_DIR}/include/cif++/exports.hpp)
if(MSVC)
target_compile_definitions(cifpp PUBLIC NOMINMAX=1)
endif()
set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_include_directories(cifpp
PUBLIC
"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
${Boost_INCLUDE_DIR}
)
target_include_directories(
cifpp
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${EIGEN_INCLUDE_DIR}")
target_include_directories(cifpp
PRIVATE
${CMAKE_BINARY_DIR}
)
target_link_libraries(cifpp
PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
target_link_libraries(cifpp Threads::Threads ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
if(PCRE2_FOUND)
target_include_directories(cifpp PRIVATE ${PCRE2_INCLUDE_DIRS})
target_link_libraries(cifpp PRIVATE ${PCRE2_LINK_LIBRARIES})
else()
target_link_libraries(cifpp PRIVATE $<BUILD_INTERFACE:pcre2s>)
endif()
if (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
if(NOT STD_CHARCONV_COMPILING)
target_link_libraries(cifpp PRIVATE FastFloat::fast_float)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
endif (CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" OFF)
if(CIFPP_DOWNLOAD_CCD)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)
# download the components.cif file from CCD
set(COMPONENTS_CIF ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/components.cif)
if (NOT EXISTS ${COMPONENTS_CIF})
if(EXISTS ${COMPONENTS_CIF})
file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
if (NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
endif()
if(CCD_FILE_SIZE EQUAL 0)
message(STATUS "cifpp: Removing empty ${COMPONENTS_CIF} file")
file(REMOVE "${COMPONENTS_CIF}")
endif()
endif()
find_program(GUNZIP gunzip)
if(NOT EXISTS ${COMPONENTS_CIF})
# Since the file(DOWNLOAD) command in cmake does not use compression, we try
# to download the gzipped version and decompress it ourselves.
find_program(GUNZIP gunzip)
if(GUNZIP)
file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
SHOW_PROGRESS)
add_custom_command(OUTPUT ${COMPONENTS_CIF}
COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}/data/)
else()
file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
SHOW_PROGRESS)
endif()
endif()
if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
file(
DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
${COMPONENTS_CIF}
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
else()
if(NOT EXISTS "${COMPONENTS_CIF}.gz")
file(
DOWNLOAD
https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
${COMPONENTS_CIF}.gz
SHOW_PROGRESS
STATUS CCD_FETCH_STATUS)
endif()
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
add_custom_command(
OUTPUT ${COMPONENTS_CIF}
COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/)
add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Do not continue if downloading went wrong
list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)
if(CCD_FETCH_STATUS_CODE)
message(
FATAL_ERROR "cifpp: Error trying to download CCD file: ${CCD_FETCH_STATUS}")
endif()
endif()
endif()
if(UNIX)
option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" OFF)
set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE STRING "The cache directory to use")
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
# Installation directories
if(BUILD_FOR_CCP4)
set(CIFPP_DATA_DIR
"$ENV{CCP4}/share/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
else()
set(CIFPP_DATA_DIR
"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
CACHE PATH "Directory where dictionary and other static data is stored")
endif()
generate_export_header(cifpp
EXPORT_FILE_NAME cif++/Cif++Export.hpp)
if(CIFPP_DATA_DIR)
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
set_target_properties(cifpp PROPERTIES CIFPP_DATA_DIR ${CIFPP_DATA_DIR})
endif()
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} )
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} )
set(SHARE_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/libcifpp)
if(NOT PROJECT_IS_TOP_LEVEL)
set(CIFPP_SHARE_DIR ${CIFPP_DATA_DIR} PARENT_SCOPE)
endif()
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}" CACHE STRING "The directory containing the provided data files")
if(UNIX AND NOT BUILD_FOR_CCP4)
if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
set(CIFPP_CACHE_DIR
"/var/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
else()
set(CIFPP_CACHE_DIR
"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
CACHE PATH "The directory where downloaded data files are stored")
endif()
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
set(CIFPP_ETC_DIR
"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
CACHE PATH "The directory where the update configuration file is stored")
else()
unset(CIFPP_CACHE_DIR)
endif()
# Install rules
install(TARGETS cifpp
EXPORT cifppTargets
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
EXPORT cifpp
FILE_SET cifpp_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
install(EXPORT cifppTargets
FILE "cifppTargets.cmake"
NAMESPACE cifpp::
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
)
if(MSVC AND BUILD_SHARED_LIBS)
install(
FILES $<TARGET_PDB_FILE:cifpp>
DESTINATION ${CMAKE_INSTALL_LIBDIR}
OPTIONAL)
endif()
# Clean up old config files (with old names)
file(GLOB OLD_CONFIG_FILES
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppConfig*.cmake
${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)
if(OLD_CONFIG_FILES)
message(
STATUS "cifpp: Installation will remove old config files: ${OLD_CONFIG_FILES}")
install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
endif()
install(EXPORT cifpp
NAMESPACE cifpp::
FILE "cifpp-targets.cmake"
DESTINATION lib/cmake/cifpp)
install(
DIRECTORY include/cif++
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
COMPONENT Devel
)
FILES ${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
${CMAKE_CURRENT_SOURCE_DIR}/rsrc/mmcif_ma.dic
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
install(FILES ${COMPONENTS_CIF}
DESTINATION ${CMAKE_INSTALL_DATADIR}/libcifpp)
endif()
set(CONFIG_TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
configure_package_config_file(
${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
INSTALL_DESTINATION lib/cmake/cifpp
PATH_VARS CIFPP_DATA_DIR)
install(
FILES "${CMAKE_CURRENT_BINARY_DIR}/cif++/Cif++Export.hpp"
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
COMPONENT Devel
)
FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
DESTINATION lib/cmake/cifpp)
install(FILES
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic
${COMPONENTS_CIF}
DESTINATION ${SHARE_INSTALL_DIR}
)
set_target_properties(
cifpp
PROPERTIES VERSION ${PROJECT_VERSION}
SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})
configure_package_config_file(Config.cmake.in
${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
)
install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
COMPONENT Devel
)
set(cifpp_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR})
set_target_properties(cifpp PROPERTIES
VERSION ${PROJECT_VERSION}
SOVERSION ${cifpp_MAJOR_VERSION}
INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})
set_property(TARGET cifpp APPEND PROPERTY
COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
)
set_property(
TARGET cifpp
APPEND
PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
write_basic_package_version_file(
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion
)
"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
VERSION ${PROJECT_VERSION}
COMPATIBILITY AnyNewerVersion)
# pkgconfig support
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix ${CMAKE_INSTALL_PREFIX})
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in @ONLY)
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
INPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
# Unit tests
option(CIFPP_BUILD_TESTS "Build test exectuables" OFF)
if(CIFPP_BUILD_TESTS)
if(CIFPP_USE_RSRC)
add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj
COMMAND ${MRC} -o ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj ${CMAKE_SOURCE_DIR}/rsrc/mmcif_pdbx_v50.dic ${COFF_SPEC}
)
set(CIFPP_TEST_RESOURCE ${CMAKE_CURRENT_BINARY_DIR}/cifpp_test_rsrc.obj)
endif()
list(APPEND CIFPP_tests
# pdb2cif
rename-compound
structure
unit)
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
set(CIFPP_TEST "${CIFPP_TEST}-test")
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")
add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} ${CIFPP_TEST_RESOURCE})
target_include_directories(${CIFPP_TEST} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/include
${CMAKE_CURRENT_BINARY_DIR} # for config.h
)
target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp ${Boost_LIBRARIES} std::filesystem ${ZLIB_LIBRARIES} ${BZip2_LIBRARIES})
if(MSVC)
# Specify unwind semantics so that MSVC knowns how to handle exceptions
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
endif()
add_custom_target("run-${CIFPP_TEST}" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
add_test(NAME ${CIFPP_TEST}
COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
endforeach()
if(BUILD_TESTING AND PROJECT_IS_TOP_LEVEL)
add_subdirectory(test)
endif()
message("Will install in ${CMAKE_INSTALL_PREFIX}")
# Optionally install the update scripts for CCD and dictionary files
if(CIFPP_INSTALL_UPDATE_SCRIPT)
set(CIFPP_CRON_DIR "$ENV{DESTDIR}/etc/cron.weekly")
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/tools/update-libcifpp-data.in
update-libcifpp-data @ONLY)
configure_file(${CMAKE_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CIFPP_CRON_DIR}
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
)
if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR
${CMAKE_SYSTEM_NAME} STREQUAL "GNU" OR
${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
install(
FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/cron.weekly
PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
WORLD_READ)
else()
message(FATAL_ERROR "cifpp: Don't know where to install the update script")
endif()
install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})
install(DIRECTORY DESTINATION "$ENV{DESTDIR}/etc/libcifpp/cache-update.d")
# a config to, to make it complete
if(NOT EXISTS "$ENV{DESTDIR}/etc/libcifpp.conf")
file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
# a config file, to make it complete
# install(DIRECTORY DESTINATION "${CMAKE_INSTALL_LOCALSTATEDIR}/libcifpp")
if(NOT EXISTS "${CMAKE_INSTALL_SYSCONFDIR}/libcifpp.conf")
file(
WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
[[# Uncomment the next line to enable automatic updates
# update=true
]])
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "$ENV{DESTDIR}/etc")
install(CODE "message(\"A configuration file has been written to $ENV{DESTDIR}/etc/libcifpp.conf, please edit this file to enable automatic updates\")")
endif()
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
DESTINATION ${CMAKE_INSTALL_SYSCONFDIR})
install(
CODE "message(\"cifpp: A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
)
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
install(DIRECTORY DESTINATION ${CMAKE_INSTALL_SYSCONFDIR}/libcifpp/cache-update.d)
endif()
target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif()
if(BUILD_DOCUMENTATION)
add_subdirectory(docs)
endif()

View File

@@ -1,16 +0,0 @@
@PACKAGE_INIT@

# Package configuration for cifpp: resolve the dependencies first, then load
# the exported targets and finally publish the installation directories.
include(CMakeFindDependencyMacro)

find_dependency(
  Boost 1.70.0 REQUIRED
  COMPONENTS system iostreams regex program_options)

# The compression libraries are only looked up on non-Windows platforms.
if(NOT WIN32)
  find_dependency(ZLIB)
  find_dependency(BZip2)
endif()

# Imported targets exported at install time.
include("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")

# Installation directories made available to the consuming project.
set_and_check(CIFPP_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(CIFPP_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@")
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_SHARE_INSTALL_DIR@")

check_required_components(cifpp)

View File

@@ -1,6 +1,7 @@
SPDX-License-Identifier: BSD-2-Clause
BSD-2-Clause License
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
@@ -20,4 +21,4 @@ ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

238
README.md
View File

@@ -1,47 +1,221 @@
libcifpp
========
[![github CI](https://github.com/pdb-redo/libcifpp/actions/workflows/cmake-multi-platform.yml/badge.svg)](https://github.com/pdb-redo/libcifpp/actions)
[![GitHub License](https://img.shields.io/github/license/pdb-redo/libcifpp)](https://github.com/pdb-redo/libcifpp/LICENSE)
This library contains code to work with mmCIF and PDB files.
# libcifpp
Requirements
------------
As the name implies, this library was originally written to work with mmCIF files
using C++ as programming language. The design of this library leans heavily on
the structure of CIF files. These files can be thought of as a text dump of a
relational databank with, often but not always, a very strict schema describing
the data. These schemas are called dictionaries.
Using information from the content of a mmCIF file and an optional schema,
libcifpp allows you to access the data in the file as a collection of datablocks
each containing a collection of categories with rows of data. The categories can
be searched for data using queries written in regular C++ syntax. When a dictionary
was specified, inserted data is checked for validity. Likewise removal of data
may result in cascaded removal of linked data in other categories using
parent/child relationship information.
Since there were still many programs using the legacy PDB format at the time
development started, a layer was added that converts data to and from PDB format
into mmCIF format. This means you can manipulate PDB files as if they were
normal mmCIF files.
Apart from this basic functionality, libcifpp also offers code to help with
symmetry calculations, 3d manipulations and obtaining information from the CCD
[Chemical Component Dictionary](https://www.wwpdb.org/data/ccd).
## Documentation
The documentation can be found at [github.io](https://pdb-redo.github.io/libcifpp/)
## Synopsis
```cpp
// A simple program counting residues with an OXT atom
#include <filesystem>
#include <iostream>
#include <cif++.hpp>
namespace fs = std::filesystem;
int main(int argc, char *argv[])
{
if (argc != 2)
exit(1);
// Read file, can be PDB or mmCIF and can even be compressed with gzip.
cif::file file = cif::pdb::read(argv[1]);
if (file.empty())
{
std::cerr << "Empty file\n";
exit(1);
}
// Take the first datablock in the file
auto &db = file.front();
// Use the atom_site category
auto &atom_site = db["atom_site"];
// Count the atoms with atom-id "OXT"
auto n = atom_site.count(cif::key("label_atom_id") == "OXT");
std::cout << "File contains " << atom_site.size() << " atoms of which "
<< n << (n == 1 ? " is" : " are") << " OXT\n"
<< "residues with an OXT are:\n";
// Loop over all atoms with atom-id "OXT" and print out some info.
// That info is extracted using structured binding in C++
for (const auto &[asym, comp, seqnr] :
atom_site.find<std::string, std::string, int>(
cif::key("label_atom_id") == "OXT",
"label_asym_id", "label_comp_id", "label_seq_id"))
{
std::cout << asym << ' ' << comp << ' ' << seqnr << '\n';
}
return 0;
}
```
## Installation
You might be able to use libcifpp from a package manager used by your
OS distribution. But most likely this package will be out-of-date.
Therefore it is recommended to build *libcifpp* from code. It is not
hard to do. But it is recommended to read the following instructions
carefully.
### Requirements
The code for this library was written in C++17. You therefore need a
recent compiler to build it. For the development gcc 9.3 and clang 9.0
recent compiler to build it. For the development gcc >= 9.4 and clang >= 9.0
have been used as well as MSVC version 2019.
Other requirements are:
The other requirement you really need to have installed on your computer
is a version of [CMake](https://cmake.org). For now the minimum version
is 3.16 but that may soon change into a higher version. You should also
install the gui version of CMake to set build options easily, on Debian
I prefer to use the curses version installed with `cmake-curses-gui`.
- Boost libraries, at least version 1.70
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
allows including data files into the executable making them easier to
install. Strictly this is optional, but at the expense of functionality.
It is very useful to have [mrc](https://github.com/mhekkel/mrc) available.
However, this is only an option if you use Windows or an operating system
using the ELF executable format (i.e. Linux or FreeBSD). MRC is a resource
compiler that allows including data files into the executable making them
easier to install.
Building
--------
Other libraries you might want to install beforehand are:
This library uses [cmake](https://cmake.org). The usual way of building
and installing is to create a `build` directory and run cmake there.
- [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
library to do amongst others matrix calculations. This usually can be
installed using your package manager, in Debian/Ubuntu it is called
`libeigen3-dev`
- [zlib](https://github.com/madler/zlib), the development version of this
library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [pcre2](https://www.pcre.org/), the Perl Compatible Regular Expression
library. On Debian/Ubuntu this is the package `libpcre2-dev`.
On linux e.g. you would issue the following commands:
### Building
First you need to download the code:
```console
git clone https://github.com/PDB-REDO/libcifpp.git
cd libcifpp
```
git clone https://github.com/PDB-REDO/libcifpp.git
cd libcifpp
mkdir build
cd build
cmake ..
cmake --build . --config Release
ctest -C Release
cmake --install .
You should start by considering where to install libcifpp. If you have
sufficient permissions on your computer you perhaps should use the
default but libcifpp can be configured to be installed anywhere
including e.g. *$HOME/.local*.
Next step is to configure, for this use the CMake gui application. If you
installed the curses version of cmake you can type `ccmake`. On Windows
you can use `cmake-gui.exe`.
To install in the default location:
```console
ccmake -S . -B build
```
This checks out the source code from github, creates a new directory
where cmake stores its files. Run a configure, build the code and run
tests. And then it installs the library and auxiliary files.
The default is to install everything in `$HOME/.local` on Linux and
`%LOCALAPPDATA%` on Windows (the AppData/Local folder in your home directory).
You can change this by specifying the prefix with the
[CMAKE_INSTALL_PREFIX](https://cmake.org/cmake/help/v3.21/variable/CMAKE_INSTALL_PREFIX.html)
variable.
To install elsewhere, e.g. *$HOME/.local*:
```console
ccmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
```
In the cmake window, start the configure command (use button or press 'c').
After the first configure step you will see a list of settable options.
Alter these to match your preferences. Most options are self-explanatory
and contain a description. Some may need a bit more explanation:
- CIFPP_DATA_DIR, this directory will be used to store initial versions
of the mmcif_pdbx dictionary as well as the optional CCD file.
- CIFPP_DOWNLOAD_CCD
The CCD file is huge and perhaps you think you don't
need it. In that case you can leave this OFF. But that will limit the
use cases.
- CIFPP_INSTALL_UPDATE_SCRIPT
The files in CIFPP_DATA_DIR are quickly becoming out of date. On
FreeBSD and Linux you can install a script that updates these files
on a weekly basis.
- CIFPP_CRON_DIR
The directory where the update script is to be installed.
- CIFPP_ETC_DIR
The update script will only work if the file called *libcifpp.conf*
in this *etc* directory contains an uncommented line with
```console
update=true
```
- CIFPP_CACHE_DIR
When you installed and enabled the update script, new files are
written to this directory.
- CIFPP_RECREATE_SYMOP_DATA
If you have CCP4 sourced into your environment, this option allows
you to recreate the symop data file.
- BUILD_FOR_CCP4
Build a special version of libcifpp to be installed in the CCP4
environment.
After setting these options you can run the configure step again and
then use generate to create the makefiles.
Building and installing is then as simple as:
```console
cmake --build build
cmake --install build
```
If this fails due to lack of permissions, you can try:
```console
sudo cmake --install build
```
Tests are created by default, and to test the code you can run:
```console
ctest --test-dir build
```

263
changelog
View File

@@ -1,3 +1,266 @@
Version 10.0.0
- Added a SQLite interface.
Version 9.0.5
- Added exists to compound_factory
- Added sub_matrix, fix and extend determinant calculation
- Added yet another structure::create_non_poly
- Remove revision.hpp file in make clean (new VersionString.cmake)
Version 9.0.4
- Fix various stopping and reconstruction errors
Version 9.0.3
- Reconstruction fixed when some entity ids are missing
Version 9.0.2
- Fix code that reconstructs sequences, could throw a map::at
- Many optimisations in validation and reconstruction code.
Version 9.0.1
- Use pcre2 from pkg-config if available, if not
build a version from the original code.
Version 9.0.0
- Rename fields of cif::mm::polymer to match the naming
in mmcif_pdbx.dic. Also, related, fix building mm::structure
using the correct mapping between atom_site and residues.
- _atom_site.auth_alt_id does not exist, it should be
_atom_site.pdbx_auth_alt_id of course.
- Added a more lightweight fixup for mmcif_pdbx files
that lack certain categories.
Version 8.0.1
- Fix cif::mm::structure::cleanup_empty_categories, removed too much
- Add default value for B_iso_or_equiv in residue::create_new_atom
- Reconstruct some branch records in bare pdbx files
- Fix parsing PDB files (bug due to missing validator in dest. cat.)
- Do not fail conversion of PDB files when compound info is missing
Version 8.0.0
- A dictionary is for a datablock and a file can have
datablocks with differing dictionaries.
Version 7.0.10
- Deal with missing _entity.type in reconstructing mmCIF files
- Replace code creating quaternions from rotation matrices
that might sometimes give incorrect results. Or at least,
the test code failed on this particular kind of code. Sometimes.
- Fix reconstruction to build pdbx_nonpoly_scheme
Version 7.0.9
- Using cif::file::load_dictionary it is now possible to
load a dictionary along with its extensions in one go.
E.g. file.load_dictionary("mmcif_pdbx;dssp-extension")
- Fix in compound factory to avoid errors with lower case
compound id's
- Fix sac_parser's index to be case insensitive
Version 7.0.8
- Fix PDB Remark 3 parser
- Added three way comparison for point
Version 7.0.7
- Set CIFPP_DATA_DIR on target cifpp for use in projects that include
libcifpp directly
Version 7.0.6
- Fix linking to std::atomic
Version 7.0.5
- Fix case where category index was not updated for updated value
Version 7.0.4
- Do not install headers and library in case we're not the top project
Version 7.0.3
- Fix installation, write exports.hpp again
Version 7.0.2
- Fix in testing error_code results.
Version 7.0.1
- Various reconstruction fixes
- category order in output fixed
- better implementation of constructors for file, datablock and category
- small optimisation in iterator
Version 7.0.0
- Renaming many methods and parameters to be more
consistent with the mmCIF dictionaries.
(Most notably, item used to be called column or
tag sometimes).
- validation_error is now a std::system_error error
value. The exception is gone.
- Added repairSequenceInfo to repair invalid files
Version 6.1.0
- Add formula weight to entity in pdb2cif
- Change order of categories inside a datablock to match order in file
- Change default order to write out categories in a file based on
parent/child relationship
- Added validate_pdbx and recover_pdbx
- Fixed a serious bug in category_index when moving categories
Version 6.0.0
- Drop the use of CCP4's monomer library for compound information
Version 5.2.5
- Correctly import the Eigen3 library
Version 5.2.4
- Changes required to build on Windows
Version 5.2.3
- New constructors for cif::item, one taking std::optional values
and another taking only a name resulting in a value '.' (i.e. inapplicable).
- added cif::cell::get_volume
Version 5.2.2
- Remove dependency on Eigen3 for users of libcifpp
- Fix typos in documentation
- Do not build latex files in documentation
- Fixed conversion from string to integer, would fail on +2 e.g.
- sqrt is not constexpr, thus kGoldenRatio should be const, not constexpr
Version 5.2.1
- New versionstring module
- small fixes for generating documentation
- correctly setting SONAME
Version 5.2.0
- With lots of documentation
- Refactored coloured text output
- Removed the subdirectory cif++/pdb, there now is a single
header file pdb.hpp for I/O of legacy PDB files.
Version 5.1.3
- Dropped pkgconfig support
Version 5.1.2
- New version string code
- Added check for Eigen3 in cifppConfig.cmake
Version 5.1.1
- Added missing include <compare> in symmetry.hpp
- Added empty() to matrix
- Fix for parsing legacy PDB files with a last line that does
not end with a new line character.
Version 5.1
- New parser, optimised for speed
- Fix in unique ID generator
Version 5.0.10
- Fix in progress_bar, was using too much CPU
- Optimised mmCIF parser
Version 5.0.9
- Fix in dihedral angle calculations
- Added create_water to model
- Writing twin domain info in PDB files and more PDB fixes
- remove_atom improved (remove struct_conn records)
- Added a specialisation for category::find1<std::optional>
- fix memory leak in category
Version 5.0.8
- implemented find_first, find_min, find_max and count in category
- find1 now throws an exception if condition does not exactly match one row
- Change in writing out PDB files, now looking up the original auth_seq_num
via the pdbx_xxx_scheme categories based on the atom_site.auth_seq_num ->
pdbx_xxx_scheme.pdb_seq_num relationship.
- fix memory leak in category
Version 5.0.7.1
- Use the implementation from zeep for std::experimental::is_detected
Version 5.0.7
- Reintroduce exports file. For DLL's
Version 5.0.6
- Fix file::contains, using iequals
- Fix is_cis
Version 5.0.5
- Fix code to work on 32 bit machines
Version 5.0.4
- Revert removal of CIFPP_SHARE_DIR export
Version 5.0.3
- Fix installation of libcifpp into the correct locations
Version 5.0.2
- Fix export of CISPEP records in PDB format
- Better support for exporting package_source
Version 5.0.1
- Fix loading dictionaries
- Support for cifv1.0 files
Version 5.0.0
- Total rewrite of cif part
- Removed DSSP code, moved into dssp project itself
Version 4.2.1
- Improved REMARK 3 parser (for TLS in large molecules)
Version 4.2.0
- Yet another rewrite of resource loading
Version 4.1.1
- Fall back to zero charge for scattering factors if the atom
was not found in the table.
- Improve code to locate resources, failing less.
Version 4.1.0
- Some interface changes for mmcif::Atom
Version 4.0.1
- Added a bunch of const methods to Datablock and Category.
- Changed PDB writing interface to accept Datablock instead of File.
Version 4.0.0
- getResidue in mmcif::Structure now requires both a
sequence ID and an auth sequence ID. As a result the code was cleaned
up considerably.
Version 3.0.5
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
Version 3.0.4
- Fix in mmCIF parser, now correctly handles the unquoted
string ??
Version 3.0.3
- Better configuration checks, for atomic e.g.
- Fixed a problem introduced in refactoring mmcif::Atom
- Version string creation
Version 3.0.2
- refactored mmcif::Atom for performance reasons
Version 3.0.1
- Fixed processing of proline restraints file from CCP4, proline
is a peptide, really.
- Added code to facilitate DSSP
Version 3.0.0
- Replaced many strings in the API with string_view for
performance reasons.
- Upgraded mmcif::Structure
- various other small fixes
Version 2.0.5
- Backporting updated CMakeLists.txt file
Version 2.0.4
- Reverted a too strict test when reading cif files.
Version 2.0.3
- Fixed reading mmCIF files where model numbers are used and
model number 1 is missing.
Version 2.0.2
- Added configuration flag to disable downloading CCD data during build
Note that there are now two flags for CCD data:

63
cmake/FindAtomic.cmake Normal file
View File

@@ -0,0 +1,63 @@
# Simple check to see if we need a library for std::atomic
#
# Result:
#   std::atomic   imported interface target; carries libatomic when the
#                 toolchain needs it to link std::atomic code
#   Atomic_FOUND  cache variable, true when a test program could be run
#
# A failed probe is only fatal when the package was requested as REQUIRED;
# an optional find_package(Atomic) simply ends with Atomic_FOUND false.
if(TARGET std::atomic)
  return()
endif()

cmake_minimum_required(VERSION 3.10)

include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceRuns)

# The second probe appends to CMAKE_REQUIRED_LIBRARIES; the push/pop pair
# keeps that change from leaking to the including project.
cmake_push_check_state()

check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)

set(code [[
#include <atomic>
int main(int argc, char** argv) {
std::atomic<long long> s;
++s;
return 0;
}
]])

# Do not rely on whatever value _found may already have in the calling scope.
set(_found 0)

# First try without any extra library ...
check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)

if(_CXX_ATOMIC_BUILTIN)
  set(_found 1)
else()
  # ... and if that fails, retry while linking against libatomic.
  list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)
  check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)

  if(_CXX_ATOMIC_LIB_NEEDED)
    set(_found 1)
  elseif(Atomic_FIND_REQUIRED)
    message(FATAL_ERROR "unable to link C++ std::atomic code: you may need to install GNU libatomic")
  elseif(NOT Atomic_FIND_QUIETLY)
    message(STATUS "unable to link C++ std::atomic code: you may need to install GNU libatomic")
  endif()
endif()

if(_found)
  add_library(std::atomic INTERFACE IMPORTED)
  set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)

  if(_CXX_ATOMIC_BUILTIN)
    # Nothing to add...
  elseif(_CXX_ATOMIC_LIB_NEEDED)
    set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
    set(STDCPPATOMIC_LIBRARY atomic)
  endif()
endif()

cmake_pop_check_state()

set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)
mark_as_advanced(Atomic_FOUND)

# Safety net for the REQUIRED case.
if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
  message(FATAL_ERROR "Cannot run simple program using std::atomic")
endif()

View File

@@ -1,74 +0,0 @@
# Simplistic reimplementation of https://github.com/vector-of-bool/CMakeCM/blob/master/modules/FindFilesystem.cmake
#
# Probes whether a program using std::filesystem can be compiled and linked,
# first without extra libraries, then with -lstdc++fs and finally with -lc++fs.
# On success the imported interface target std::filesystem is created; the
# outcome is stored in the cache variable Filesystem_FOUND.

# Nothing to do when the target was already created by an earlier inclusion.
if(TARGET std::filesystem)
return()
endif()
cmake_minimum_required(VERSION 3.10)
include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceCompiles)
cmake_push_check_state()
# NOTE(review): CMAKE_CXX_STANDARD does not look like one of the variables
# restored by cmake_pop_check_state(), so this setting presumably stays in
# effect for the including project -- confirm whether that is intended.
set(CMAKE_CXX_STANDARD 17)
# NOTE(review): the result of this header check is not used further down.
check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)
# Minimal test program used by all probes below.
set(code [[
#include <cstdlib>
#include <filesystem>
int main() {
auto cwd = std::filesystem::current_path();
return EXIT_SUCCESS;
}
]])
# For GNU compilers up to and including 8.4.0 the probe without linker flags
# is skipped and treated as failed, so the -lstdc++fs probe below is always
# attempted for them (see the linked report).
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 8.4.0)
# >> https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
set(CXX_FILESYSTEM_NO_LINK_NEEDED 0)
else()
# Check a simple filesystem program without any linker flags
check_cxx_source_compiles("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED)
endif()
if(CXX_FILESYSTEM_NO_LINK_NEEDED)
set(_found 1)
else()
# Remember the current library list so that each probe starts from it.
set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES})
# Add the libstdc++ flag
set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs)
check_cxx_source_compiles("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED)
set(_found ${CXX_FILESYSTEM_STDCPPFS_NEEDED})
if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED)
# Try the libc++ flag
set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs)
check_cxx_source_compiles("${code}" CXX_FILESYSTEM_CPPFS_NEEDED)
set(_found ${CXX_FILESYSTEM_CPPFS_NEEDED})
endif()
endif()
# Create the imported target and attach whichever library the probes
# showed to be necessary.
if(_found)
add_library(std::filesystem INTERFACE IMPORTED)
set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17)
if(CXX_FILESYSTEM_NO_LINK_NEEDED)
# Nothing to add...
elseif(CXX_FILESYSTEM_STDCPPFS_NEEDED)
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
endif()
endif()
cmake_pop_check_state()
# FORCE overwrites a value cached by a previous configure run.
set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE)
# Only a REQUIRED request turns a failed probe into a configure error.
if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND)
message(FATAL_ERROR "Cannot run simple program using std::filesystem")
endif()

12
cmake/FindPCRE2.cmake Normal file
View File

@@ -0,0 +1,12 @@
# The problem is, find_package(PCRE2) does not work
# and using pkg-config results in linking to a shared library
# causing all kinds of trouble later on
#
# This module therefore looks up the header and the static 8-bit library
# itself and wraps them in the imported target pcre2-8.
#
# Result:
#   pcre2-8            imported static library target
#   PCRE2_INCLUDEDIR   directory containing pcre2.h
#   PCRE2_LIBRARY      full path of the static library
#   PCRE2_FOUND        set once both of the above were located

# An imported target can only be created once; do nothing when this module
# is pulled in again while the target is already visible.
if(TARGET pcre2-8)
  set(PCRE2_FOUND TRUE)
  return()
endif()

find_path(PCRE2_INCLUDEDIR NAMES pcre2.h HINTS "C:/Program Files (x86)/PCRE2/include" REQUIRED)
find_library(PCRE2_LIBRARY NAMES pcre2-8-static libpcre2-8.a HINTS "C:/Program Files (x86)/PCRE2/lib" REQUIRED)

# Sets PCRE2_FOUND and reports the result in the usual find_package style.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE2 REQUIRED_VARS PCRE2_LIBRARY PCRE2_INCLUDEDIR)

add_library(pcre2-8 STATIC IMPORTED)
target_include_directories(pcre2-8 INTERFACE "${PCRE2_INCLUDEDIR}")
# Consumers must see PCRE2_STATIC when using the static library.
target_compile_definitions(pcre2-8 INTERFACE PCRE2_STATIC)
set_target_properties(pcre2-8 PROPERTIES
  IMPORTED_LOCATION "${PCRE2_LIBRARY}"
  IMPORTED_IMPLIB "${PCRE2_LIBRARY}")

11
cmake/FindSphinx.cmake Normal file
View File

@@ -0,0 +1,11 @@
# Locate the sphinx-build command-line tool; its path ends up in the
# cache variable SPHINX_EXECUTABLE.
find_program(
  SPHINX_EXECUTABLE
  NAMES sphinx-build
  DOC "Path to sphinx-build executable"
)

# Delegate the handling of REQUIRED and QUIET to the standard helper, using
# its keyword signature.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(
  Sphinx
  REQUIRED_VARS SPHINX_EXECUTABLE
  FAIL_MESSAGE "Failed to find sphinx-build executable"
)

View File

@@ -1,284 +0,0 @@
# - Returns a version string from Git
#
# These functions force a re-configure on each git commit so that you can
# trust the values of the variables in your build system.
#
# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
#
# Returns the refspec and sha hash of the current head revision
#
# git_describe(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe on the source tree, and adjusting
# the output so that it tests false if an error occurs.
#
# git_describe_working_tree(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe on the working tree (--dirty option),
# and adjusting the output so that it tests false if an error occurs.
#
# git_get_exact_tag(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe --exact-match on the source tree,
# and adjusting the output so that it tests false if there was no exact
# matching tag.
#
# git_local_changes(<var>)
#
# Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes.
# Uses the return code of "git diff-index --quiet HEAD --".
# Does not regard untracked files.
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2020 Ryan Pavlik <ryan.pavlik@gmail.com> <abiryan@ryand.net>
# http://academic.cleardefinition.com
#
# Copyright 2009-2013, Iowa State University.
# Copyright 2013-2020, Ryan Pavlik
# Copyright 2013-2020, Contributors
# SPDX-License-Identifier: BSL-1.0
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
if(__get_git_revision_description)
return()
endif()
set(__get_git_revision_description YES)
# We must run the following at "include" time, not at function call time,
# to find the path to this module rather than the path to a calling list file
get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)
# Function _git_find_closest_git_dir finds the next closest .git directory
# that is part of any directory in the path defined by _start_dir.
# The result is returned in the parent scope variable whose name is passed
# as variable _git_dir_var. If no .git directory can be found, the
# function returns an empty string via _git_dir_var.
#
# Example: Given a path C:/bla/foo/bar and assuming C:/bla/.git exists and
# neither foo nor bar contain a file/directory .git. This will return
# C:/bla/.git
#
# Walk from _start_dir towards the file system root and report the first
# ".git" entry (file or directory) encountered. The variable named by
# _git_dir_var receives that path in the caller's scope, or an empty string
# when the root is reached without finding one.
function(_git_find_closest_git_dir _start_dir _git_dir_var)
	set(_found_git_dir "")
	set(_probe_dir "${_start_dir}")

	while(TRUE)
		if(EXISTS "${_probe_dir}/.git")
			set(_found_git_dir "${_probe_dir}/.git")
			break()
		endif()

		# Step up one level; when that no longer changes the path we are at
		# the root and there is nothing left to examine.
		get_filename_component(_parent_dir "${_probe_dir}" DIRECTORY)
		if(_parent_dir STREQUAL _probe_dir)
			break()
		endif()
		set(_probe_dir "${_parent_dir}")
	endwhile()

	set(${_git_dir_var} "${_found_git_dir}" PARENT_SCOPE)
endfunction()
# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
#
# Stores the refspec and the hash of the current HEAD in the caller's
# variables named by _refspecvar and _hashvar.
# Both are set to "GITDIR-NOTFOUND" when no usable .git entry is located.
# When the HEAD file itself cannot be found the function returns without
# setting either variable.
# Uses GIT_EXECUTABLE when .git is a file, so find_package(Git) should have
# been called before (the public wrappers in this module do that).
function(get_git_head_revision _refspecvar _hashvar)
_git_find_closest_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)
# The only optional argument recognised is the literal flag below
if("${ARGN}" STREQUAL "ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR")
set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE)
else()
set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE)
endif()
if(NOT "${GIT_DIR}" STREQUAL "")
# A relative path containing ".." means the .git entry lives outside
# (above) the top level source directory
file(RELATIVE_PATH _relative_to_source_dir "${CMAKE_SOURCE_DIR}"
"${GIT_DIR}")
if("${_relative_to_source_dir}" MATCHES "[.][.]" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR)
# We've gone above the CMake root dir.
set(GIT_DIR "")
endif()
endif()
if("${GIT_DIR}" STREQUAL "")
set(${_refspecvar}
"GITDIR-NOTFOUND"
PARENT_SCOPE)
set(${_hashvar}
"GITDIR-NOTFOUND"
PARENT_SCOPE)
return()
endif()
# Check if the current source dir is a git submodule or a worktree.
# In both cases .git is a file instead of a directory.
#
if(NOT IS_DIRECTORY ${GIT_DIR})
# The following git command will return a non empty string that
# points to the super project working tree if the current
# source dir is inside a git submodule.
# Otherwise the command will return an empty string.
#
execute_process(
COMMAND "${GIT_EXECUTABLE}" rev-parse
--show-superproject-working-tree
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
OUTPUT_VARIABLE out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT "${out}" STREQUAL "")
# If out is NOT empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule.
# The .git file then holds a "gitdir: <path>" line; the path is resolved
# relative to the directory containing that .git file.
file(READ ${GIT_DIR} submodule)
string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE
${submodule})
string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE)
get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH)
get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}
ABSOLUTE)
set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
else()
# GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
file(READ ${GIT_DIR} worktree_ref)
# The .git file contains a path to the worktree information directory
# inside the parent git repo of the worktree.
#
string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir
${worktree_ref})
string(STRIP ${git_worktree_dir} git_worktree_dir)
# HEAD is read from the worktree information directory, GIT_DIR is
# redirected to the closest .git found from there upwards
_git_find_closest_git_dir("${git_worktree_dir}" GIT_DIR)
set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
endif()
else()
set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
endif()
# Scratch directory in the build tree for the copied git files
set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
if(NOT EXISTS "${GIT_DATA}")
file(MAKE_DIRECTORY "${GIT_DATA}")
endif()
if(NOT EXISTS "${HEAD_SOURCE_FILE}")
# NOTE(review): the output variables are left untouched on this path
return()
endif()
set(HEAD_FILE "${GIT_DATA}/HEAD")
# configure_file makes the copied HEAD an input of the configure step
configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)
# Instantiate the helper script (substituting HEAD_FILE, GIT_DIR and
# GIT_DATA) and run it; it defines HEAD_REF and HEAD_HASH in this scope
configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
"${GIT_DATA}/grabRef.cmake" @ONLY)
include("${GIT_DATA}/grabRef.cmake")
set(${_refspecvar}
"${HEAD_REF}"
PARENT_SCOPE)
set(${_hashvar}
"${HEAD_HASH}"
PARENT_SCOPE)
endfunction()
# git_describe(<var> [<additional arguments to git describe> ...])
#
# Runs "git describe --tags --always <head-hash> <extra args>" in the current
# source directory and stores the outcome in <var>. Every failure is encoded
# as a value ending in "-NOTFOUND" so that it tests false in if().
function(git_describe _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()
	get_git_head_revision(_head_refspec _head_hash)

	if(NOT GIT_FOUND)
		set(_description "GIT-NOTFOUND")
	elseif(NOT _head_hash)
		set(_description "HEAD-HASH-NOTFOUND")
	else()
		# TODO: the extra arguments are handed to git unchecked, they are
		# not sanitized in any way.
		execute_process(
			COMMAND "${GIT_EXECUTABLE}" describe --tags --always ${_head_hash} ${ARGN}
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			RESULT_VARIABLE _exit_status
			OUTPUT_VARIABLE _description
			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

		# Append the exit status in a form that evaluates to false
		if(NOT _exit_status EQUAL 0)
			set(_description "${_description}-${_exit_status}-NOTFOUND")
		endif()
	endif()

	set(${_var} "${_description}" PARENT_SCOPE)
endfunction()
# git_describe_working_tree(<var> [<additional arguments to git describe> ...])
#
# Stores the output of "git describe --dirty <extra args>" in <var>.
# When git is unavailable the value is "GIT-NOTFOUND"; when git exits with
# an error, "-<exit status>-NOTFOUND" is appended to its output.
function(git_describe_working_tree _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()

	if(GIT_FOUND)
		execute_process(
			COMMAND "${GIT_EXECUTABLE}" describe --dirty ${ARGN}
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			RESULT_VARIABLE _exit_status
			OUTPUT_VARIABLE _description
			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

		if(NOT _exit_status EQUAL 0)
			set(_description "${_description}-${_exit_status}-NOTFOUND")
		endif()
	else()
		set(_description "GIT-NOTFOUND")
	endif()

	set(${_var} "${_description}" PARENT_SCOPE)
endfunction()
# git_get_exact_tag(<var> [<additional arguments to git describe> ...])
#
# Thin wrapper around git_describe that adds --exact-match; the value
# produced by git_describe is handed on to <var> unchanged.
function(git_get_exact_tag _var)
	git_describe(_exact_tag --exact-match ${ARGN})
	set(${_var} "${_exact_tag}" PARENT_SCOPE)
endfunction()
# git_local_changes(<var>)
#
# Sets <var> to "CLEAN" when "git diff-index --quiet HEAD --" exits with
# status 0 and to "DIRTY" otherwise. When git or the HEAD hash cannot be
# determined the value is "GIT-NOTFOUND" or "HEAD-HASH-NOTFOUND".
function(git_local_changes _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()
	get_git_head_revision(_head_refspec _head_hash)

	if(NOT GIT_FOUND)
		set(_state "GIT-NOTFOUND")
	elseif(NOT _head_hash)
		set(_state "HEAD-HASH-NOTFOUND")
	else()
		# Only the exit status matters, the captured output is not used
		execute_process(
			COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD --
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			RESULT_VARIABLE _exit_status
			OUTPUT_VARIABLE _ignored_output
			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

		if(_exit_status EQUAL 0)
			set(_state "CLEAN")
		else()
			set(_state "DIRTY")
		endif()
	endif()

	set(${_var} "${_state}" PARENT_SCOPE)
endfunction()

View File

@@ -1,43 +0,0 @@
#
# Internal file for GetGitRevisionDescription.cmake
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2010 Ryan Pavlik <rpavlik@iastate.edu> <abiryan@ryand.net>
# http://academic.cleardefinition.com
# Iowa State University HCI Graduate Program/VRAC
#
# Copyright 2009-2012, Iowa State University
# Copyright 2011-2015, Contributors
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
# SPDX-License-Identifier: BSL-1.0
# Script template. The at-sign placeholders (HEAD_FILE, GIT_DIR, GIT_DATA)
# are substituted by configure_file; the resulting script is include()d and
# defines HEAD_HASH, plus HEAD_REF when HEAD points at a named branch.
set(HEAD_HASH)

file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)

if(HEAD_CONTENTS MATCHES "ref")
	# named branch
	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
		# The branch has a loose ref file, it contains the hash
		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
	else()
		# Otherwise look the branch up in packed-refs
		configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY)
		file(READ "@GIT_DATA@/packed-refs" PACKED_REFS)
		# The file contents must be quoted: unquoted they disappear when
		# empty and are split on ';', both of which break the if() command.
		if("${PACKED_REFS}" MATCHES "([0-9a-z]*) ${HEAD_REF}")
			set(HEAD_HASH "${CMAKE_MATCH_1}")
		endif()
	endif()
else()
	# detached HEAD
	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
endif()

# No hash from packed-refs: take it from the copied ref file
if(NOT HEAD_HASH)
	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
	string(STRIP "${HEAD_HASH}" HEAD_HASH)
endif()

275
cmake/VersionString.cmake Normal file
View File

@@ -0,0 +1,275 @@
# SPDX-License-Identifier: BSD-2-Clause
# Copyright (c) 2021-2023 Maarten L. Hekkelman
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This cmake extension writes out a revision.hpp file in a specified directory.
# The file will contain a C++ inline function that can be used to write out
# version information.
cmake_minimum_required(VERSION 3.15)
# We want the revision.hpp file to be updated whenever the status of the
# git repository changes. Use the same technique as in GetGitRevisionDescription.cmake
# from https://github.com/rpavlik/cmake-modules
#[=======================================================================[.rst:
.. command:: write_version_header
Write a file named revision.hpp containing version info::
write_version_header(<destdir>
[FILE_NAME <file-name>]
[LIB_NAME <library-name>]
)
This command will generate the code to write a file named
revision.hpp in the directory ``<destdir>``.
``FILE_NAME``
Specify the name of the file to create, default is ``revision.hpp``.
``LIB_NAME``
Specify the library name which will be used as a prefix part for the
variables contained in the revision file.
#]=======================================================================]
# Record the location of this module now, not at the time the CMakeLists.txt
# is being processed
get_filename_component(_current_cmake_module_dir ${CMAKE_CURRENT_LIST_FILE} PATH)
# First locate a .git file or directory.
# Search for a ".git" entry (file or directory), starting in _start_dir and
# moving up one directory at a time.
#
#   _start_dir  directory where the search starts
#   _variable   name of the variable in the caller's scope that receives the
#               path of the .git entry, or an empty string if none was found
function(_get_git_dir _start_dir _variable)
	set(cur_dir "${_start_dir}")
	set(git_dir "${cur_dir}/.git")

	while(NOT EXISTS "${git_dir}")
		# .git dir not found, search parent directories
		set(prev_dir "${cur_dir}")
		get_filename_component(cur_dir "${cur_dir}" DIRECTORY)

		# All operands are quoted: an unquoted expansion vanishes when it is
		# empty and is split on ';', turning this into a malformed if().
		if("${cur_dir}" STREQUAL "${prev_dir}" OR "${cur_dir}" STREQUAL "${_start_dir}")
			# we are not in git since we either hit root or
			# the ${_start_dir} which should be the top
			set(${_variable} "" PARENT_SCOPE)
			return()
		endif()

		set(git_dir "${cur_dir}/.git")
	endwhile()

	set(${_variable} "${git_dir}" PARENT_SCOPE)
endfunction()
# Locate the git refspec hash and load the hash
# This code locates the file containing the git refspec/hash
# and loads it. Doing it this way assures that each time the git
# repository changes the revision.hpp file gets out of date.
# Determine the hash of the current git HEAD.
#
#   _data_dir  existing directory in the build tree used for the copies of
#              the git files and for the generated helper script
#   _variable  name of the variable in the caller's scope that receives the
#              hash; it is set to an empty string when git, the .git entry or
#              the HEAD file cannot be found
#
# The git files are copied with configure_file, which is what makes the
# configure step depend on them.
function(_get_git_hash _data_dir _variable)
	# Be pessimistic: the caller gets an empty hash unless all steps succeed.
	# Note the dereference, the result variable is the one *named by* _variable.
	set(${_variable} "" PARENT_SCOPE)

	# The helper script generated below refers to its scratch directory as
	# VERSION_STRING_DATA. Bind that name to our own argument instead of
	# relying on a variable that happens to exist in the caller's scope.
	set(VERSION_STRING_DATA "${_data_dir}")

	# Load git package if needed
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()

	# And fail if not found
	if(NOT GIT_FOUND)
		return()
	endif()

	# Locate the nearest .git file or directory
	_get_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)

	# And fail if not found
	if("${GIT_DIR}" STREQUAL "")
		return()
	endif()

	# Check if the current source dir is a git submodule or a worktree.
	# In both cases .git is a file instead of a directory.
	if(IS_DIRECTORY "${GIT_DIR}")
		set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
	else()
		# The following git command will return a non empty string that
		# points to the super project working tree if the current
		# source dir is inside a git submodule.
		# Otherwise the command will return an empty string.
		execute_process(
			COMMAND "${GIT_EXECUTABLE}" rev-parse
				--show-superproject-working-tree
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			OUTPUT_VARIABLE out
			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

		if(NOT "${out}" STREQUAL "")
			# If out is not empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule.
			# The .git file holds a "gitdir: <path>" line, the path is resolved
			# relative to the directory containing that file.
			file(READ "${GIT_DIR}" submodule)
			string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE
				"${submodule}")
			string(STRIP "${GIT_DIR_RELATIVE}" GIT_DIR_RELATIVE)
			get_filename_component(SUBMODULE_DIR "${GIT_DIR}" PATH)
			get_filename_component(GIT_DIR "${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}"
				ABSOLUTE)
			set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
		else()
			# GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
			file(READ "${GIT_DIR}" worktree_ref)

			# The .git file contains a path to the worktree information directory
			# inside the parent git repo of the worktree.
			string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir
				"${worktree_ref}")
			string(STRIP "${git_worktree_dir}" git_worktree_dir)
			_get_git_dir("${git_worktree_dir}" GIT_DIR)
			set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
		endif()
	endif()

	# Fail if the 'head' file was not found
	if(NOT EXISTS "${HEAD_SOURCE_FILE}")
		return()
	endif()

	# Make a copy of the head file
	set(HEAD_FILE "${_data_dir}/HEAD")
	configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)

	# Now we create a cmake file that will read the contents of this
	# head file in the appropriate way. The bracket argument is written
	# verbatim; the at-sign placeholders in it (HEAD_FILE, GIT_DIR,
	# VERSION_STRING_DATA) are filled in by the configure_file call below.
	file(WRITE "${_data_dir}/grab-ref.cmake.in" [[
set(HEAD_HASH)
file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
if(HEAD_CONTENTS MATCHES "ref")
	# named branch
	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
		configure_file("@GIT_DIR@/${HEAD_REF}" "@VERSION_STRING_DATA@/head-ref" COPYONLY)
	else()
		configure_file("@GIT_DIR@/packed-refs" "@VERSION_STRING_DATA@/packed-refs" COPYONLY)
		file(READ "@VERSION_STRING_DATA@/packed-refs" PACKED_REFS)
		# Quoted, since file contents may be empty or contain a semicolon
		if("${PACKED_REFS}" MATCHES "([0-9a-z]*) ${HEAD_REF}")
			set(HEAD_HASH "${CMAKE_MATCH_1}")
		endif()
	endif()
else()
	# detached HEAD
	configure_file("@GIT_DIR@/HEAD" "@VERSION_STRING_DATA@/head-ref" COPYONLY)
endif()
if(NOT HEAD_HASH)
	file(READ "@VERSION_STRING_DATA@/head-ref" HEAD_HASH LIMIT 1024)
	string(STRIP "${HEAD_HASH}" HEAD_HASH)
endif()
]])

	# Read the template from the very location it was written to
	configure_file("${_data_dir}/grab-ref.cmake.in"
		"${_data_dir}/grab-ref.cmake" @ONLY)

	# Include the aforementioned file, this will define
	# the HEAD_HASH variable we're looking for
	include("${_data_dir}/grab-ref.cmake")

	set(${_variable} "${HEAD_HASH}" PARENT_SCOPE)
endfunction()
# Create a revision file, containing the current git version info, if any
# write_version_header(<destdir> [FILE_NAME <file-name>] [LIB_NAME <library-name>])
#
# Create a revision file, containing the current git version info, if any.
#
#   dir        existing directory in which the header is created
#   FILE_NAME  name of the generated file, default is revision.hpp
#   LIB_NAME   when given, it is used as prefix in the generated identifiers
#              and the header registers itself as a library instead of as
#              the main program
#
# Without usable git information (no git, no repository, or no description
# of the form build-<n>-g<hash>) the header is still written, with build
# number 0 and empty tag and date strings.
function(write_version_header dir)
	set(flags)
	set(one_value_keywords LIB_NAME FILE_NAME)
	set(multi_value_keywords)
	cmake_parse_arguments(VERSION_STRING_OPTION
		"${flags}" "${one_value_keywords}" "${multi_value_keywords}" ${ARGN})

	# parameter check
	if(NOT IS_DIRECTORY "${dir}")
		message(FATAL_ERROR "First parameter to write_version_header should be a directory where the final revision.hpp file will be placed")
	endif()

	if(VERSION_STRING_OPTION_FILE_NAME)
		set(file_name "${VERSION_STRING_OPTION_FILE_NAME}")
	else()
		set(file_name "revision.hpp")
	endif()

	# Where to store intermediate files
	set(VERSION_STRING_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/VersionString")
	if(NOT EXISTS "${VERSION_STRING_DATA}")
		file(MAKE_DIRECTORY "${VERSION_STRING_DATA}")
	endif()

	# Start without a description. Function scopes inherit the variables of
	# their caller, so without this a REVISION_STRING defined further up
	# would be parsed below whenever git fails to provide one.
	set(REVISION_STRING "")

	# Load the git hash using the wizard-like code above.
	_get_git_hash("${VERSION_STRING_DATA}" GIT_HASH)

	# If git was found, fetch the git description string
	if(GIT_HASH)
		execute_process(
			COMMAND "${GIT_EXECUTABLE}" describe --dirty --match=build
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			RESULT_VARIABLE res
			OUTPUT_VARIABLE out
			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

		if(res EQUAL 0)
			set(REVISION_STRING "${out}")
		else()
			message(STATUS "Git hash not found, does this project have a 'build' tag?")
		endif()
	else()
		message(STATUS "Git hash not found")
	endif()

	# Check the revision string, if it matches we fill in the required info
	if(REVISION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
		set(BUILD_NUMBER ${CMAKE_MATCH_1})

		# A trailing asterisk marks a working tree with local modifications
		if(CMAKE_MATCH_3)
			set(REVISION_GIT_TAGREF "${CMAKE_MATCH_2}*")
		else()
			set(REVISION_GIT_TAGREF "${CMAKE_MATCH_2}")
		endif()

		# This is the moment of configuration, not the date of the commit
		string(TIMESTAMP REVISION_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
	else()
		set(REVISION_GIT_TAGREF "")
		set(BUILD_NUMBER 0)
		set(REVISION_DATE_TIME "")
	endif()

	if(VERSION_STRING_OPTION_LIB_NAME)
		set(VAR_PREFIX "${VERSION_STRING_OPTION_LIB_NAME}")
		set(IDENT_PREFIX "${VERSION_STRING_OPTION_LIB_NAME}_")
		set(BOOL_IS_MAIN "false")
	else()
		set(VAR_PREFIX "")
		set(IDENT_PREFIX "")
		set(BOOL_IS_MAIN "true")
	endif()

	configure_file("${_current_cmake_module_dir}/revision.hpp.in" "${dir}/${file_name}" @ONLY)
endfunction()

View File

@@ -0,0 +1,13 @@
@PACKAGE_INIT@

# Resolve the dependencies before loading the exported targets (presumably
# those targets refer to them - verify against the export set).
# find_dependency() itself forwards the QUIET and REQUIRED options of the
# find_package(cifpp ...) call that is being processed. Passing REQUIRED
# explicitly would turn a missing dependency into a fatal error even for an
# optional find_package(cifpp).
include(CMakeFindDependencyMacro)
find_dependency(Threads)
find_dependency(ZLIB)
find_dependency(SQLite3)

include("${CMAKE_CURRENT_LIST_DIR}/cifpp-targets.cmake")

# Location of the data files, checked for existence
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")

check_required_components(cifpp)

124
cmake/revision.hpp.in Normal file
View File

@@ -0,0 +1,124 @@
// This file was generated by VersionString.cmake
#pragma once
#include <ostream>
// Version constants; the values and the optional name prefix are substituted
// when this template is processed by configure_file.
// Name and version of the project
constexpr const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
constexpr const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
// Build number; write() below only prints build details when this is not 0
constexpr int k@VAR_PREFIX@BuildNumber = @BUILD_NUMBER@;
// Git reference of the revision, may be an empty string
constexpr const char k@VAR_PREFIX@RevisionGitTag[] = "@REVISION_GIT_TAGREF@";
// Date/time string printed next to the build number, may be an empty string
constexpr const char k@VAR_PREFIX@RevisionDate[] = "@REVISION_DATE_TIME@";
#ifndef VERSION_INFO_DEFINED
#define VERSION_INFO_DEFINED 1

namespace version_info_v1_1
{

/// \brief Version record that registers itself on construction.
///
/// A record is either stored as 'the' main record or pushed onto the front
/// of a singly linked list of library records. The records are not owned by
/// the registry, only pointers to them are kept.
class version_info_base
{
  public:
	/// \brief Write the main record (if any) to \a os. When \a verbose is
	/// true, all library records follow, each preceded by a line with a dash.
	static void write_version_string(std::ostream &os, bool verbose)
	{
		auto *main_info = registered_main();
		if (main_info != nullptr)
			main_info->write(os, verbose);

		if (!verbose)
			return;

		auto *entry = registered_libraries();
		while (entry != nullptr)
		{
			os << "-\n";
			entry->write(os, verbose);
			entry = entry->m_next;
		}
	}

  protected:
	using version_info_ptr = version_info_base *;

	/// \brief Store the strings (as pointers, nothing is copied) and register
	/// this record as main program when \a is_main is true, as library otherwise.
	version_info_base(const char *name, const char *version, int build_number,
		const char *git_tag, const char *revision_date, bool is_main) noexcept
		: m_name(name)
		, m_version(version)
		, m_build_number(build_number)
		, m_git_tag(git_tag)
		, m_revision_date(revision_date)
	{
		version_info_ptr &slot = is_main ? registered_main() : registered_libraries();

		// Libraries are chained: the previous head becomes our successor.
		// The main record simply replaces whatever was registered before.
		if (!is_main)
			m_next = slot;

		slot = this;
	}

	/// \brief Write name and version; in verbose mode, and only for a
	/// non-zero build number, also the build line and a non-empty git tag.
	void write(std::ostream &os, bool verbose)
	{
		os << m_name << " version " << m_version << '\n';

		if (!verbose || m_build_number == 0)
			return;

		os << "build: " << m_build_number << ' ' << m_revision_date << '\n';

		if (*m_git_tag != '\0')
			os << "git tag: " << m_git_tag << '\n';
	}

	/// \brief Access to the pointer to the main record
	static version_info_ptr &registered_main() noexcept
	{
		static version_info_ptr s_main = nullptr;
		return s_main;
	}

	/// \brief Access to the head of the list of library records
	static version_info_ptr &registered_libraries() noexcept
	{
		static version_info_ptr s_head = nullptr;
		return s_head;
	}

	const char *m_name;
	const char *m_version;
	int m_build_number;
	const char *m_git_tag;
	const char *m_revision_date;
	version_info_base *m_next = nullptr; ///< Next library record, if any
};

/// \brief Version record tagged with the type \a T of its implementation
template <typename T>
class version_info : public version_info_base
{
  public:
	using implementation_type = T;

	version_info(const char *name, const char *version, int build_number,
		const char *git_tag, const char *revision_date, bool is_main) noexcept
		: version_info_base(name, version, build_number, git_tag, revision_date, is_main)
	{
	}
};

} // namespace version_info_v1_1

/// \brief Write the registered version information to \a os
inline void write_version_string(std::ostream &os, bool verbose)
{
	version_info_v1_1::version_info_base::write_version_string(os, verbose);
}

#endif
// The registration object for this header. Its constructor hands the
// constants defined at the top of this file to the version_info base class,
// which records the object either as the main program or as a library,
// depending on the boolean substituted as last argument.
// NOTE(review): this is a const object at namespace scope in a header, so
// presumably every translation unit including this file gets (and registers)
// its own instance - confirm this is intended.
const class version_info_@IDENT_PREFIX@impl : public version_info_v1_1::version_info<version_info_@IDENT_PREFIX@impl>
{
public:
version_info_@IDENT_PREFIX@impl() noexcept
: version_info(k@VAR_PREFIX@ProjectName, k@VAR_PREFIX@VersionNumber,
k@VAR_PREFIX@BuildNumber, k@VAR_PREFIX@RevisionGitTag, k@VAR_PREFIX@RevisionDate, @BOOL_IS_MAIN@)
{
}
} s_version_info_@IDENT_PREFIX@instance;

17
cmake/test-charconv.cpp Normal file
View File

@@ -0,0 +1,17 @@
#include <charconv>
#include <cassert>
#include <cstring>
/// Probe for a working floating point std::from_chars (presumably compiled
/// and run by the build configuration - see where this file is referenced).
///
/// \return 0 when "1.0" is parsed completely, without error, into 1.0f;
///         1 otherwise.
int main()
{
	const char s[] = "1.0";
	const char *const last = s + std::strlen(s);

	float v{};
	const auto r = std::from_chars(s, last, v);

	// Explicit checks instead of assert(): an assert compiles to nothing
	// when NDEBUG is defined, which would make this probe always succeed.
	if (r.ec != std::errc{})
		return 1;

	// The whole input must have been consumed. This is a comparison; the
	// previous code assigned to r.ptr here and therefore never failed.
	if (r.ptr != last)
		return 1;

	if (v != 1.0f)
		return 1;

	return 0;
}

48
docs/CMakeLists.txt Normal file
View File

@@ -0,0 +1,48 @@
# Build script for the documentation: Doxygen writes XML for the public
# headers, sphinx-build then produces HTML from the files in this directory
# and is given the location of that XML as a breathe project.
# Both tools are mandatory once this directory is part of the build.
find_package(Doxygen REQUIRED)
find_package(Sphinx REQUIRED)
# Find all the public headers
# get_target_property(CIFPP_PUBLIC_HEADER_DIR libCIFPP INTERFACE_INCLUDE_DIRECTORIES)
set(CIFPP_PUBLIC_HEADER_DIR ${PROJECT_SOURCE_DIR}/include)
# The list is created at configure time and only serves as dependency list
# for the Doxygen command below
file(GLOB_RECURSE CIFPP_PUBLIC_HEADERS ${CIFPP_PUBLIC_HEADER_DIR}/*.hpp)
set(DOXYGEN_INPUT_DIR ${CIFPP_PUBLIC_HEADER_DIR})
set(DOXYGEN_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/xml)
set(DOXYGEN_INDEX_FILE ${DOXYGEN_OUTPUT_DIR}/index.xml)
set(DOXYFILE_IN ${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in)
set(DOXYFILE_OUT ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile)
# Replace variables inside @@ with the current values
configure_file(${DOXYFILE_IN} ${DOXYFILE_OUT} @ONLY)
# Rule that creates the directory for the XML output
add_custom_command(
OUTPUT ${DOXYGEN_OUTPUT_DIR}
COMMAND ${CMAKE_COMMAND} -E make_directory ${DOXYGEN_OUTPUT_DIR})
# Rule that runs Doxygen; index.xml stands for the complete XML output.
# NOTE(review): the output directory is the OUTPUT of the rule above and is
# listed as BYPRODUCTS here as well - check that all generators accept this.
# NOTE(review): MAIN_DEPENDENCY is documented as taking a single file, here
# two are passed - confirm which one CMake actually uses.
add_custom_command(OUTPUT ${DOXYGEN_INDEX_FILE}
BYPRODUCTS ${DOXYGEN_OUTPUT_DIR}
DEPENDS ${DOXYGEN_OUTPUT_DIR} ${CIFPP_PUBLIC_HEADERS} ${DOXYFILE_OUT}
COMMAND ${DOXYGEN_EXECUTABLE} ${DOXYFILE_OUT}
MAIN_DEPENDENCY ${DOXYFILE_OUT} ${DOXYFILE_IN}
COMMENT "Generating docs")
# Part of the default build (ALL); the project name keeps the target name unique
add_custom_target("Doxygen-${PROJECT_NAME}" ALL DEPENDS ${DOXYGEN_INDEX_FILE})
# Note that conf.py is generated into the source directory, not the build directory
configure_file(${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in ${CMAKE_CURRENT_SOURCE_DIR}/conf.py @ONLY)
set(SPHINX_SOURCE ${CMAKE_CURRENT_SOURCE_DIR})
set(SPHINX_BUILD ${CMAKE_CURRENT_BINARY_DIR}/sphinx)
# Run sphinx-build with the html builder once the Doxygen XML is available
add_custom_target("Sphinx-${PROJECT_NAME}" ALL
COMMAND ${SPHINX_EXECUTABLE} -b html
-Dbreathe_projects.${PROJECT_NAME}=${DOXYGEN_OUTPUT_DIR}
${SPHINX_SOURCE} ${SPHINX_BUILD}
DEPENDS ${DOXYGEN_INDEX_FILE}
BYPRODUCTS ${CMAKE_CURRENT_SOURCE_DIR}/api
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
COMMENT "Generating documentation with Sphinx")
# Install the generated HTML, leaving out the .doctrees and .buildinfo
# entries found in the output directory
install(DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/sphinx/
DESTINATION ${CMAKE_INSTALL_DOCDIR}
PATTERN .doctrees EXCLUDE
PATTERN .buildinfo EXCLUDE)

10
docs/Doxyfile.in Normal file
View File

@@ -0,0 +1,10 @@
EXCLUDE_SYMBOLS = cif::detail::*, std*
FILE_PATTERNS = *.hpp
STRIP_FROM_PATH = @DOXYGEN_INPUT_DIR@
RECURSIVE = YES
GENERATE_XML = YES
GENERATE_LATEX = NO
PREDEFINED += and=&& or=|| not=! CIFPP_EXPORT= HAVE_LIBCLIPPER=1
GENERATE_HTML = NO
GENERATE_TODOLIST = NO
INPUT = @DOXYGEN_INPUT_DIR@

4
docs/_static/.gitignore vendored Normal file
View File

@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore

400
docs/basics.rst Normal file
View File

@@ -0,0 +1,400 @@
Basic usage
===========
This library, *libcifpp*, is a generic *CIF* library with some specific additions to work with *mmCIF* files. The main focus of this library is to make sure that files read or written are valid. That is, they are syntactically valid *and* their content is valid with respect to a CIF dictionary, if such a dictionary is available and specified.
Reading a file is as simple as:
.. code-block:: cpp
#include <cif++.hpp>
cif::file f("/path/to/file.cif");
The file may also be compressed using *gzip* which is detected automatically.
Writing the file out again is just as simple. To write it to the terminal you can do:
.. code-block:: cpp
std::cout << f;
// or
f.save(std::cout);
// or write a compressed file using gzip compression:
f.save("/tmp/f.cif.gz");
CIF files contain one or more datablocks. To print out the names of all datablocks in our file:
.. code-block:: cpp
for (auto &db : f)
std::cout << db.name() << '\n';
Most often *libcifpp* is used to read in structure files in mmCIF format. These files only contain one datablock and so you can safely use code like this:
.. code-block:: cpp
// get a reference to the first datablock in f
auto &db = f.front();
But if you know the name of the datablock, this also works:
.. code-block:: cpp
// get a reference to the datablock named '1CBS'
auto &db = f["1CBS"];
Now, each datablock contains categories. To print out all their names:
.. code-block:: cpp
for (auto &cat : db)
std::cout << cat.name() << '\n';
But you probably know which category you need, so let's fetch it by name:
.. _atom_site-label:
.. code-block:: cpp
// get a reference to the atom_site category in db
auto &atom_site = db["atom_site"];
// and make sure there's some data in it:
assert(not atom_site.empty());
.. note::
Note that we omit the leading underscore in the name of the category here.
Categories contain rows of data and each row has fields or items. Referencing a row in a category results in a :cpp:class:`cif::row_handle` object which you can use to request or manipulate item data.
.. code-block:: cpp
// Get the first row in atom_site
auto rh = atom_site.front();
// Get the label_atom_id value from this row handle as a std::string
std::string atom_id = rh["label_atom_id"].as<std::string>();
// Get the x, y and z coordinates using structured bindings
const auto &[x, y, z] = rh.get<float,float,float>("Cartn_x", "Cartn_y", "Cartn_z");
// Assign a new value to the x coordinate of our atom
rh["Cartn_x"] = x + 1;
Querying
--------
Walking over the rows in a category is often not very useful. More often you are interested in specific rows in a category. The function :cpp:func:`cif::category::find` and friends are here to help.
What these functions have in common is that they return data based on a query implemented by :cpp:class:`cif::condition`. These condition objects are built in code using regular C++ syntax. The most basic example of a query is:
.. code-block:: cpp
cif::condition c = cif::key("id") == 1;
Here the condition is that all rows returned should have a value of 1 in their item named *id*. Likewise you can use other data types and even combine those. Oh, and I said we use regular C++ syntax for conditions, so you may as well use other operators to compare values:
.. code-block:: cpp
// condition for C-alpha atoms having an occupancy less than 1.0
cif::condition c = cif::key("occupancy") < 1.0f and cif::key("label_atom_id") == "CA";
Using the namespace *cif::literals* that code becomes a little less verbose:
.. code-block:: cpp
using namespace cif::literals;
cif::condition c = "occupancy"_key < 1.0f and "label_atom_id"_key == "CA";
Conditions can also be combined:
.. code-block:: cpp
cif::condition c = "occupancy"_key < 1.0f and "label_atom_id"_key == "CA";
// extend the condition by requiring the compound ID to be unequal to PRO
c = std::move(c) and "label_comp_id"_key != "PRO";
.. note::
Note the use of std::move here.
Using queries constructed in this way is simple:
.. code-block:: cpp
cif::condition c = ...
auto result = atom_site.find(std::move(c));
// or construct a condition inline:
auto result = atom_site.find("label_atom_id"_key == "CA");
In the example above the result is a range of :cpp:class:`cif::row_handle` objects. Often, using individual field values is more useful:
.. code-block:: cpp
// Requesting a single item:
for (auto id : atom_site.find<std::string>("label_atom_id"_key == "CA", "id"))
std::cout << "ID for CA: " << id << '\n';
// Requesting multiple items:
for (const auto &[id, x, y, z] : atom_site.find<std::string,float,float,float>("label_atom_id"_key == "CA",
"id", "Cartn_x", "Cartn_y", "Cartn_z"))
{
std::cout << "Atom " << id << " is at [" << x << ", " << y << ", " << z << "]\n";
}
Returning a complete set is often not required. If you only want the first result, you can use :cpp:func:`cif::category::find_first` as shown here:
.. code-block:: cpp
// return the ID item for the first C-alpha atom
std::string v1 = atom_site.find_first<std::string>("label_atom_id"_key == "CA", "id");
// If you're not sure the row exists, use std::optional
auto v2 = atom_site.find_first<std::optional<std::string>>("label_atom_id"_key == "CA", "id");
if (v2.has_value())
...
There are cases when you really need exactly one result. The :cpp:func:`cif::category::find1` can be used in that case, it will throw an exception if the query does not result in exactly one row.
NULL and ANY
------------
Sometimes items may be empty. The complication is that empty comes in two flavors: unknown and null. Null in *CIF* parlance means the item should not contain a value since it makes no sense in this case; the value stored in the file is a single dot character: ``'.'``. E.g. *atom_site* records may have a NULL value for label_seq_id for atoms that are part of a *non-polymer*.
The other empty value is indicated by a question mark character: ``'?'``. This means the value is simply unknown.
Both these are NULL in *libcifpp* conditions and can be searched for using :cpp:var:`cif::null`.
So you can search for:
.. code-block:: cpp
cif::condition c = "label_seq_id"_key == cif::null;
You might also want to look for a certain value and don't care in which item it is stored, in that case you can use :cpp:var:`cif::any`.
.. code-block:: cpp
cif::condition c = cif::any == "foo";
In linked records, the linking items should either have a value in both parent and child, or be NULL in both. To handle that, you can request the value returned by find to be of type std::optional and then use that value to build the query. As an example, let's find the location of the atom that is referenced as the first atom in a struct_conn record:
.. code-block:: cpp
// Take references to the two categories we need
auto &struct_conn = db["struct_conn"];
auto &atom_site = db["atom_site"];
// Loop over all rows in struct_conn taking only the values we need
// Note that the label_seq_id is returned as a std::optional<int>
// That means it may contain an integer or may be empty
for (const auto &[asym1, seqid1, authseqid1, atomid1] :
struct_conn.rows<std::string,std::optional<int>,std::string,std::string>(
"ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id"
))
{
// Find the location of the first atom
cif::point p1 = atom_site.find1<float,float,float>(
"label_asym_id"_key == asym1 and "label_seq_id"_key == seqid1 and "auth_seq_id"_key == authseqid1 and "label_atom_id"_key == atomid1,
"cartn_x", "cartn_y", "cartn_z");
}
Validation
----------
CIF files can have a dictionary attached. And based on such a dictionary a :cpp:class:`cif::validator` object can be constructed which in turn can be used to validate the content of the file.
A simple case:
.. code-block:: cpp
#include <cif++.hpp>
cif::file f("1cbs.cif.gz");
f.load_dictionary("mmcif_pdbx.dic");
if (not f.is_valid())
std::cout << "This file is not valid\n";
If you want to know why it is not valid, you should set the global variable :cpp:var:`cif::VERBOSE` to something higher than zero. Depending on the value more or less diagnostic output is sent to std::cerr.
In the case above we load a dictionary based on its name. You can of course also load dictionaries based on a specific file, that's a bit more work:
.. code-block:: cpp
std::filesystem::ifstream dictFile("/tmp/my-dictionary.dic");
auto &validator = cif::parse_dictionary("my-dictionary", dictFile);
cif::file f("1cbs.cif.gz");
// assign the validator
f.set_validator(&validator);
// alternatively, load it by name
f.load_dictionary("my-dictionary");
if (not f.is_valid())
std::cout << "This file is not valid\n";
Creating your own dictionary is a lot of work, especially if you are only extending an existing dictionary with a couple of new categories or items. So, what you can do is extend a loaded validator like this (code taken from DSSP):
.. code-block:: cpp
// db is a cif::datablock reference containing an mmCIF file with DSSP annotations
auto &validator = const_cast<cif::validator &>(*db.get_validator());
if (validator.get_validator_for_category("dssp_struct_summary") == nullptr)
{
auto dssp_extension = cif::load_resource("dssp-extension.dic");
if (dssp_extension)
cif::extend_dictionary(validator, *dssp_extension);
}
.. note::
In the example above we're loading the data using :doc:`/resources`. See the documentation on that for more information.
If a validator has been assigned to a file, assignments to items are checked for valid data. So the following code will throw an exception (see: :ref:`_atom_site-label`):
.. code-block:: cpp
auto rh = atom_site.front();
rh["Cartn_x"] = "foo";
Linking
-------
Based on information recorded in dictionary files (see :ref:`Validation`) you can locate linked records in parent or child categories.
To make this example not too complex, let's assume the following example file:
.. code-block:: cif
data_test
loop_
_cat_1.id
_cat_1.name
_cat_1.desc
1 aap Aap
2 noot Noot
3 mies Mies
loop_
_cat_2.id
_cat_2.name
_cat_2.num
_cat_2.desc
1 aap 1 'Een dier'
2 aap 2 'Een andere aap'
3 noot 1 'walnoot bijvoorbeeld'
And we have a dictionary containing the following link definition:
.. code-block:: cif
loop_
_pdbx_item_linked_group_list.parent_category_id
_pdbx_item_linked_group_list.link_group_id
_pdbx_item_linked_group_list.parent_name
_pdbx_item_linked_group_list.child_name
_pdbx_item_linked_group_list.child_category_id
cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
So, there are links between *cat_1* and *cat_2* based on the value in items named *name*. Using this information, we can now locate children and parents:
.. code-block:: cpp
// Assuming the file was loaded in f:
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
auto &cat3 = f.front()["cat_3"];
// Loop over all ape's in cat2
for (auto r : cat1.get_children(cat1.find1("name"_key == "aap"), cat2))
std::cout << r.get<std::string>("desc") << '\n';
Updating a value in an item in a parent category will update the corresponding value in all related children:
.. code-block:: cpp
auto r1 = cat1.find1("id"_key == 1);
r1["name"] = "aapje";
auto rs1 = cat2.find("name"_key == "aapje");
assert(rs1.size() == 2);
However, changing a value in a child record will not update the parent. This may result in an invalid file since you may then have a child that has no parent:
.. code-block:: cpp
auto r2 = cat2.find1("id"_key == 3);
r2["name"] = "wim";
assert(f.is_valid() == false);
So you have to fix this yourself by inserting a new item in cat1 with the new value.
.. _splitting-rows:
Another situation is when you change a value in a parent and updating children might introduce a situation where you need to split a child. To give an example, consider this:
.. code-block:: cif
data_test
loop_
_cat_1.id
_cat_1.name
_cat_1.desc
1 aap Aap
2 noot Noot
3 mies Mies
loop_
_cat_2.id
_cat_2.name
_cat_2.num
_cat_2.desc
1 aap 1 'Een dier'
2 aap 2 'Een andere aap'
3 noot 1 'walnoot bijvoorbeeld'
loop_
_cat_3.id
_cat_3.name
_cat_3.num
1 aap 1
2 aap 2
And we have a dictionary containing the following link definition (reversed compared to the previous example):
.. code-block:: cif
loop_
_pdbx_item_linked_group_list.parent_category_id
_pdbx_item_linked_group_list.link_group_id
_pdbx_item_linked_group_list.parent_name
_pdbx_item_linked_group_list.child_name
_pdbx_item_linked_group_list.child_category_id
cat_2 1 '_cat_2.name' '_cat_1.name' cat_1
cat_3 1 '_cat_3.name' '_cat_2.name' cat_2
cat_3 1 '_cat_3.num' '_cat_2.num' cat_2
So *cat3* is a parent of *cat2* and *cat2* is a parent of *cat1*. Now, if you change the *name* value of the first row of *cat3* to 'aapje', the corresponding row in *cat2* is updated as well. But when you update *cat2* you have to update *cat1* too. And simply changing the name field in row 1 of *cat1* is wrong. The default behaviour in libcifpp is to split the record in *cat1* and have a new child with the new name whereas the other remains as is.
The new *cat1* will thus be like:
.. code-block:: cif
loop_
_cat_1.id
_cat_1.name
_cat_1.desc
1 aapje Aap
2 noot Noot
3 mies Mies
5 aap Aap

49
docs/bitsandpieces.rst Normal file
View File

@@ -0,0 +1,49 @@
Bits & Pieces
=============
The *libcifpp* library offers some extra code that makes the life of developers a bit easier.
gzio
----
To work with compressed data files a *std::streambuf* implementation was added based on the code in `gxrio <https://github.com/mhekkel/gxrio>`_. This allows you to read and write compressed data streams transparently.
When working with files you can use :cpp:class:`cif::gzio::ifstream` and :cpp:class:`cif::gzio::ofstream`. The selection of whether to use compression or not is based on the file extension. If it is ``.gz`` gzip compression is used:
.. code-block:: cpp
cif::gzio::ifstream file("my-file.txt.gz");
std::string line;
while (std::getline(file, line))
std::cout << line << '\n';
Writing is equally easy:
.. code-block:: cpp
cif::gzio::ofstream file("/tmp/output.txt.gz");
file << "Hello, world!";
file.close();
You can also use the :cpp:class:`cif::gzio::istream` and feed it a *std::streambuf* object that may or may not contain compressed data. In that case the first bytes of the input are sniffed and if it is gzip compressed data, decompression will be done.
A progress bar
--------------
Applications based on *libcifpp* may have a longer run time. To give some feedback to the user running your application in a terminal you can use the :cpp:class:`cif::progress_bar`. This class will display an ASCII progress bar along with optional status messages, but only if output is to a real TTY (terminal).
A progress bar is also shown only if the duration is more than two seconds, to avoid flashing progress bars for short actions.
The progress bar uses an internal progress counter that starts at zero and ends when the max value has been reached after which it will be removed from the screen. Updating this internal progress counter can be done by adding a number of steps calling :cpp:func:`cif::progress_bar::consumed` or by setting the exact value for the counter by calling :cpp:func:`cif::progress_bar::progress`.
Colouring output
----------------
It is also nice to emphasise some output in the terminal by using colours. For this you can create output manipulators using :cpp:func:`cif::coloured`. To write a string in white, and bold letters on a red background you can do:
.. code-block:: cpp
using namespace cif::colour;
std::cout << cif::coloured("Hello, world!", white, red, bold) << '\n';

33
docs/compound.rst Normal file
View File

@@ -0,0 +1,33 @@
Chemical Compounds
==================
The data in *CIF* and *mmCIF* files often describes the structure of some chemical compounds. The structure is recorded in the categories *atom_site* and friends. Records in these categories refer to chemical compounds using a compound ID. This compound ID is the ID field of the *chem_comp* category. For all of the known compounds in the PDB there is an entry in the Chemical Compounds Dictionary or `CCD <https://www.wwpdb.org/data/ccd>`_. If *libcifpp* was properly installed you have a copy of this file somewhere on your disk. And if you have installed the update scripts, a fresh version of this file will be retrieved weekly.
As an alternative to CCD there are the monomer library files from `CCP4 <https://www.ccp4.ac.uk/>`_. These contain somewhat different data but the overlap is good enough for usage in *libcifpp*.
Information about compounds is captured in the :cpp:class:`cif::compound`. An instance of a compound object for a certain compound ID can be obtained by using the singleton :cpp:class:`cif::compound_factory`.
If the compound you want to use is not available in the CCD or in CCP4, you can add that information yourself. For this you can use the method :cpp:func:`cif::compound_factory::push_dictionary`.
So, given that we have CCD, CCP4 monomer library and user-defined compound definitions, what will you get when you try to retrieve such a compound by ID? The answer is, the factory has a stack of compound generators. The first thrown on the stack is the one for a CCD file (*components.cif*) if it can be found. Then, if the *CLIBD_MON* environment variable is defined, a generator for monomer library files is added to the stack. And then all generators for files you added using *push_dictionary* are added in order. The generators are searched in the reverse order in which they were added to see if one of them creates a compound object for the ID. If no compound was created at all, nullptr is returned.
Updating CCD
------------
The CCD data is stored in a single file called *components.cif* and can be downloaded from `CCD <https://www.wwpdb.org/data/ccd>`_.
As can be read in the section on resources (:doc:`/resources`) files in libcifpp are loaded in a specific order. If the CCD datafile was downloaded during installation, a copy can be found in the directory */usr/share/libcifpp/* (if you installed in */usr*). This is a static file and will not be updated until the next installation of libcifpp.
When configuring libcifpp, you can specify the *CIFPP_INSTALL_UPDATE_SCRIPT* option, as in:
.. code-block:: console
cmake -S . -B build -DCIFPP_INSTALL_UPDATE_SCRIPT=ON # ... more options?
This will install a script named *update-libcifpp-data* in */etc/cron.weekly* or */etc/periodic/weekly*. This file uses a config file named */etc/libcifpp.conf* which you then need to edit. In this config file the following line needs to be uncommented:
.. code-block:: console
# update=true
After that, the update script will weekly download the latest components.cif file to */var/cache/libcifpp*.

66
docs/conf.py.in Normal file
View File

@@ -0,0 +1,66 @@
project = '@PROJECT_NAME@'
copyright = '2023, Maarten L. Hekkelman'
author = 'Maarten L. Hekkelman'
release = '@PROJECT_VERSION@'
# -- General configuration ---------------------------------------------------
extensions = [
"breathe",
"exhale",
"myst_parser"
]
breathe_projects = {
"@PROJECT_NAME@": "../build/docs/xml"
}
myst_enable_extensions = [ "colon_fence" ]
breathe_default_project = "@PROJECT_NAME@"
# Setup the exhale extension
exhale_args = {
# These arguments are required
"containmentFolder": "./api",
"rootFileName": "library_root.rst",
"doxygenStripFromPath": "../include/",
# Heavily encouraged optional argument (see docs)
"rootFileTitle": "API Reference",
# Suggested optional arguments
# "createTreeView": True,
# TIP: if using the sphinx-bootstrap-theme, you need
# "treeViewIsBootstrap": True,
"exhaleExecutesDoxygen": False,
"contentsDirectives" : False,
"verboseBuild": False
}
# Tell sphinx what the primary language being documented is.
primary_domain = 'cpp'
# Tell sphinx what the pygments highlight language should be.
highlight_language = 'cpp'
templates_path = ['_templates']
exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# -- Options for HTML output -------------------------------------------------
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,
# so a file named "default.css" will overwrite the builtin "default.css".
html_static_path = ['_static']
html_theme_options = {
}
cpp_index_common_prefix = [
'cif::'
]

2
docs/genindex.rst Normal file
View File

@@ -0,0 +1,2 @@
Index
=====

46
docs/index.rst Normal file
View File

@@ -0,0 +1,46 @@
Introduction
============
Information on 3D structures of proteins originally came formatted in `PDB <http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html>`_ files. Although the specification for this format had some real restrictions like a mandatory HEADER and CRYST line, many programs implemented this very poorly often writing out only ATOM records. And users became used to this.
The legacy PDB format has some severe limitations rendering it useless for all but very small protein structures. A new format called `mmCIF <https://mmcif.wwpdb.org/>`_ has been around for decades and now is the default format for the Protein Data Bank.
The software developed in the `PDB-REDO <https://pdb-redo.eu/>`_ project aims at improving 3D models based on original experimental data. For this, the tools need to be able to work with both legacy PDB and mmCIF files. A decision was made to make mmCIF leading internally in all programs and convert legacy PDB directly into mmCIF before processing the data. A robust conversion had to be developed to make this possible since, as noted above, files can come with more or less information, sometimes making it necessary to do a sequence alignment to find out the exact residue numbers.
And so libcif++ came to life, a library to work with mmCIF files. Work on this library started early 2017 and has developed quite a bit since then. To reduce dependency on other libraries, some functionality was added that is not strictly related to reading and writing mmCIF files but may be useful nonetheless. This is mostly code that is used in 3D calculations and symmetry operations.
Design
------
The main part of the library is a set of classes that work with mmCIF files. They are:
* :cpp:class:`cif::file`
* :cpp:class:`cif::datablock`
* :cpp:class:`cif::category`
The :cpp:class:`cif::file` class encapsulates the contents of a mmCIF file. In such a file there are one or more :cpp:class:`cif::datablock` objects and each datablock contains one or more :cpp:class:`cif::category` objects.
Synopsis
--------
Using *libcifpp* is easy, if you are familiar with modern C++:
.. literalinclude:: ../README.md
:language: c++
:start-after: ```cpp
:end-before: ```
.. toctree::
:maxdepth: 2
:caption: Contents
self
basics.rst
compound.rst
model.rst
resources.rst
symmetry.rst
bitsandpieces.rst
api/library_root.rst
genindex.rst

36
docs/model.rst Normal file
View File

@@ -0,0 +1,36 @@
Molecular Model
===============
Theoretically it is possible to get along with only the classes *cif::file*, *cif::datablock* and *cif::category*. But to keep your data complete and valid you then have to update lots of categories for all but the simplest manipulations. For this *libcifpp* comes with a higher level API modelling atoms, residues, monomers, polymers and complete structures in their respective classes.
Note that these classes only work properly if you are using *mmCIF* files and have an mmcif_pdbx dictionary available, either compiled in using `mrc <https://github.com/mhekkel/mrc.git>`_ or installed in the proper location.
.. note::
This part of *libcifpp* is the least developed part. What is available should work but functionality should eventually be extended.
Atom
----
The :cpp:class:`cif::mm::atom` is a lightweight proxy class giving access to the data stored in *atom_site* and *atom_site_anisotrop*. It only caches the most often used item data and every modification is directly written back into the *mmCIF* categories.
Atoms can be copied by value with low cost. The atom class only contains a pointer to an implementation that is reference counted.
Residue, Monomer and Polymer
----------------------------
The :cpp:class:`cif::mm::residue`, :cpp:class:`cif::mm::monomer` and :cpp:class:`cif::mm::polymer` implement what you'd expect. A monomer is a residue that is part of a polymer and thus has a sequence number and siblings.
Sugars & Branches
-----------------
There are also classes for modelling sugars and sugar branches. You can create sugar branches
Structure
---------
The :cpp:class:`cif::mm::structure` can be used to load one of the models from an *mmCIF* file. By default the first model is loaded. (Multiple models are often only available in files containing structures defined using NMR).
A structure holds a reference to a *cif::datablock* and retrieves its data from this datablock and writes any modification back into that datablock.
One of the most useful parts of the structure class is the ability to create and modify residues. This updates related *chem_comp* and *entity* categories as well.

5
docs/requirements.in Normal file
View File

@@ -0,0 +1,5 @@
sphinx<5
exhale==0.3.6
myst-parser
breathe
sphinx_rtd_theme==1.3.0

93
docs/requirements.txt Normal file
View File

@@ -0,0 +1,93 @@
#
# This file is autogenerated by pip-compile with Python 3.10
# by the following command:
#
# pip-compile --output-file=requirements.txt requirements.in
#
alabaster==0.7.13
# via sphinx
babel==2.12.1
# via sphinx
beautifulsoup4==4.12.2
# via exhale
breathe==4.35.0
# via
# -r requirements.in
# exhale
certifi==2023.7.22
# via requests
charset-normalizer==3.2.0
# via requests
docutils==0.17.1
# via
# breathe
# exhale
# myst-parser
# sphinx
# sphinx-rtd-theme
exhale==0.3.6
# via -r requirements.in
idna==3.4
# via requests
imagesize==1.4.1
# via sphinx
jinja2==3.1.2
# via
# myst-parser
# sphinx
lxml==4.9.3
# via exhale
markdown-it-py==2.2.0
# via
# mdit-py-plugins
# myst-parser
markupsafe==2.1.3
# via jinja2
mdit-py-plugins==0.3.5
# via myst-parser
mdurl==0.1.2
# via markdown-it-py
myst-parser==0.18.1
# via -r requirements.in
packaging==23.1
# via sphinx
pygments==2.16.1
# via sphinx
pyyaml==6.0.1
# via myst-parser
requests==2.31.0
# via sphinx
six==1.16.0
# via exhale
snowballstemmer==2.2.0
# via sphinx
soupsieve==2.4.1
# via beautifulsoup4
sphinx==4.5.0
# via
# -r requirements.in
# breathe
# exhale
# myst-parser
# sphinx-rtd-theme
# sphinxcontrib-jquery
sphinx-rtd-theme==1.3.0
# via -r requirements.in
sphinxcontrib-applehelp==1.0.4
# via sphinx
sphinxcontrib-devhelp==1.0.2
# via sphinx
sphinxcontrib-htmlhelp==2.0.1
# via sphinx
sphinxcontrib-jquery==4.1
# via sphinx-rtd-theme
sphinxcontrib-jsmath==1.0.1
# via sphinx
sphinxcontrib-qthelp==1.0.3
# via sphinx
sphinxcontrib-serializinghtml==1.1.5
# via sphinx
typing-extensions==4.7.1
# via myst-parser
urllib3==2.0.4
# via requests

47
docs/resources.rst Normal file
View File

@@ -0,0 +1,47 @@
Resources
=========
Programs using libcifpp often need access to common data files. E.g. CIF dictionary files, CCP4 monomer restraints files or the CCD data file. In libcifpp these files are called resources. These files are often also based on external sources that are updated on a regular basis.
Resources can be compiled into the executable so that the resulting
application can be made portable to other machines. For this you
need to use `mrc <https://github.com/mhekkel/mrc.git>`_ which only works
on Un*x like systems using the ELF executable format or on MS Windows.
But resources may also be located as files on the filesystem at
specific locations. And you can specify your own location for
files (a directory) or even override named resources with your
own data.
Loading Resources
-----------------
No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*.
The order in which resources are searched for is:
* Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
for this name.
* Search the paths specified by :cpp:func:`cif::add_data_directory`, last one
added is searched first
* Search the so-called *CACHE_DIR*. This location is defined
at compile time and based on the installation directory of
libcifpp. Usually it is */var/cache/libcifpp*.
It is in this directory where the cron job for libcifpp will
put the updated files weekly.
* If the *CCP4* environment is available, the
*$ENV{CCP4}/share/libcifpp* is searched.
* If the environment variable *LIBCIFPP_DATA_DIR* is set it
is searched
* The *DATA_DIR* is searched, this is also a variable defined
at compile time, also based on the installation directory
of libcifpp. It usually is */usr/share/libcifpp*
* As a last resort an attempt is made to load the data from
resources compiled by `mrc <https://github.com/mhekkel/mrc.git>`_.

108
docs/symmetry.rst Normal file
View File

@@ -0,0 +1,108 @@
Symmetry & Geometry
===================
Although not really a core *CIF* functionality, when working with *mmCIF* files you often need to work with symmetry information. And symmetry works on points in a certain space and thus geometry calculations are also something you need often. Former versions of *libcifpp* used to use `clipper <http://www.ysbl.york.ac.uk/~cowtan/clipper/doc/index.html>`_ to do many of these calculations, but that introduces a dependency and besides, the way clipper numbers symmetry operations is not completely compatible with the way this is done in the PDB.
Points
------
The most basic type in use is :cpp:type:`cif::point`. It can be thought of as a point in space with three coordinates, but it is also often used as a vector in 3d space. To keep the interface simple there's no separate vector type.
Many functions are available in :ref:`file_cif++_point.hpp` that work on points. There are functions to calculate the :cpp:func:`cif::distance` between two points and also functions to calculate dot products, cross products and dihedral angles between sets of points.
Quaternions
-----------
All operations inside *libcifpp* that perform some kind of rotation use :cpp:type:`cif::quaternion`. The reason to use Quaternions is not only that they are cool, they are faster than multiplying with a matrix and the results also suffer less from numerical instability.
Matrix
------
Although Quaternions are the preferred way of doing rotations, not every manipulation is a rotation and thus we need a matrix class as well. Matrices and their operations are encoded as matrix_expressions in *libcifpp* allowing the compiler to generate very fast code. See the :ref:`file_cif++_matrix.hpp` for what is on offer.
Crystals
--------
The *CIF* and *mmCIF* were initially developed to store crystallographic information on structures. Apart from coordinates and the chemical information the crystallographic information is important. This information can be split into two parts, a unit cell and a set of :ref:`symmetry-ops` making up a spacegroup. The spacegroup number and name are stored in the *symmetry* category. The corresponding symmetry operations can be obtained in *libcifpp* by using the :cpp:class:`cif::spacegroup`. The cell is stored in the category *cell* and likewise can be loaded using the :cpp:class:`cif::cell`. Together these two classes make up a crystal and so we have a :cpp:class:`cif::crystal` which contains both. You can easily create such a crystal object by passing the datablock containing the data to the constructor. As in:
.. code:: cpp
// Load the file
cif::file f("1cbs.cif.gz");
auto &db = f.front();
cif::crystal c(db);
.. _symmetry-ops:
Symmetry operations
-------------------
Each basic symmetry operation in the crystallographic world consists of a matrix multiplication followed by a translation. To apply such an operation on a Cartesian coordinate you first have to convert the point into a fractional coordinate with respect to the unit cell of the crystal, then apply the matrix and translation operations and then convert the result back into Cartesian coordinates. This is all done by the proper routines in *libcifpp*.
Symmetry operations are encoded as a string in *mmCIF* PDBx files. The format is a string with the rotational number followed by an underscore and then the encoded translation in each direction where 5 means no translation. So, the identity operator is ``1_555`` meaning that we have rotational number 1 (which is always the identity rotation, point multiplied with the identity matrix) and a translation of zero in each direction.
To give an idea how this works, here's a piece of code copied from one of the unit tests in *libcifpp*. It takes the *struct_conn* records in a certain PDB file and checks whether the distances in each row correspond to what we can calculate.
.. code:: cpp
using namespace cif::literals;
// Load the file
cif::file f(gTestDir / "2bi3.cif.gz");
// Simply assume we can use the first datablock
auto &db = f.front();
// Load the crystal information
cif::crystal c(db);
// Take references to the two categories we need
auto struct_conn = db["struct_conn"];
auto atom_site = db["atom_site"];
// Loop over all rows in struct_conn taking only the values we need
for (const auto &[
asym1, seqid1, authseqid1, atomid1, symm1,
asym2, seqid2, authseqid2, atomid2, symm2,
dist] : struct_conn.find<
std::string,std::optional<int>,std::string,std::string,std::string,
std::string,std::optional<int>,std::string,std::string,std::string,
float>(
cif::key("ptnr1_symmetry") != "1_555" or cif::key("ptnr2_symmetry") != "1_555",
"ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id", "ptnr1_symmetry",
"ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_auth_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry",
"pdbx_dist_value"
))
{
// Find the location of the first atom
cif::point p1 = atom_site.find1<float,float,float>(
"label_asym_id"_key == asym1 and "label_seq_id"_key == seqid1 and "auth_seq_id"_key == authseqid1 and "label_atom_id"_key == atomid1,
"cartn_x", "cartn_y", "cartn_z");
// Find the location of the second atom
cif::point p2 = atom_site.find1<float,float,float>(
"label_asym_id"_key == asym2 and "label_seq_id"_key == seqid2 and "auth_seq_id"_key == authseqid2 and "label_atom_id"_key == atomid2,
"cartn_x", "cartn_y", "cartn_z");
// Calculate the position of the first atom using the symmetry operator defined in struct_conn
auto sa1 = c.symmetry_copy(p1, cif::sym_op(symm1));
// Calculate the position of the second atom using the symmetry operator defined in struct_conn
auto sa2 = c.symmetry_copy(p2, cif::sym_op(symm2));
// The distance between these symmetry atoms should be equal to the distance in the struct_conn record
assert(cif::distance(sa1, sa2) == dist);
// And to show how you can obtain the closest symmetry copy of an atom near another one:
// here we request the symmetry copy of p2 that lies closest to p1
const auto &[d, p, so] = c.closest_symmetry_copy(p1, p2);
// And that should of course be equal to the location in struct_conn for p2
assert(p.m_x == sa2.m_x);
assert(p.m_y == sa2.m_y);
assert(p.m_z == sa2.m_z);
// Distance and symmetry operator string should also be the same
assert(d == dist);
assert(so.string() == symm2);
}

Binary file not shown.

7
examples/CMakeLists.txt Normal file
View File

@@ -0,0 +1,7 @@
cmake_minimum_required(VERSION 3.15)
project(cifpp_example LANGUAGES CXX)
find_package(cifpp REQUIRED)
add_executable(example example.cpp)
target_link_libraries(example cifpp::cifpp)

View File

@@ -1,30 +1,37 @@
#include <iostream>
#include <filesystem>
#include <iostream>
#include <cif++/Cif++.hpp>
#include <cif++.hpp>
namespace fs = std::filesystem;
int main()
int main(int argc, char *argv[])
{
fs::path in("1cbs.cif.gz");
cif::File file;
file.loadDictionary("mmcif_pdbx_v50");
file.load("1cbs.cif.gz");
auto& db = file.firstDatablock()["atom_site"];
auto n = db.find(cif::Key("label_atom_id") == "OXT").size();
std::cout << "File contains " << db.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
<< "residues with an OXT are:" << std::endl;
for (const auto& [asym, comp, seqnr]: db.find<std::string,std::string,int>(
cif::Key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
if (argc != 2)
{
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
std::cerr << "Usage: example <inputfile>\n";
exit(1);
}
cif::file file(argv[1]);
if (file.empty())
{
std::cerr << "Empty file\n";
exit(1);
}
auto &db = file.front();
auto &atom_site = db["atom_site"];
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT\n"
<< "residues with an OXT are:\n";
for (const auto &[asym, comp, seqnr] : atom_site.find<std::string, std::string, int>(
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
{
std::cout << asym << ' ' << comp << ' ' << seqnr << '\n';
}
return 0;

View File

@@ -1,8 +0,0 @@
CXX = c++ -std=c++17
CXXFLAGS = $(shell pkg-config --cflags libcifpp)
LIBS = $(shell pkg-config --libs libcifpp)
all: example
example: example.cpp
$(CXX) -o $@ $? $(CXXFLAGS) $(LIBS)

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,32 +26,28 @@
#pragma once
#include <vector>
#include <string>
#include <tuple>
#include "cif++/Cif++.hpp"
namespace cif
{
extern const int
kResidueNrWildcard,
kNoSeqNum;
struct TLSSelection;
typedef std::unique_ptr<TLSSelection> TLSSelectionPtr;
struct TLSResidue;
struct TLSSelection
{
virtual ~TLSSelection() {}
virtual void CollectResidues(cif::Datablock& db, std::vector<TLSResidue>& residues, std::size_t indentLevel = 0) const = 0;
std::vector<std::tuple<std::string,int,int>> GetRanges(cif::Datablock& db, bool pdbNamespace) const;
};
// Low level: get the selections
TLSSelectionPtr ParseSelectionDetails(const std::string& program, const std::string& selection);
}
// IWYU pragma: begin_exports
#include "cif++/atom_type.hpp"
#include "cif++/category.hpp"
#include "cif++/compound.hpp"
#include "cif++/condition.hpp"
#include "cif++/cql.hpp"
#include "cif++/datablock.hpp"
#include "cif++/dictionary_parser.hpp"
#include "cif++/exports.hpp"
#include "cif++/file.hpp"
#include "cif++/format.hpp"
#include "cif++/gzio.hpp"
#include "cif++/item.hpp"
#include "cif++/iterator.hpp"
#include "cif++/matrix.hpp"
#include "cif++/model.hpp"
#include "cif++/parser.hpp"
#include "cif++/pdb.hpp"
#include "cif++/point.hpp"
#include "cif++/row.hpp"
#include "cif++/symmetry.hpp"
#include "cif++/text.hpp"
#include "cif++/utilities.hpp"
#include "cif++/validate.hpp"
// IWYU pragma: end_exports

View File

@@ -1,245 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <cstdint>
#include <string>
#include <stdexcept>
namespace mmcif
{
// Chemical element identifiers, stored in a single byte.
// For regular elements the enumerator value equals the atomic number.
// Two special values exist: Nn (0) for an unknown element and D (129) for deuterium.
// Note that the enumerators are not listed in ascending order: the lanthanides
// (58-71) and actinides (90-103) follow after the other elements.
enum AtomType : uint8_t
{
Nn = 0, // Unknown
H = 1, // Hydrogen
He = 2, // Helium
Li = 3, // Lithium
Be = 4, // Beryllium
B = 5, // Boron
C = 6, // Carbon
N = 7, // Nitrogen
O = 8, // Oxygen
F = 9, // Fluorine
Ne = 10, // Neon
Na = 11, // Sodium
Mg = 12, // Magnesium
Al = 13, // Aluminium
Si = 14, // Silicon
P = 15, // Phosphorus
S = 16, // Sulfur
Cl = 17, // Chlorine
Ar = 18, // Argon
K = 19, // Potassium
Ca = 20, // Calcium
Sc = 21, // Scandium
Ti = 22, // Titanium
V = 23, // Vanadium
Cr = 24, // Chromium
Mn = 25, // Manganese
Fe = 26, // Iron
Co = 27, // Cobalt
Ni = 28, // Nickel
Cu = 29, // Copper
Zn = 30, // Zinc
Ga = 31, // Gallium
Ge = 32, // Germanium
As = 33, // Arsenic
Se = 34, // Selenium
Br = 35, // Bromine
Kr = 36, // Krypton
Rb = 37, // Rubidium
Sr = 38, // Strontium
Y = 39, // Yttrium
Zr = 40, // Zirconium
Nb = 41, // Niobium
Mo = 42, // Molybdenum
Tc = 43, // Technetium
Ru = 44, // Ruthenium
Rh = 45, // Rhodium
Pd = 46, // Palladium
Ag = 47, // Silver
Cd = 48, // Cadmium
In = 49, // Indium
Sn = 50, // Tin
Sb = 51, // Antimony
Te = 52, // Tellurium
I = 53, // Iodine
Xe = 54, // Xenon
Cs = 55, // Caesium
Ba = 56, // Barium
La = 57, // Lanthanum
Hf = 72, // Hafnium
Ta = 73, // Tantalum
W = 74, // Tungsten
Re = 75, // Rhenium
Os = 76, // Osmium
Ir = 77, // Iridium
Pt = 78, // Platinum
Au = 79, // Gold
Hg = 80, // Mercury
Tl = 81, // Thallium
Pb = 82, // Lead
Bi = 83, // Bismuth
Po = 84, // Polonium
At = 85, // Astatine
Rn = 86, // Radon
Fr = 87, // Francium
Ra = 88, // Radium
Ac = 89, // Actinium
Rf = 104, // Rutherfordium
Db = 105, // Dubnium
Sg = 106, // Seaborgium
Bh = 107, // Bohrium
Hs = 108, // Hassium
Mt = 109, // Meitnerium
Ds = 110, // Darmstadtium
Rg = 111, // Roentgenium
Cn = 112, // Copernicium
Nh = 113, // Nihonium
Fl = 114, // Flerovium
Mc = 115, // Moscovium
Lv = 116, // Livermorium
Ts = 117, // Tennessine
Og = 118, // Oganesson
// Lanthanides (58-71)
Ce = 58, // Cerium
Pr = 59, // Praseodymium
Nd = 60, // Neodymium
Pm = 61, // Promethium
Sm = 62, // Samarium
Eu = 63, // Europium
Gd = 64, // Gadolinium
Tb = 65, // Terbium
Dy = 66, // Dysprosium
Ho = 67, // Holmium
Er = 68, // Erbium
Tm = 69, // Thulium
Yb = 70, // Ytterbium
Lu = 71, // Lutetium
// Actinides (90-103)
Th = 90, // Thorium
Pa = 91, // Protactinium
U = 92, // Uranium
Np = 93, // Neptunium
Pu = 94, // Plutonium
Am = 95, // Americium
Cm = 96, // Curium
Bk = 97, // Berkelium
Cf = 98, // Californium
Es = 99, // Einsteinium
Fm = 100, // Fermium
Md = 101, // Mendelevium
No = 102, // Nobelium
Lr = 103, // Lawrencium
// Special value outside the range of atomic numbers
D = 129, // Deuterium
};
// --------------------------------------------------------------------
// AtomTypeInfo
/// Selects one of the radii stored per element.
/// The enumerators are used as array indices, so they must stay contiguous
/// starting at zero with eRadiusTypeCount as the final entry.
enum RadiusType
{
	eRadiusCalculated = 0,
	eRadiusEmpirical,
	eRadiusCovalentEmpirical,
	eRadiusSingleBond,
	eRadiusDoubleBond,
	eRadiusTripleBond,
	eRadiusVanderWaals,

	eRadiusTypeCount // number of radius kinds, not a radius itself
};
// Static description of one element. The member order is part of the layout
// used for aggregate initialisation, so do not reorder the fields.
struct AtomTypeInfo
{
AtomType type; // element identifier
std::string name; // element name
std::string symbol; // element symbol
float weight; // NOTE(review): presumably the atomic weight; unit not visible here
bool metal; // true when the element is regarded as a metal
// One radius per RadiusType, indexed by that enum.
// AtomTypeTraits::radius() divides the stored value by 100 before returning it.
float radii[eRadiusTypeCount];
};
extern const AtomTypeInfo kKnownAtoms[];
// --------------------------------------------------------------------
// AtomTypeTraits
/// Read-only view on the AtomTypeInfo record of a single element.
/// An instance can be created from either an AtomType value or an element
/// symbol; all accessors simply forward to the referenced record.
class AtomTypeTraits
{
  public:
	AtomTypeTraits(AtomType a);
	AtomTypeTraits(const std::string &symbol);

	AtomType type() const { return mInfo->type; }
	std::string name() const { return mInfo->name; }
	std::string symbol() const { return mInfo->symbol; }
	float weight() const { return mInfo->weight; }
	bool isMetal() const { return mInfo->metal; }

	static bool isElement(const std::string &symbol);
	static bool isMetal(const std::string &symbol);

	/// Returns the stored radius of the requested kind divided by 100.
	/// \throws std::invalid_argument when \a type is not a valid radius kind
	float radius(RadiusType type = eRadiusSingleBond) const
	{
		if (type < eRadiusTypeCount)
			return mInfo->radii[type] / 100.f;

		throw std::invalid_argument("invalid radius requested");
	}

	/// Data type encapsulating the Waasmaier & Kirfel scattering factors
	/// in a simplified form (only a and b).
	/// The electron scattering factors use the same layout.
	struct SFData
	{
		double a[6], b[6];
	};

	/// To get the Cval and Siva values, pass this constant as charge to wksf().
	enum
	{
		kWKSFVal = -99
	};

	const SFData &wksf(int charge = 0) const;
	const SFData &elsf() const;

  private:
	const struct AtomTypeInfo *mInfo;
};
}

View File

@@ -1,101 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <unordered_map>
#include <filesystem>
#include <stdexcept>
#include "cif++/Structure.hpp"
namespace mmcif
{
/// Exception type used for BondMap errors; it only carries a message.
class BondMapException : public std::runtime_error
{
  public:
	/// \param msg text that will be returned by what()
	BondMapException(const std::string &msg)
		: std::runtime_error(msg)
	{
	}
};
/// Lookup structure answering whether two atoms of a Structure are bonded.
///
/// Atoms are mapped to 32 bit indices via their id; a pair of indices is
/// packed into one 64 bit key that is stored in a set. The class is not copyable.
class BondMap
{
  public:
	BondMap(const Structure &p);

	BondMap(const BondMap &) = delete;
	BondMap &operator=(const BondMap &) = delete;

	/// True when atoms \a a and \a b are recorded as bonded.
	/// std::unordered_map::at throws std::out_of_range for an unknown atom id.
	bool operator()(const Atom &a, const Atom &b) const
	{
		return isBonded(index.at(a.id()), index.at(b.id()));
	}

	/// True when the pair \a a, \a b is present in the 1-4 set.
	/// std::unordered_map::at throws std::out_of_range for an unknown atom id.
	bool is1_4(const Atom &a, const Atom &b) const
	{
		const uint32_t first = index.at(a.id());
		const uint32_t second = index.at(b.id());
		return bond_1_4.count(key(first, second)) != 0;
	}

	/// Links coming from the struct_conn records.
	std::vector<std::string> linked(const Atom &a) const;

	/// This list of atom IDs comes from either the CCD or the loaded CCP4 dictionaries.
	static std::vector<std::string> atomIDsForCompound(const std::string &compoundID);

  private:
	bool isBonded(uint32_t ai, uint32_t bi) const
	{
		return bond.count(key(ai, bi)) != 0;
	}

	/// Packs an unordered index pair: the smaller index goes into the low
	/// 32 bits, the larger one into the high 32 bits.
	uint64_t key(uint32_t a, uint32_t b) const
	{
		const uint64_t low = a > b ? b : a;
		const uint64_t high = a > b ? a : b;
		return low | (high << 32);
	}

	/// Unpacks a key; the first element is the high half, the second the low half.
	std::tuple<uint32_t, uint32_t> dekey(uint64_t k) const
	{
		const uint32_t high = static_cast<uint32_t>(k >> 32);
		const uint32_t low = static_cast<uint32_t>(k);
		return std::make_tuple(high, low);
	}

	uint32_t dim;
	std::unordered_map<std::string, uint32_t> index; // atom id -> index
	std::set<uint64_t> bond;                         // packed pairs, see key()
	std::set<uint64_t> bond_1_4;                     // packed pairs, see key()
	std::map<std::string, std::set<std::string>> link;
};
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,248 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/Cif++.hpp"
#include <stack>
#include <map>
namespace cif
{
// --------------------------------------------------------------------
// Exception type for errors raised while parsing CIF data.
// The constructor takes the line number and a message; how these are combined
// into the what() text is decided by the out-of-line definition.
class CifParserError : public std::runtime_error
{
public:
CifParserError(uint32_t lineNr, const std::string& message);
};
// --------------------------------------------------------------------
extern const uint32_t kMaxLineLength;
extern const uint8_t kCharTraitsTable[128];
/// Bit flags stored per character in kCharTraitsTable; one bit per character class.
enum CharTraitsMask : uint8_t
{
	kOrdinaryMask = 0x01,
	kNonBlankMask = 0x02,
	kTextLeadMask = 0x04,
	kAnyPrintMask = 0x08
};
/// True for whitespace characters and for '#'.
inline bool isWhite(int ch)
{
	if (ch == '#')
		return true;

	return std::isspace(ch) != 0;
}
/// True when \a ch lies in 0x20..0x7f and has the 'ordinary' bit set in kCharTraitsTable.
inline bool isOrdinary(int ch)
{
	if (ch < 0x20 or ch > 0x7f)
		return false;

	// the table starts at the space character, hence the offset
	return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
}
/// True when \a ch lies in 0x21..0x7f (space itself excluded) and has the
/// 'non blank' bit set in kCharTraitsTable.
inline bool isNonBlank(int ch)
{
	if (ch <= 0x20 or ch > 0x7f)
		return false;

	// the table starts at the space character, hence the offset
	return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
}
/// True when \a ch lies in 0x20..0x7f and has the 'text lead' bit set in kCharTraitsTable.
inline bool isTextLead(int ch)
{
	if (ch < 0x20 or ch > 0x7f)
		return false;

	// the table starts at the space character, hence the offset
	return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
}
/// True for a tab, or for a character in 0x20..0x7f that has the
/// 'any print' bit set in kCharTraitsTable.
inline bool isAnyPrint(int ch)
{
	if (ch == '\t')
		return true;

	if (ch < 0x20 or ch > 0x7f)
		return false;

	// the table starts at the space character, hence the offset
	return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
}
/// Checks whether the zero terminated string \a s has an 'ordinary' first
/// character followed by 'non blank' characters only.
/// An empty string yields false since the terminator is not an ordinary character.
inline bool isUnquotedString(const char *s)
{
	if (not isOrdinary(*s))
		return false;

	for (const char *p = s + 1; *p != 0; ++p)
	{
		if (not isNonBlank(*p))
			return false;
	}

	return true;
}
// --------------------------------------------------------------------
std::tuple<std::string,std::string> splitTagName(const std::string& tag);
// --------------------------------------------------------------------
using DatablockIndex = std::map<std::string,std::size_t>;
// --------------------------------------------------------------------
// sac Parser, analogous to SAX Parser (simple api for xml)
// Event driven CIF parser reading from a std::istream.
// The class itself does not store parsed data: whenever a datablock, category,
// row or item is recognised one of the pure virtual produce* methods is called,
// so a derived class decides what to do with the content.
class SacParser
{
public:
// is:   the stream to read from (kept as a reference in mData)
// init: NOTE(review): presumably controls whether the first token is read
//       on construction; confirm in the implementation
SacParser(std::istream& is, bool init = true);
virtual ~SacParser() {}
// Kinds of token, as returned by getNextToken() and expected by match()
enum CIFToken
{
eCIFTokenUnknown,
eCIFTokenEOF,
eCIFTokenDATA,
eCIFTokenLOOP,
eCIFTokenGLOBAL,
eCIFTokenSAVE,
eCIFTokenSTOP,
eCIFTokenTag,
eCIFTokenValue,
};
// NOTE(review): presumably printable names indexed by CIFToken; defined elsewhere
static const char* kTokenName[];
// Classification of a value token, stored in mTokenType
enum CIFValueType
{
eCIFValueInt,
eCIFValueFloat,
eCIFValueNumeric,
eCIFValueString,
eCIFValueTextField,
eCIFValueInapplicable,
eCIFValueUnknown
};
// NOTE(review): presumably printable names indexed by CIFValueType; defined elsewhere
static const char* kValueName[];
// Low level scanning interface
int getNextChar();
void retract();
void restart();
CIFToken getNextToken();
void match(CIFToken token);
// Parsing of a single named datablock; the second overload takes an index
// as produced by indexDatablocks()
bool parseSingleDatablock(const std::string& datablock);
DatablockIndex indexDatablocks();
bool parseSingleDatablock(const std::string& datablock, const DatablockIndex &index);
// Parsing routines for the various parts of a file; parseSaveFrame is
// virtual so that derived classes can replace it
void parseFile();
void parseGlobal();
void parseDataBlock();
virtual void parseSaveFrame();
void parseDictionary();
void error(const std::string& msg);
// production methods, these are pure virtual here
virtual void produceDatablock(const std::string& name) = 0;
virtual void produceCategory(const std::string& name) = 0;
virtual void produceRow() = 0;
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value) = 0;
protected:
// States of the tokenizer. Float, Int and Value start at spaced-out numbers,
// NOTE(review): presumably to leave room for sub-states; confirm in the implementation
enum State
{
eStateStart,
eStateWhite,
eStateComment,
eStateQuestionMark,
eStateDot,
eStateQuotedString,
eStateQuotedStringQuote,
eStateUnquotedString,
eStateTag,
eStateTextField,
eStateFloat = 100,
eStateInt = 110,
// eStateNumericSuffix = 200,
eStateValue = 300
};
std::istream& mData; // input stream, not owned
// Parser state
bool mValidate;
uint32_t mLineNr;
bool mBol;
int mState, mStart;
CIFToken mLookahead;
std::string mTokenValue;
CIFValueType mTokenType;
std::stack<int> mBuffer;
};
// --------------------------------------------------------------------
class Parser : public SacParser
{
public:
Parser(std::istream& is, File& f, bool init = true);
virtual void produceDatablock(const std::string& name);
virtual void produceCategory(const std::string& name);
virtual void produceRow();
virtual void produceItem(const std::string& category, const std::string& item, const std::string& value);
protected:
File& mFile;
Datablock* mDataBlock;
Datablock::iterator mCat;
Row mRow;
};
// --------------------------------------------------------------------
// Parser specialisation that loads a dictionary into a Validator.
// It replaces the save frame parsing of the base class.
class DictParser : public Parser
{
public:
DictParser(Validator& validator, std::istream& is);
~DictParser();
void loadDictionary();
private:
virtual void parseSaveFrame();
bool collectItemTypes();
void linkItems();
Validator& mValidator; // the validator passed to the constructor, not owned
// Note: this member has the same name as Parser::mFile (a File reference)
// and therefore hides it inside this class.
File mFile;
// Raw pointer to implementation data.
// NOTE(review): presumably released in ~DictParser; confirm in the implementation
struct DictParserDataImpl* mImpl;
bool mCollectedItemTypes = false;
};
}

View File

@@ -1,239 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cassert>
#include <filesystem>
#include <iostream>
#include <list>
#include <memory>
#include <set>
#include <vector>
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#if _MSC_VER
#include <io.h>
#define isatty _isatty
#else
#include <unistd.h>
#endif
#include "cif++/Cif++Export.hpp"
#if _MSC_VER
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
#pragma warning(disable : 4068) // unknown pragma
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4101) // unreferenced local variable
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
#endif
namespace cif
{
// the git 'build' number
std::string get_version_nr();
// std::string get_version_date();
// --------------------------------------------------------------------
// some basic utilities: Since we're using ASCII input only, we define for optimisation
// our own case conversion routines.
bool iequals(const std::string &a, const std::string &b);
int icompare(const std::string &a, const std::string &b);
bool iequals(const char *a, const char *b);
int icompare(const char *a, const char *b);
void toLower(std::string &s);
std::string toLowerCopy(const std::string &s);
// To make life easier, we also define iless and iset using icompare
/// Strict weak ordering on strings based on icompare, usable as the
/// comparator of ordered containers.
struct iless
{
	bool operator()(const std::string &a, const std::string &b) const
	{
		const int order = icompare(a, b);
		return order < 0;
	}
};
typedef std::set<std::string, iless> iset;
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
extern const uint8_t kCharToLowerMap[256];
/// Table driven lower case conversion: the low eight bits of \a ch are used
/// as index into kCharToLowerMap.
inline char tolower(int ch)
{
	const uint8_t index = static_cast<uint8_t>(ch);
	return static_cast<char>(kCharToLowerMap[index]);
}
// --------------------------------------------------------------------
std::tuple<std::string, std::string> splitTagName(const std::string &tag);
// --------------------------------------------------------------------
// generate a cif name, mainly used to generate asym_id's
std::string cifIdForNumber(int number);
// --------------------------------------------------------------------
// custom wordwrapping routine
std::vector<std::string> wordWrap(const std::string &text, size_t width);
// --------------------------------------------------------------------
// Code helping with terminal i/o
uint32_t get_terminal_width();
// --------------------------------------------------------------------
// Path of the current executable
std::string get_executable_path();
// --------------------------------------------------------------------
// some manipulators to write coloured text to terminals
/// Colour indices for the coloured() manipulators. When written to a terminal
/// the value is added to 30 (foreground) or 40 (background) to form the
/// escape sequence parameter.
enum StringColour
{
	scBLACK = 0,
	scRED,
	scGREEN,
	scYELLOW,
	scBLUE,
	scMAGENTA,
	scCYAN,
	scWHITE,
	scNONE = 9
};

/// Lightweight, non-owning wrapper around a string together with the colours
/// to use when writing it.
///
/// \tparam String must be a pointer or reference type: the text is never copied.
///                Consequently the wrapped string has to outlive this object.
template <typename String, typename CharT>
struct ColouredString
{
	static_assert(std::is_reference<String>::value or std::is_pointer<String>::value, "String type must be pointer or reference");

	ColouredString(String s, StringColour fore, StringColour back, bool bold = true)
		: m_s(s)
		, m_fore(fore)
		, m_back(back)
		, m_bold(bold)
	{
	}

	ColouredString &operator=(const ColouredString &) = delete;

	String m_s;
	StringColour m_fore, m_back;
	bool m_bold;
};

/// Writes a coloured C string. Escape sequences are only emitted when
/// standard output is a terminal, otherwise the plain text is written.
template <typename CharT, typename Traits>
std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<const CharT *, CharT> &s)
{
	if (isatty(STDOUT_FILENO))
	{
		// assemble the whole sequence first so it is written to os in one go
		std::basic_ostringstream<CharT, Traits> ostr;
		ostr << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
			 << s.m_s
			 << "\033[0m";

		return os << ostr.str();
	}
	else
		return os << s.m_s;
}

/// Writes any other coloured string type. Escape sequences are only emitted
/// when standard output is a terminal, otherwise the plain text is written.
template <typename CharT, typename Traits, typename String>
std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<String, CharT> &s)
{
	if (isatty(STDOUT_FILENO))
	{
		// assemble the whole sequence first so it is written to os in one go
		std::basic_ostringstream<CharT, Traits> ostr;
		ostr << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
			 << s.m_s
			 << "\033[0m";

		return os << ostr.str();
	}
	else
		return os << s.m_s;
}

/// Wraps a C string; by default white text on a red background, in bold.
template <typename CharT>
inline auto coloured(const CharT *s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
	return ColouredString<const CharT *, CharT>(s, fore, back, bold);
}

/// Wraps a const string by reference.
/// The string type used to be passed by value as template argument, which
/// always failed the static_assert in ColouredString and made this overload
/// unusable. The result refers to \a s, so use it within the same expression
/// when \a s is a temporary.
template <typename CharT, typename Traits, typename Alloc>
inline auto coloured(const std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
	return ColouredString<const std::basic_string<CharT, Traits, Alloc> &, CharT>(s, fore, back, bold);
}

/// Wraps a mutable string by reference, see the const overload for details.
template <typename CharT, typename Traits, typename Alloc>
inline auto coloured(std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
	return ColouredString<std::basic_string<CharT, Traits, Alloc> &, CharT>(s, fore, back, bold);
}
// --------------------------------------------------------------------
// A progress bar
// Progress reporting object. It is created with a maximum value and the name
// of the action being performed; the state lives behind the mImpl pointer.
// Instances cannot be copied.
class Progress
{
public:
// inMax:    the value that corresponds to completion
// inAction: text describing the action
Progress(int64_t inMax, const std::string &inAction);
virtual ~Progress();
void consumed(int64_t inConsumed); // consumed is relative
void progress(int64_t inProgress); // progress is absolute
void message(const std::string &inMessage);
private:
Progress(const Progress &) = delete;
Progress &operator=(const Progress &) = delete;
// NOTE(review): raw pointer, presumably created in the constructor and
// released in the destructor; confirm in the implementation
struct ProgressImpl *mImpl;
};
// --------------------------------------------------------------------
// Resources
std::unique_ptr<std::istream> loadResource(std::filesystem::path name);
void addFileResource(const std::string &name, std::filesystem::path dataFile);
void addDataDirectory(std::filesystem::path dataDir);
} // namespace cif

View File

@@ -1,198 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/Cif++.hpp"
// duh.. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
// #include <regex>
#include <boost/regex.hpp>
#include <set>
namespace cif
{
struct ValidateCategory;
// --------------------------------------------------------------------
/// Exception reporting a validation failure.
/// One constructor takes only a message, the other additionally takes a
/// category and an item name; the resulting text is kept in mMsg.
class ValidationError : public std::exception
{
  public:
	ValidationError(const std::string &msg);
	ValidationError(const std::string &cat, const std::string &item,
		const std::string &msg);

	/// Returns the stored message; the pointer is valid as long as mMsg is not changed.
	const char *what() const noexcept override
	{
		return mMsg.c_str();
	}

	std::string mMsg;
};
// --------------------------------------------------------------------
/// The primitive type codes, see mapToPrimitiveType for the conversion from text.
enum class DDL_PrimitiveType
{
	Char,
	UChar,
	Numb
};
DDL_PrimitiveType mapToPrimitiveType(const std::string& s);
// Validation information for one type: its name, the primitive type it maps
// to and the regular expression stored for it.
// Objects are ordered by name, ignoring case, so they can be kept in a std::set.
struct ValidateType
{
std::string mName;
DDL_PrimitiveType mPrimitiveType;
// std::regex mRx;
boost::regex mRx;
// case insensitive ordering on the type name
bool operator<(const ValidateType& rhs) const
{
return icompare(mName, rhs.mName) < 0;
}
// compare values based on type
// int compare(const std::string& a, const std::string& b) const
// {
// return compare(a.c_str(), b.c_str());
// }
// NOTE(review): presumably returns <0, 0 or >0 like strcmp; confirm in the implementation
int compare(const char* a, const char* b) const;
};
/// Validation information for a single item (tag).
/// Ordering and equality are defined on the tag only and ignore case.
struct ValidateItem
{
	std::string mTag;
	bool mMandatory;
	const ValidateType *mType; // non-owning
	cif::iset mEnums;
	std::string mDefault;
	bool mDefaultIsNull;
	ValidateCategory *mCategory = nullptr; // non-owning

	/// ItemLinked is used for non-key links
	struct ItemLinked
	{
		ValidateItem *mParent;
		std::string mParentItem;
		std::string mChildItem;
	};

	std::vector<ItemLinked> mLinked;

	bool operator<(const ValidateItem &rhs) const
	{
		const int order = icompare(mTag, rhs.mTag);
		return order < 0;
	}

	bool operator==(const ValidateItem &rhs) const
	{
		const bool sameTag = iequals(mTag, rhs.mTag);
		return sameTag;
	}

	/// Call operator taking a value; defined out of line.
	/// NOTE(review): presumably validates \a value and throws on failure; confirm in the implementation.
	void operator()(std::string value) const;
};
/// Validation information for a category: its name, keys, groups, mandatory
/// fields and the validators of its items.
/// Categories are ordered by name, ignoring case.
struct ValidateCategory
{
	std::string mName;
	std::vector<std::string> mKeys;
	cif::iset mGroups;
	cif::iset mMandatoryFields;
	std::set<ValidateItem> mItemValidators;

	bool operator<(const ValidateCategory &rhs) const
	{
		const int order = icompare(mName, rhs.mName);
		return order < 0;
	}

	void addItemValidator(ValidateItem &&v);

	const ValidateItem *getValidatorForItem(std::string tag) const;

	/// Read-only access to the complete set of item validators.
	const std::set<ValidateItem> &itemValidators() const
	{
		return mItemValidators;
	}
};
// Describes a link between a parent and a child category: the names of both
// categories, the key items on either side and the id and label of the link group.
// NOTE(review): mParentKeys and mChildKeys presumably correspond element by
// element; confirm against the code that fills them.
struct ValidateLink
{
int mLinkGroupID;
std::string mParentCategory;
std::vector<std::string> mParentKeys;
std::string mChildCategory;
std::vector<std::string> mChildKeys;
std::string mLinkGroupLabel;
};
// --------------------------------------------------------------------
// Container for everything needed to validate data: validators for types and
// categories plus the links between categories, together with the name and
// version of the dictionary. Objects can be moved but not copied.
// DictParser is a friend and therefore has access to the private members.
class Validator
{
public:
friend class DictParser;
Validator();
~Validator();
Validator(const Validator& rhs) = delete;
Validator& operator=(const Validator& rhs) = delete;
Validator(Validator&& rhs);
Validator& operator=(Validator&& rhs);
// Type validators are taken by rvalue reference and looked up by type code
void addTypeValidator(ValidateType&& v);
const ValidateType* getValidatorForType(std::string typeCode) const;
// Category validators are taken by rvalue reference and looked up by category name
void addCategoryValidator(ValidateCategory&& v);
const ValidateCategory* getValidatorForCategory(std::string category) const;
// Links can be retrieved by the category they have as parent or as child
void addLinkValidator(ValidateLink&& v);
std::vector<const ValidateLink*> getLinksForParent(const std::string& category) const;
std::vector<const ValidateLink*> getLinksForChild(const std::string& category) const;
// NOTE(review): presumably throws when fatal is true (or in strict mode) and
// only reports otherwise; confirm in the implementation
void reportError(const std::string& msg, bool fatal);
// Getter and setter pairs for the dictionary name and version
std::string dictName() const { return mName; }
void dictName(const std::string& name) { mName = name; }
std::string dictVersion() const { return mVersion; }
void dictVersion(const std::string& version) { mVersion = version; }
private:
// name is fully qualified here:
ValidateItem* getValidatorForItem(std::string name) const;
std::string mName;
std::string mVersion;
bool mStrict = false;
// std::set<uint32_t> mSubCategories;
std::set<ValidateType> mTypeValidators;
std::set<ValidateCategory> mCategoryValidators;
std::vector<ValidateLink> mLinkValidators;
};
}

View File

@@ -1,195 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file This file contains the definition for the class Compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <tuple>
#include <vector>
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
class Compound;
struct CompoundAtom;
class CompoundFactoryImpl;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx_v50 file
enum class BondType
{
	sing, ///< 'single bond'
	doub, ///< 'double bond'
	trip, ///< 'triple bond'
	quad, ///< 'quadruple bond'
	arom, ///< 'aromatic bond'
	poly, ///< 'polymeric bond'
	delo, ///< 'delocalized double bond'
	pi    ///< 'pi bond'
};
std::string to_string(BondType bondType);
BondType from_string(const std::string& bondType);
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
struct CompoundAtom
{
	std::string id;
	AtomType typeSymbol = Nn; ///< element of the atom, Nn (unknown) until set
	int charge = 0;
	bool aromatic = false;
	bool leavingAtom = false;
	bool stereoConfig = false;

	// The coordinates are zero initialised, like the other members, so that a
	// default constructed atom never contains indeterminate values.
	float x = 0;
	float y = 0;
	float z = 0;
};
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
struct CompoundBond
{
	std::string atomID[2]; ///< the ids of the two atoms forming this bond
	BondType type;
	bool aromatic = false;
	bool stereoConfig = false;
};
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CCD by default.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the addExtraComponents function and
/// pass it a valid CCD formatted file.
// Holds the description of one chemical compound: identification, formula
// data and the lists of atoms and bonds.
// The constructors are private, instances are created by the factory
// implementation classes that are declared as friends.
class Compound
{
public:
// accessors
// Note that the string accessors return copies
std::string id() const { return mID; }
std::string name() const { return mName; }
std::string type() const { return mType; }
std::string formula() const { return mFormula; }
float formulaWeight() const { return mFormulaWeight; }
int formalCharge() const { return mFormalCharge; }
const std::vector<CompoundAtom> &atoms() const { return mAtoms; }
const std::vector<CompoundBond> &bonds() const { return mBonds; }
// Returns a copy of the atom with the given id.
// NOTE(review): behaviour for an unknown id is not visible here; confirm in the implementation
CompoundAtom getAtomByID(const std::string &atomID) const;
bool atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const;
// float atomBondValue(const std::string &atomId_1, const std::string &atomId_2) const;
// float bondAngle(const std::string &atomId_1, const std::string &atomId_2, const std::string &atomId_3) const;
// float chiralVolume(const std::string &centreID) const;
// Water is recognised by one of three compound ids, the comparison is case sensitive
bool isWater() const
{
return mID == "HOH" or mID == "H2O" or mID == "WAT";
}
private:
friend class CompoundFactoryImpl;
friend class CCDCompoundFactoryImpl;
friend class CCP4CompoundFactoryImpl;
Compound(cif::Datablock &db);
Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type);
std::string mID;
std::string mName;
std::string mType;
std::string mFormula;
float mFormulaWeight = 0;
int mFormalCharge = 0;
std::vector<CompoundAtom> mAtoms;
std::vector<CompoundBond> mBonds;
};
// --------------------------------------------------------------------
// Factory class for Compound and Link objects
CIFPP_EXPORT extern const std::map<std::string, char> kAAMap, kBaseMap;
// Factory handing out Compound objects. The class cannot be constructed or
// copied by users, access goes through the static instance() function.
// Both a process wide and a thread local instance pointer are declared,
// see init() for selecting which one is used.
class CompoundFactory
{
public:
/// \brief Initialise a singleton instance.
///
/// If you have a multithreaded application and want to have different
/// compounds in each thread (e.g. a web service processing user requests
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
/// flag to true.
static void init(bool useThreadLocalInstanceOnly);
static CompoundFactory &instance();
static void clear();
// Dictionary management. Besides a default dictionary, dictionaries can be
// pushed and popped.
// NOTE(review): presumably the most recently pushed dictionary is consulted
// first; confirm in the implementation
void setDefaultDictionary(const std::filesystem::path &inDictFile);
void pushDictionary(const std::filesystem::path &inDictFile);
void popDictionary();
bool isKnownPeptide(const std::string &res_name) const;
bool isKnownBase(const std::string &res_name) const;
/// \brief Create the Compound object for \a id
///
/// This will create the Compound instance for \a id if it doesn't exist already.
/// The result is owned by this factory and should not be deleted by the user.
/// \param id The Compound ID, a three letter code usually
/// \result The compound, or nullptr if it could not be created (missing info)
const Compound *create(std::string id);
~CompoundFactory();
private:
CompoundFactory();
CompoundFactory(const CompoundFactory &) = delete;
CompoundFactory &operator=(const CompoundFactory &) = delete;
static std::unique_ptr<CompoundFactory> sInstance;
static thread_local std::unique_ptr<CompoundFactory> tlInstance;
static bool sUseThreadLocalInstance;
std::shared_ptr<CompoundFactoryImpl> mImpl;
};
} // namespace mmcif

View File

@@ -1,391 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright Maarten L. Hekkelman, Radboud University 2008-2011.
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// --------------------------------------------------------------------
// uBlas compatible matrix types
#pragma once
#include <iostream>
#include <vector>
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
// element m i,j is mapped to [i * n + j] and thus storage is row major
// Abstract interface shared by the matrix classes in this header.
//
// A matrix has dim_m() rows and dim_n() columns. Read access through the
// const operator() must be provided by every subclass; write access is
// optional and the default implementation throws.
template <typename T>
class MatrixBase
{
  public:
	using value_type = T;

	virtual ~MatrixBase() {}

	// number of rows and number of columns
	virtual uint32_t dim_m() const = 0;
	virtual uint32_t dim_n() const = 0;

	// mutable element access, throws unless overridden by a writable subclass
	virtual value_type &operator()(uint32_t i, uint32_t j) { throw std::runtime_error("unimplemented method"); }

	// read-only element access
	virtual value_type operator()(uint32_t i, uint32_t j) const = 0;

	// multiply, respectively decrement, every element by a scalar
	MatrixBase &operator*=(const value_type &rhs);
	MatrixBase &operator-=(const value_type &rhs);
};

// Scale every element of the matrix by rhs, visiting the cells row by row.
template <typename T>
MatrixBase<T> &MatrixBase<T>::operator*=(const T &rhs)
{
	for (uint32_t row = 0; row < dim_m(); ++row)
		for (uint32_t col = 0; col < dim_n(); ++col)
		{
			value_type &cell = (*this)(row, col);
			cell *= rhs;
		}

	return *this;
}

// Subtract rhs from every element of the matrix, visiting the cells row by row.
template <typename T>
MatrixBase<T> &MatrixBase<T>::operator-=(const T &rhs)
{
	for (uint32_t row = 0; row < dim_m(); ++row)
		for (uint32_t col = 0; col < dim_n(); ++col)
		{
			value_type &cell = (*this)(row, col);
			cell -= rhs;
		}

	return *this;
}
// Write a matrix as "[m,n]((a,b,...)(c,d,...)...)": the dimensions first,
// followed by one parenthesised, comma separated group per row.
template <typename T>
std::ostream &operator<<(std::ostream &lhs, const MatrixBase<T> &rhs)
{
	lhs << '[' << rhs.dim_m() << ',' << rhs.dim_n() << ']' << '(';

	for (uint32_t row = 0; row < rhs.dim_m(); ++row)
	{
		lhs << '(';

		for (uint32_t col = 0; col < rhs.dim_n(); ++col)
		{
			// separator goes in front of every element but the first
			if (col != 0)
				lhs << ',';

			lhs << rhs(row, col);
		}

		lhs << ')';
	}

	lhs << ')';

	return lhs;
}
// Dense m x n matrix owning a heap allocated, row-major element buffer.
//
// Copying makes a deep copy of the buffer. Moving takes over the buffer of
// the source and leaves the source behind as an empty 0 x 0 matrix.
template <typename T>
class Matrix : public MatrixBase<T>
{
  public:
	using value_type = T;

	// Create a dense copy of any matrix, converting each element to value_type.
	template <typename T2>
	Matrix(const MatrixBase<T2> &m)
		: m_m(m.dim_m())
		, m_n(m.dim_n())
	{
		m_data = new value_type[m_m * m_n];
		for (uint32_t i = 0; i < m_m; ++i)
		{
			for (uint32_t j = 0; j < m_n; ++j)
				operator()(i, j) = m(i, j);
		}
	}

	// An empty 0 x 0 matrix without a buffer.
	Matrix()
		: m_data(nullptr)
		, m_m(0)
		, m_n(0)
	{
	}

	Matrix(const Matrix &m)
		: m_m(m.m_m)
		, m_n(m.m_n)
	{
		m_data = new value_type[m_m * m_n];
		std::copy(m.m_data, m.m_data + (m_m * m_n), m_data);
	}

	// Take over the buffer of m, which becomes an empty matrix.
	Matrix(Matrix &&m) noexcept
		: m_data(m.m_data)
		, m_m(m.m_m)
		, m_n(m.m_n)
	{
		m.m_data = nullptr;
		m.m_m = 0;
		m.m_n = 0;
	}

	Matrix &operator=(const Matrix &m)
	{
		// Allocate and fill the new buffer before releasing the old one, this
		// keeps self assignment safe and leaves *this intact if new throws.
		value_type *t = new value_type[m.m_m * m.m_n];
		std::copy(m.m_data, m.m_data + (m.m_m * m.m_n), t);

		delete[] m_data;
		m_data = t;
		m_m = m.m_m;
		m_n = m.m_n;

		return *this;
	}

	Matrix &operator=(Matrix &&m) noexcept
	{
		if (this != &m)
		{
			delete[] m_data;

			m_data = m.m_data;
			m_m = m.m_m;
			m_n = m.m_n;

			m.m_data = nullptr;
			m.m_m = 0;
			m.m_n = 0;
		}

		return *this;
	}

	// An m x n matrix with all elements set to v.
	Matrix(uint32_t m, uint32_t n, T v = T())
		: m_m(m)
		, m_n(n)
	{
		m_data = new value_type[m_m * m_n];
		std::fill(m_data, m_data + (m_m * m_n), v);
	}

	~Matrix() override
	{
		delete[] m_data;
	}

	uint32_t dim_m() const override { return m_m; }
	uint32_t dim_n() const override { return m_n; }

	// Element access, indices are only checked by assert
	value_type operator()(uint32_t i, uint32_t j) const override
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

	value_type &operator()(uint32_t i, uint32_t j) override
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

	// Call f for each element, in storage (row-major) order.
	template <typename Func>
	void each(Func f)
	{
		for (uint32_t i = 0; i < m_m * m_n; ++i)
			f(m_data[i]);
	}

	// Divide each element by v.
	template <typename U>
	Matrix &operator/=(U v)
	{
		for (uint32_t i = 0; i < m_m * m_n; ++i)
			m_data[i] /= v;

		return *this;
	}

  private:
	value_type *m_data; // m_m * m_n elements, element (i,j) is at [i * m_n + j]
	uint32_t m_m, m_n;  // number of rows and number of columns
};
// --------------------------------------------------------------------
// Symmetric n x n matrix that stores only one triangle, including the diagonal.
//
// Element (i,j) with i <= j is stored at index (j * (j + 1)) / 2 + i, so the
// buffer holds (n * (n + 1)) / 2 elements. The matrix either owns its buffer
// (first constructor) or refers to external data (second constructor).
// Copying an owning matrix makes a deep copy, copying a non-owning matrix
// results in another reference to the same external data.
template <typename T>
class SymmetricMatrix : public MatrixBase<T>
{
  public:
	typedef typename MatrixBase<T>::value_type value_type;

	// An owning n x n matrix with all elements set to v.
	SymmetricMatrix(uint32_t n, T v = T())
		: m_owner(true)
		, m_n(n)
	{
		uint32_t N = (m_n * (m_n + 1)) / 2;
		m_data = new value_type[N];
		std::fill(m_data, m_data + N, v);
	}

	// A non-owning view on external data. The data is never deleted by this
	// class, but note that the non-const members do write to it.
	SymmetricMatrix(const T *data, uint32_t n)
		: m_owner(false)
		, m_data(const_cast<T *>(data))
		, m_n(n)
	{
	}

	// Without this constructor the compiler generated copy would share the
	// buffer between two owners, resulting in a double delete.
	SymmetricMatrix(const SymmetricMatrix &rhs)
		: m_owner(rhs.m_owner)
		, m_data(rhs.m_data)
		, m_n(rhs.m_n)
	{
		if (m_owner)
		{
			uint32_t N = (m_n * (m_n + 1)) / 2;
			m_data = new value_type[N];
			std::copy(rhs.m_data, rhs.m_data + N, m_data);
		}
	}

	// Copy-and-swap: rhs is already a copy, its destructor cleans up our old state.
	SymmetricMatrix &operator=(SymmetricMatrix rhs)
	{
		std::swap(m_owner, rhs.m_owner);
		std::swap(m_data, rhs.m_data);
		std::swap(m_n, rhs.m_n);
		return *this;
	}

	~SymmetricMatrix() override
	{
		if (m_owner)
			delete[] m_data;
	}

	uint32_t dim_m() const override { return m_n; }
	uint32_t dim_n() const override { return m_n; }

	T operator()(uint32_t i, uint32_t j) const override;
	T &operator()(uint32_t i, uint32_t j) override;

	// erase two rows, add one at the end (for neighbour joining)
	void erase_2(uint32_t i, uint32_t j);

	// Call f for each stored element.
	template <typename Func>
	void each(Func f)
	{
		uint32_t N = (m_n * (m_n + 1)) / 2;

		for (uint32_t i = 0; i < N; ++i)
			f(m_data[i]);
	}

	// Divide each stored element by v.
	template <typename U>
	SymmetricMatrix &operator/=(U v)
	{
		uint32_t N = (m_n * (m_n + 1)) / 2;

		for (uint32_t i = 0; i < N; ++i)
			m_data[i] /= v;

		return *this;
	}

  private:
	bool m_owner;       // true if m_data was allocated by this object
	value_type *m_data; // packed triangle, see the class comment for the layout
	uint32_t m_n;       // dimension of the matrix
};

// Read element (i,j), the order of the indices does not matter.
template <typename T>
inline T SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j) const
{
	return i < j
	           ? m_data[(j * (j + 1)) / 2 + i]
	           : m_data[(i * (i + 1)) / 2 + j];
}

// Writable access to element (i,j), the order of the indices does not matter.
template <typename T>
inline T &SymmetricMatrix<T>::operator()(uint32_t i, uint32_t j)
{
	if (i > j)
		std::swap(i, j);
	assert(j < m_n);
	return m_data[(j * (j + 1)) / 2 + i];
}

// Remove rows/columns di and dj and append one new row/column at the end.
//
// The remaining elements are compacted in place, the dimension of the matrix
// ends up one less than before. The elements of the newly appended row are
// not initialised here, the caller is expected to fill them in.
template <typename T>
void SymmetricMatrix<T>::erase_2(uint32_t di, uint32_t dj)
{
	uint32_t s = 0, d = 0;
	for (uint32_t i = 0; i < m_n; ++i)
	{
		// The stored triangle includes the diagonal, so row i has i + 1
		// elements. Skipping the diagonal would make s run out of sync
		// with the storage layout used by operator().
		for (uint32_t j = 0; j <= i; ++j)
		{
			if (i != di and j != dj and i != dj and j != di)
			{
				if (s != d)
					m_data[d] = m_data[s];
				++d;
			}

			++s;
		}
	}

	--m_n;
}
// Read-only n x n identity matrix: 1 on the diagonal and 0 everywhere else.
// No storage is needed, the elements are calculated on request.
template <typename T>
class IdentityMatrix : public MatrixBase<T>
{
  public:
	typedef typename MatrixBase<T>::value_type value_type;

	IdentityMatrix(uint32_t n)
		: m_n(n)
	{
	}

	virtual uint32_t dim_m() const { return m_n; }
	virtual uint32_t dim_n() const { return m_n; }

	virtual value_type operator()(uint32_t i, uint32_t j) const
	{
		value_type element = 0;

		const bool onDiagonal = (i == j);
		if (onDiagonal)
			element = 1;

		return element;
	}

  private:
	uint32_t m_n; // the dimension of this matrix
};
// --------------------------------------------------------------------
// matrix functions
// Matrix product lhs * rhs.
//
// The result has lhs.dim_m() rows and rhs.dim_n() columns and element (i,j)
// is the sum over k of lhs(i,k) * rhs(k,j). The operands are expected to be
// conforming (lhs.dim_n() == rhs.dim_m()); if they are not, only the
// overlapping part of the inner dimension contributes to the sums.
template <typename T>
Matrix<T> operator*(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
{
	const uint32_t inner = std::min(lhs.dim_n(), rhs.dim_m());

	// the elements of result start out as T(), i.e. zero
	Matrix<T> result(lhs.dim_m(), rhs.dim_n());

	for (uint32_t i = 0; i < result.dim_m(); ++i)
	{
		for (uint32_t j = 0; j < result.dim_n(); ++j)
		{
			for (uint32_t k = 0; k < inner; ++k)
				result(i, j) += lhs(i, k) * rhs(k, j);
		}
	}

	return result;
}
// Return a dense copy of lhs in which every element is multiplied by rhs.
template <typename T>
Matrix<T> operator*(const MatrixBase<T> &lhs, T rhs)
{
	Matrix<T> scaled(lhs);

	scaled *= rhs;

	return scaled;
}
// Element wise difference lhs - rhs. When the dimensions of the operands
// differ, the result is limited to the part both have in common.
template <typename T>
Matrix<T> operator-(const MatrixBase<T> &lhs, const MatrixBase<T> &rhs)
{
	const uint32_t rows = std::min(lhs.dim_m(), rhs.dim_m());
	const uint32_t cols = std::min(lhs.dim_n(), rhs.dim_n());

	Matrix<T> difference(rows, cols);

	for (uint32_t r = 0; r < difference.dim_m(); ++r)
		for (uint32_t c = 0; c < difference.dim_n(); ++c)
			difference(r, c) = lhs(r, c) - rhs(r, c);

	return difference;
}
// Return a dense copy of lhs in which rhs is subtracted from every element.
template <typename T>
Matrix<T> operator-(const MatrixBase<T> &lhs, T rhs)
{
	// Start with the elements of lhs. Constructing result from the dimensions
	// only would give a zero matrix and thus a result of -rhs everywhere.
	Matrix<T> result(lhs);
	result -= rhs;
	return result;
}
// template <typename T>
// symmetric_matrix<T> hammingDistance(const MatrixBase<T> &lhs, T rhs);
// template <typename T>
// std::vector<T> sum(const MatrixBase<T> &m);

View File

@@ -1,428 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <functional>
#if HAVE_LIBCLIPPER
#include <clipper/core/coords.h>
#endif
#include <boost/math/quaternion.hpp>
namespace mmcif
{
typedef boost::math::quaternion<float> Quaternion;
const double
kPI = 3.141592653589793238462643383279502884;
// --------------------------------------------------------------------
// Point, a location with x, y and z coordinates as floating point.
// A point is convertible to a tuple of references to its three
// coordinates, so you can do things like:
//
// float x, y, z;
// std::tie(x, y, z) = atom.location();
/// \brief A location (or vector) in 3d space with coordinates of floating point type \a F
///
/// The coordinates are public members. A point converts to a tuple of
/// references to its coordinates and, when built with HAVE_LIBCLIPPER,
/// to and from clipper::Coord_orth.
template<typename F>
struct PointF
{
	typedef F FType;

	FType mX, mY, mZ;

	/// \brief The default point is the origin
	PointF() : mX(0), mY(0), mZ(0) {}
	PointF(FType x, FType y, FType z) : mX(x), mY(y), mZ(z) {}

	/// \brief Construct from a point using another floating point type
	template<typename PF>
	PointF(const PointF<PF>& pt)
		: mX(static_cast<F>(pt.mX))
		, mY(static_cast<F>(pt.mY))
		, mZ(static_cast<F>(pt.mZ)) {}

#if HAVE_LIBCLIPPER
	PointF(const clipper::Coord_orth& pt): mX(pt[0]), mY(pt[1]), mZ(pt[2]) {}

	PointF& operator=(const clipper::Coord_orth& rhs)
	{
		mX = rhs[0];
		mY = rhs[1];
		mZ = rhs[2];
		return *this;
	}
#endif

	/// \brief Assign from a point using another floating point type
	template<typename PF>
	PointF& operator=(const PointF<PF>& rhs)
	{
		mX = static_cast<F>(rhs.mX);
		mY = static_cast<F>(rhs.mY);
		mZ = static_cast<F>(rhs.mZ);
		return *this;
	}

	// accessors, the non-const getters return a reference that can be assigned to
	FType& getX() { return mX; }
	FType getX() const { return mX; }
	void setX(FType x) { mX = x; }

	FType& getY() { return mY; }
	FType getY() const { return mY; }
	void setY(FType y) { mY = y; }

	FType& getZ() { return mZ; }
	FType getZ() const { return mZ; }
	void setZ(FType z) { mZ = z; }

	// arithmetic, the scalar versions apply the scalar to each coordinate
	PointF& operator+=(const PointF& rhs)
	{
		mX += rhs.mX;
		mY += rhs.mY;
		mZ += rhs.mZ;
		return *this;
	}

	PointF& operator+=(FType d)
	{
		mX += d;
		mY += d;
		mZ += d;
		return *this;
	}

	PointF& operator-=(const PointF& rhs)
	{
		mX -= rhs.mX;
		mY -= rhs.mY;
		mZ -= rhs.mZ;
		return *this;
	}

	PointF& operator-=(FType d)
	{
		mX -= d;
		mY -= d;
		mZ -= d;
		return *this;
	}

	PointF& operator*=(FType rhs)
	{
		mX *= rhs;
		mY *= rhs;
		mZ *= rhs;
		return *this;
	}

	PointF& operator/=(FType rhs)
	{
		mX /= rhs;
		mY /= rhs;
		mZ /= rhs;
		return *this;
	}

	/// \brief Scale this vector to unit length
	///
	/// A vector of length zero is left unchanged.
	/// \result The length of the vector before it was normalized
	FType normalize()
	{
		auto length = mX * mX + mY * mY + mZ * mZ;
		if (length > 0)
		{
			length = std::sqrt(length);
			operator/=(length);
		}
		return length;
	}

	/// \brief Rotate this point using quaternion \a q, by calculating q * p * conj(q)
	void rotate(const boost::math::quaternion<FType>& q)
	{
		boost::math::quaternion<FType> p(0, mX, mY, mZ);

		p = q * p * boost::math::conj(q);

		mX = p.R_component_2();
		mY = p.R_component_3();
		mZ = p.R_component_4();
	}

#if HAVE_LIBCLIPPER
	operator clipper::Coord_orth() const
	{
		return clipper::Coord_orth(mX, mY, mZ);
	}
#endif

	// conversion to a tuple of references to the coordinates, this is
	// what makes assigning a point to a std::tie(x, y, z) work
	operator std::tuple<const FType&, const FType&, const FType&>() const
	{
		return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
	}

	operator std::tuple<FType&,FType&,FType&>()
	{
		return std::make_tuple(std::ref(mX), std::ref(mY), std::ref(mZ));
	}

	/// \brief Exact comparison of all three coordinates, no tolerance is used
	bool operator==(const PointF& rhs) const
	{
		return mX == rhs.mX and mY == rhs.mY and mZ == rhs.mZ;
	}

	bool operator!=(const PointF& rhs) const
	{
		return not operator==(rhs);
	}

	// consider point as a vector... perhaps I should rename Point?

	/// \brief The squared length of this vector
	FType lengthsq() const
	{
		return mX * mX + mY * mY + mZ * mZ;
	}

	/// \brief The length of this vector
	FType length() const
	{
		// std::sqrt to select the overload for FType, as in normalize()
		return std::sqrt(lengthsq());
	}
};
typedef PointF<float> Point;
typedef PointF<double> DPoint;
/// \brief Write point \a pt to \a os as "(x,y,z)"
template<typename F>
inline std::ostream& operator<<(std::ostream& os, const PointF<F>& pt)
{
	os << '(' << pt.mX;
	os << ',' << pt.mY;
	os << ',' << pt.mZ << ')';

	return os;
}
/// \brief The coordinate wise sum of \a lhs and \a rhs
template<typename F>
inline PointF<F> operator+(const PointF<F>& lhs, const PointF<F>& rhs)
{
	const F x = lhs.mX + rhs.mX;
	const F y = lhs.mY + rhs.mY;
	const F z = lhs.mZ + rhs.mZ;

	return PointF<F>(x, y, z);
}
/// \brief The coordinate wise difference of \a lhs and \a rhs
template<typename F>
inline PointF<F> operator-(const PointF<F>& lhs, const PointF<F>& rhs)
{
	const F x = lhs.mX - rhs.mX;
	const F y = lhs.mY - rhs.mY;
	const F z = lhs.mZ - rhs.mZ;

	return PointF<F>(x, y, z);
}
/// \brief The point \a pt with the sign of each coordinate flipped
template<typename F>
inline PointF<F> operator-(const PointF<F>& pt)
{
	const F x = -pt.mX;
	const F y = -pt.mY;
	const F z = -pt.mZ;

	return PointF<F>(x, y, z);
}
/// \brief The point \a pt with each coordinate multiplied by \a f
template<typename F>
inline PointF<F> operator*(const PointF<F>& pt, F f)
{
	const F x = pt.mX * f;
	const F y = pt.mY * f;
	const F z = pt.mZ * f;

	return PointF<F>(x, y, z);
}
/// \brief The point \a pt with each coordinate multiplied by \a f, scalar on the left hand side
template<typename F>
inline PointF<F> operator*(F f, const PointF<F>& pt)
{
	const F x = pt.mX * f;
	const F y = pt.mY * f;
	const F z = pt.mZ * f;

	return PointF<F>(x, y, z);
}
/// \brief The point \a pt with each coordinate divided by \a f
template<typename F>
inline PointF<F> operator/(const PointF<F>& pt, F f)
{
	const F x = pt.mX / f;
	const F y = pt.mY / f;
	const F z = pt.mZ / f;

	return PointF<F>(x, y, z);
}
// --------------------------------------------------------------------
// several standard 3d operations
/// \brief The squared distance between points \a a and \a b
template<typename F>
inline double DistanceSquared(const PointF<F>& a, const PointF<F>& b)
{
	const auto dx = a.mX - b.mX;
	const auto dy = a.mY - b.mY;
	const auto dz = a.mZ - b.mZ;

	return dx * dx + dy * dy + dz * dz;
}
/// \brief The distance between points \a a and \a b
template<typename F>
inline double Distance(const PointF<F>& a, const PointF<F>& b)
{
	const auto dx = a.mX - b.mX;
	const auto dy = a.mY - b.mY;
	const auto dz = a.mZ - b.mZ;

	return sqrt(dx * dx + dy * dy + dz * dz);
}
/// \brief The dot (inner) product of \a a and \a b, both taken as vectors
template<typename F>
inline F DotProduct(const PointF<F>& a, const PointF<F>& b)
{
	const F dot = a.mX * b.mX + a.mY * b.mY + a.mZ * b.mZ;
	return dot;
}
/// \brief The cross product a x b of \a a and \a b, both taken as vectors
template<typename F>
inline PointF<F> CrossProduct(const PointF<F>& a, const PointF<F>& b)
{
	const F x = a.mY * b.mZ - b.mY * a.mZ;
	const F y = a.mZ * b.mX - b.mZ * a.mX;
	const F z = a.mX * b.mY - b.mX * a.mY;

	return PointF<F>(x, y, z);
}
/// \brief The angle in degrees at \a p2 between the lines to \a p1 and to \a p3
///
/// The result is in the range [0, 180]. If \a p1 or \a p3 coincides with \a p2
/// the angle is undefined and the result is NaN.
template<typename F>
double Angle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3)
{
	PointF<F> v1 = p1 - p2;
	PointF<F> v2 = p3 - p2;

	auto cosine = DotProduct(v1, v2) / (v1.length() * v2.length());

	// Rounding can push the cosine of (anti)parallel vectors just outside
	// the domain of acos, which would result in NaN instead of 0 or 180.
	if (cosine > 1)
		cosine = 1;
	else if (cosine < -1)
		cosine = -1;

	return std::acos(cosine) * 180 / kPI;
}
template<typename F>
double DihedralAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
{
PointF<F> v12 = p1 - p2; // vector from p2 to p1
PointF<F> v43 = p4 - p3; // vector from p3 to p4
PointF<F> z = p2 - p3; // vector from p3 to p2
PointF<F> p = CrossProduct(z, v12);
PointF<F> x = CrossProduct(z, v43);
PointF<F> y = CrossProduct(z, x);
double u = DotProduct(x, x);
double v = DotProduct(y, y);
double result = 360;
if (u > 0 and v > 0)
{
u = DotProduct(p, x) / sqrt(u);
v = DotProduct(p, y) / sqrt(v);
if (u != 0 or v != 0)
result = atan2(v, u) * 180 / kPI;
}
return result;
}
/// \brief The cosine of the angle between the vectors p1 - p2 and p3 - p4
///
/// The result is 0 if the product of the squared lengths of both vectors is not larger than zero.
template<typename F>
double CosinusAngle(const PointF<F>& p1, const PointF<F>& p2, const PointF<F>& p3, const PointF<F>& p4)
{
	const PointF<F> v12 = p1 - p2;
	const PointF<F> v34 = p3 - p4;

	const double lengthsSquared = DotProduct(v12, v12) * DotProduct(v34, v34);

	if (lengthsSquared > 0)
		return DotProduct(v12, v34) / sqrt(lengthsSquared);

	return 0;
}
/// \brief The distance of point \a p to the line running through \a l1 and \a l2
///
/// Calculated as the length of the cross product of the vectors from both
/// line points to \a p, divided by the length of the vector from \a l1 to \a l2.
template<typename F>
auto DistancePointToLine(const PointF<F> &l1, const PointF<F> &l2, const PointF<F> &p)
{
	const auto direction = l2 - l1;
	const auto area = CrossProduct(p - l1, p - l2);

	return area.length() / direction.length();
}
// --------------------------------------------------------------------
// For e.g. simulated annealing, returns a new point that is moved in
// a random direction with a distance randomly chosen from a normal
// distribution with a stddev of offset.
template<typename F>
PointF<F> Nudge(PointF<F> p, F offset);
// --------------------------------------------------------------------
// We use quaternions to do rotations in 3d space
Quaternion Normalize(Quaternion q);
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q);
Point Centroid(std::vector<Point>& Points);
Point CenterPoints(std::vector<Point>& Points);
Quaternion AlignPoints(const std::vector<Point>& a, const std::vector<Point>& b);
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b);
// --------------------------------------------------------------------
// Helper class to generate evenly divided Points on a sphere
// we use a fibonacci sphere to calculate even distribution of the dots
template<int N>
class SphericalDots
{
public:
enum { P = 2 * N + 1 };
typedef typename std::array<Point,P> array_type;
typedef typename array_type::const_iterator iterator;
static SphericalDots& instance()
{
static SphericalDots sInstance;
return sInstance;
}
size_t size() const { return mPoints.size(); }
const Point operator[](uint32_t inIx) const { return mPoints[inIx]; }
iterator begin() const { return mPoints.begin(); }
iterator end() const { return mPoints.end(); }
double weight() const { return mWeight; }
SphericalDots()
{
const double
kGoldenRatio = (1 + std::sqrt(5.0)) / 2;
mWeight = (4 * kPI) / P;
auto p = mPoints.begin();
for (int32_t i = -N; i <= N; ++i)
{
double lat = std::asin((2.0 * i) / P);
double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;
p->mX = sin(lon) * cos(lat);
p->mY = cos(lon) * cos(lat);
p->mZ = sin(lat);
++p;
}
}
private:
array_type mPoints;
double mWeight;
};
typedef SphericalDots<50> SphericalDots_50;
}

View File

@@ -1,218 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Calculate DSSP-like secondary structure information
#pragma once
namespace mmcif
{
class Structure;
class Monomer;
struct Res;
extern const float
kCouplingConstant, kMinHBondEnergy, kMaxHBondEnergy;
/// \brief The secondary structure type assigned to a residue
///
/// The underlying type is char and each enumerator has a printable
/// character as value.
// NOTE(review): presumably these are the one letter codes used in DSSP output -- confirm.
enum SecondaryStructureType : char
{
ssLoop = ' ',
ssAlphahelix = 'H',
ssBetabridge = 'B',
ssStrand = 'E',
ssHelix_3 = 'G',
ssHelix_5 = 'I',
ssHelix_PPII = 'P',
ssTurn = 'T',
ssBend = 'S'
};
/// \brief The kind of helix to ask information for, used as parameter in DSSP::ResidueInfo::helix
// NOTE(review): judging by the names these are the 3-10, alpha, pi and polyproline helices -- confirm.
enum class HelixType
{
rh_3_10, rh_alpha, rh_pi, rh_pp
};
/// \brief The result type of DSSP::ResidueInfo::helix
// NOTE(review): presumably this is the position of a residue within a helix of the requested type -- confirm.
enum class Helix
{
None, Start, End, StartAndEnd, Middle
};
//struct HBond
//{
// std::string labelAsymID;
// int labelSeqID;
// double energy;
//};
//
//struct BridgePartner
//{
// std::string labelAsymID;
// int labelSeqID;
// int ladder;
// bool parallel;
//};
/// \brief Secondary structure information, currently only the type is stored.
/// The other fields are commented out.
struct SecondaryStructure
{
SecondaryStructureType type;
// HBond donor[2], acceptor[2];
// BridgePartner beta[2];
// int sheet;
// bool bend;
};
//void CalculateSecondaryStructure(Structure& s);
/// \brief The number of bins in each of the histograms in DSSP_Statistics
const size_t
	kHistogramSize = 30;

/// \brief The statistics as returned by DSSP::GetStatistics
///
/// All members have an initializer, a default constructed object
/// therefore starts with all counters and histogram bins set to zero.
struct DSSP_Statistics
{
	uint32_t nrOfResidues = 0, nrOfChains = 0, nrOfSSBridges = 0, nrOfIntraChainSSBridges = 0, nrOfHBonds = 0;
	uint32_t nrOfHBondsInAntiparallelBridges = 0, nrOfHBondsInParallelBridges = 0;
	uint32_t nrOfHBondsPerDistance[11] = {};
	double accessibleSurface = 0;

	uint32_t residuesPerAlphaHelixHistogram[kHistogramSize] = {};
	uint32_t parallelBridgesPerLadderHistogram[kHistogramSize] = {};
	uint32_t antiparallelBridgesPerLadderHistogram[kHistogramSize] = {};
	uint32_t laddersPerSheetHistogram[kHistogramSize] = {};
};
/// \brief The result type of DSSP::ResidueInfo::chainBreak
// NOTE(review): presumably NewChain marks the start of another chain and Gap a break inside a chain -- confirm.
enum class ChainBreak
{
None, NewChain, Gap
};
/// \brief Secondary structure information for a Structure
///
/// An object of this class is constructed for a structure and can then be
/// queried per residue, either directly or by iterating over ResidueInfo
/// objects. Objects of this class cannot be copied.
class DSSP
{
public:
/// \brief Constructor taking the structure \a s
// NOTE(review): presumably all calculations are done here and \a calculateSurfaceAccessibility
// decides whether the accessibility is calculated as well -- confirm in the implementation.
DSSP(const Structure& s, int min_poly_proline_stretch_length, bool calculateSurfaceAccessibility);
~DSSP();
DSSP(const DSSP&) = delete;
DSSP& operator=(const DSSP&) = delete;
/// \brief The secondary structure type of a residue, specified by asym ID and seq ID or by Monomer
SecondaryStructureType operator()(const std::string& inAsymID, int inSeqID) const;
SecondaryStructureType operator()(const Monomer& m) const;
/// \brief The accessibility of a residue, specified by asym ID and seq ID or by Monomer
double accessibility(const std::string& inAsymID, int inSeqID) const;
double accessibility(const Monomer& m) const;
bool isAlphaHelixEndBeforeStart(const Monomer& m) const;
bool isAlphaHelixEndBeforeStart(const std::string& inAsymID, int inSeqID) const;
/// \brief The statistics, returned by value
DSSP_Statistics GetStatistics() const;
class iterator;
using res_iterator = typename std::vector<Res>::iterator;
/// \brief Access to the information for a single residue
///
/// This is a thin wrapper around a pointer to the internal Res type, an object
/// is called empty when this pointer is null. Only iterator can construct
/// non-empty objects since the constructor is private.
class ResidueInfo
{
public:
friend class iterator;
explicit operator bool() const { return not empty(); }
bool empty() const { return mImpl == nullptr; }
const Monomer& residue() const;
std::string alt_id() const;
/// \brief return whether this residue is at a chain break, and if so, of what kind (see ChainBreak)
ChainBreak chainBreak() const;
/// \brief the internal number in DSSP
int nr() const;
SecondaryStructureType ss() const;
int ssBridgeNr() const;
Helix helix(HelixType helixType) const;
bool bend() const;
double accessibility() const;
/// \brief returns resinfo, ladder and parallel
std::tuple<ResidueInfo,int,bool> bridgePartner(int i) const;
int sheet() const;
/// \brief return resinfo and the energy of the bond
std::tuple<ResidueInfo,double> acceptor(int i) const;
std::tuple<ResidueInfo,double> donor(int i) const;
private:
ResidueInfo(Res* res) : mImpl(res) {}
Res* mImpl;
};
/// \brief Input iterator with ResidueInfo as value type
///
/// The iterator contains the ResidueInfo it refers to, two iterators
/// are equal when these wrap the same Res pointer.
class iterator
{
public:
using iterator_category = std::input_iterator_tag;
using value_type = ResidueInfo;
using difference_type = std::ptrdiff_t;
using pointer = value_type*;
using reference = value_type&;
iterator(const iterator& i);
iterator(Res* res);
iterator& operator=(const iterator& i);
reference operator*() { return mCurrent; }
pointer operator->() { return &mCurrent; }
iterator& operator++();
// post increment, returns a copy made before incrementing
iterator operator++(int)
{
auto tmp(*this);
this->operator++();
return tmp;
}
bool operator==(const iterator& rhs) const { return mCurrent.mImpl == rhs.mCurrent.mImpl; }
bool operator!=(const iterator& rhs) const { return mCurrent.mImpl != rhs.mCurrent.mImpl; }
private:
ResidueInfo mCurrent;
};
iterator begin() const;
iterator end() const;
bool empty() const { return begin() == end(); }
private:
// pointer to the implementation, the DSSPImpl type is only declared here
struct DSSPImpl* mImpl;
};
}

View File

@@ -1,544 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <numeric>
#include "cif++/AtomType.hpp"
#include "cif++/Cif++.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
/*
To modify a structure, you will have to use actions.
The currently supported actions are:
// - Move atom to new location
- Remove atom
// - Add new atom that was formerly missing
// - Add alternate Residue
-
*/
namespace mmcif
{
class Atom;
class Residue;
class Monomer;
class Polymer;
class Structure;
class File;
// --------------------------------------------------------------------
/// \brief An atom in a structure
///
/// This class is a handle: its only data member is a pointer to an AtomImpl.
/// A default constructed handle is expected to hold no implementation; use
/// the explicit bool conversion to test for that.
// NOTE(review): how copies share or reference-count the AtomImpl is not visible in this header -- confirm.
class Atom
{
public:
Atom();
Atom(struct AtomImpl *impl);
Atom(const Atom &rhs);
Atom(cif::Datablock &db, cif::Row &row);
// a special constructor to create symmetry copies
Atom(const Atom &rhs, const Point &symmmetry_location, const std::string &symmetry_operation);
~Atom();
/// \brief Returns true if this handle has an implementation
explicit operator bool() const { return mImpl_ != nullptr; }
// return a copy of this atom, with data copied instead of referenced
Atom clone() const;
Atom &operator=(const Atom &rhs);
const std::string &id() const;
AtomType type() const;
/// \brief Get the location of this atom
Point location() const;
/// \brief Set the location of this atom to \a p
void location(Point p);
/// \brief Translate the position of this atom by \a t
void translate(Point t);
/// \brief Rotate the position of this atom by \a q
void rotate(Quaternion q);
// for direct access to underlying data, be careful!
const cif::Row getRow() const;
const cif::Row getRowAniso() const;
// Atom symmetryCopy(const Point& d, const clipper::RTop_orth& rt);
bool isSymmetryCopy() const;
std::string symmetry() const;
// const clipper::RTop_orth& symop() const;
const Compound &comp() const;
bool isWater() const;
int charge() const;
float uIso() const;
/// \brief Retrieve the anisotropic values in \a anisou, an array of six floats
// NOTE(review): presumably the result is false when no anisotropic data is available -- confirm.
bool getAnisoU(float anisou[6]) const;
float occupancy() const;
/// \brief Get the value of property \a name converted to type \a T
template <typename T>
T property(const std::string &name) const;
/// \brief Set the value of property \a name to the string \a value
void property(const std::string &name, const std::string &value);
/// \brief Set property \a name to an arithmetic value, the value is
/// converted using std::to_string and stored as a string
template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
void property(const std::string &name, const T &value)
{
property(name, std::to_string(value));
}
// specifications
std::string labelAtomID() const;
std::string labelCompID() const;
std::string labelAsymID() const;
std::string labelEntityID() const;
int labelSeqID() const;
std::string labelAltID() const;
bool isAlternate() const;
std::string authAtomID() const;
std::string authCompID() const;
std::string authAsymID() const;
std::string authSeqID() const;
std::string pdbxAuthInsCode() const;
std::string pdbxAuthAltID() const;
std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id
std::string pdbID() const; // auth_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
bool operator==(const Atom &rhs) const;
// // get clipper format Atom
// clipper::Atom toClipper() const;
// Radius calculation based on integrating the density until perc of electrons is found
void calculateRadius(float resHigh, float resLow, float perc);
float radius() const;
// access data in compound for this atom
// convenience routine
/// \brief Returns true if the label atom ID is one of N, O, C or CA
bool isBackBone() const
{
auto atomID = labelAtomID();
return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
}
/// \brief Exchange the implementation pointers of this atom and \a b
void swap(Atom &b)
{
std::swap(mImpl_, b.mImpl_);
}
/// \brief Three way comparison, operator< is true when the result is negative
int compare(const Atom &b) const;
bool operator<(const Atom &rhs) const
{
return compare(rhs) < 0;
}
friend std::ostream &operator<<(std::ostream &os, const Atom &atom);
private:
friend class Structure;
void setID(int id);
AtomImpl *impl();
const AtomImpl *impl() const;
// the implementation, nullptr for an empty atom (see operator bool)
struct AtomImpl *mImpl_;
};
/// \brief Free function version of Atom::swap, exchanges the contents of \a a and \a b
inline void swap(mmcif::Atom &a, mmcif::Atom &b)
{
	// exchanging is symmetric, so this is the same as a.swap(b)
	b.swap(a);
}
inline double Distance(const Atom &a, const Atom &b)
{
return Distance(a.location(), b.location());
}
inline double DistanceSquared(const Atom &a, const Atom &b)
{
return DistanceSquared(a.location(), b.location());
}
typedef std::vector<Atom> AtomView;
// --------------------------------------------------------------------
/// \brief A residue: a compound located in an asym of a structure, along with its atoms
///
/// A residue can be moved but not copied. A default constructed residue
/// (only possible for derived classes and Polymer) has no structure and is
/// considered empty.
class Residue
{
public:
// constructors should be private, but that's not possible for now (needed in emplace)
// constructor for waters
Residue(const Structure &structure, const std::string &compoundID,
const std::string &asymID, const std::string &authSeqID);
// constructor for a residue without a sequence number
Residue(const Structure &structure, const std::string &compoundID,
const std::string &asymID);
// constructor for a residue with a sequence number
Residue(const Structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID, const std::string &authSeqID);
Residue(const Residue &rhs) = delete;
Residue &operator=(const Residue &rhs) = delete;
Residue(Residue &&rhs);
Residue &operator=(Residue &&rhs);
virtual ~Residue();
const Compound &compound() const;
const AtomView &atoms() const;
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
AtomView unique_atoms() const;
/// \brief The alt ID used for the unique atoms
std::string unique_alt_id() const;
/// \brief Return the atom with ID \a atomID
// NOTE(review): what is returned for an unknown ID is not visible here, presumably an empty Atom -- confirm.
Atom atomByID(const std::string &atomID) const;
const std::string &compoundID() const { return mCompoundID; }
const std::string &asymID() const { return mAsymID; }
int seqID() const { return mSeqID; }
std::string entityID() const;
std::string authAsymID() const;
std::string authSeqID() const;
std::string authInsCode() const;
// return a human readable PDB-like auth id (chain+seqnr+iCode)
std::string authID() const;
// similar for mmCIF space
std::string labelID() const;
// Is this residue a single entity?
bool isEntity() const;
/// \brief Returns true if the compound ID is HOH
// NOTE(review): Compound::isWater also accepts the IDs H2O and WAT, check whether this difference is intended.
bool isWater() const { return mCompoundID == "HOH"; }
/// \brief The structure this residue belongs to, do not call this for an empty() residue
/// since that dereferences a null pointer
const Structure &structure() const { return *mStructure; }
/// \brief Returns true if this residue has no structure
bool empty() const { return mStructure == nullptr; }
bool hasAlternateAtoms() const;
/// \brief Return the list of unique alt ID's present in this residue
std::set<std::string> getAlternateIDs() const;
/// \brief Return the list of unique atom ID's
std::set<std::string> getAtomIDs() const;
/// \brief Return the list of atoms having ID \a atomID
AtomView getAtomsByID(const std::string &atomID) const;
// some routines for 3d work
std::tuple<Point, float> centerAndRadius() const;
friend std::ostream &operator<<(std::ostream &os, const Residue &res);
protected:
// creates an empty residue, all members keep the value of their initializer
Residue() {}
friend class Polymer;
const Structure *mStructure = nullptr;
std::string mCompoundID, mAsymID;
int mSeqID = 0;
// Watch out, this is used only to label waters... The rest of the code relies on
// MapLabelToAuth to get this info. Perhaps we should rename this member field.
std::string mAuthSeqID;
AtomView mAtoms;
};
// --------------------------------------------------------------------
// a monomer models a single Residue in a protein chain
/// \brief A Residue that is part of a Polymer
///
/// In addition to what a Residue offers, a monomer knows the polymer it
/// is part of and its index. Like a Residue, a monomer can be moved but
/// not copied.
class Monomer : public Residue
{
public:
// Monomer();
Monomer(const Monomer &rhs) = delete;
Monomer &operator=(const Monomer &rhs) = delete;
Monomer(Monomer &&rhs);
Monomer &operator=(Monomer &&rhs);
Monomer(const Polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
const std::string &compoundID);
bool is_first_in_chain() const;
bool is_last_in_chain() const;
// convenience
bool has_alpha() const;
bool has_kappa() const;
// Assuming this is really an amino acid...
// NOTE(review): the unit of these angles is not visible in this header, presumably degrees -- confirm.
float phi() const;
float psi() const;
float alpha() const;
float kappa() const;
float tco() const;
float omega() const;
// torsion angles
size_t nrOfChis() const;
float chi(size_t i) const;
bool isCis() const;
/// \brief Returns true if the four atoms C, CA, N and O are present
bool isComplete() const;
/// \brief Returns true if any of the backbone atoms has an alternate
bool hasAlternateBackboneAtoms() const;
// shortcuts to the atoms with ID's CA, C, N, O and H, looked up using atomByID
Atom CAlpha() const { return atomByID("CA"); }
Atom C() const { return atomByID("C"); }
Atom N() const { return atomByID("N"); }
Atom O() const { return atomByID("O"); }
Atom H() const { return atomByID("H"); }
/// \brief Returns true if \a rhs is another monomer and areBonded returns
/// true for this monomer and \a rhs, using the default error margin
bool isBondedTo(const Monomer &rhs) const
{
return this != &rhs and areBonded(*this, rhs);
}
// NOTE(review): presumably this checks for a bond between \a a and \a b with
// \a errorMargin as tolerance, the exact criterion is in the implementation -- confirm.
static bool areBonded(const Monomer &a, const Monomer &b, float errorMargin = 0.5f);
static bool isCis(const Monomer &a, const Monomer &b);
static float omega(const Monomer &a, const Monomer &b);
// for LEU and VAL
float chiralVolume() const;
private:
// These members have no initializers, they are set by the constructors which
// are defined elsewhere. Presumably mIndex is the index of this monomer in mPolymer (confirm).
const Polymer *mPolymer;
size_t mIndex;
};
// --------------------------------------------------------------------
// A non-copyable sequence of Monomer objects; the container interface is
// inherited publicly from std::vector<Monomer>.
class Polymer : public std::vector<Monomer>
{
public:
Polymer(const Structure &s, const std::string &entityID, const std::string &asymID);
Polymer(const Polymer &) = delete;
Polymer &operator=(const Polymer &) = delete;
// Polymer(Polymer&& rhs) = delete;
// Polymer& operator=(Polymer&& rhs) = delete;
// Look up a monomer by sequence ID.
// NOTE(review): behaviour when seqID is not present is defined out of line -- confirm.
Monomer &getBySeqID(int seqID);
const Monomer &getBySeqID(int seqID) const;
// Returns the stored pointer; note it is a pointer to non-const Structure even on a const Polymer.
Structure *structure() const { return mStructure; }
// These two return copies of the stored strings.
std::string asymID() const { return mAsymID; }
std::string entityID() const { return mEntityID; }
std::string chainID() const;
int Distance(const Monomer &a, const Monomer &b) const;
private:
Structure *mStructure;
std::string mEntityID;
std::string mAsymID;
cif::RowSet mPolySeq;
};
// --------------------------------------------------------------------
// file is a reference to the data stored in e.g. the cif file.
// This object is not copyable.
class File : public std::enable_shared_from_this<File>
{
public:
File();
// Note: the single-argument constructor is not explicit, so a path converts implicitly to a File.
File(const std::filesystem::path &path);
File(const char *data, size_t length); // good luck trying to find out what it is...
~File();
// Copy construction and copy assignment are disabled.
File(const File &) = delete;
File &operator=(const File &) = delete;
cif::Datablock& createDatablock(const std::string &name);
void load(const std::filesystem::path &path);
void save(const std::filesystem::path &path);
// nr defaults to 1. NOTE(review): presumably a 1-based model number, and ownership
// of the returned pointer is not visible here -- confirm in the definition.
Structure *model(size_t nr = 1);
// Dereferences mImpl without a null check.
struct FileImpl &impl() const { return *mImpl; }
cif::Datablock &data();
cif::File &file();
private:
// NOTE(review): raw pointer; presumably allocated by the constructors and released in ~File() -- confirm.
struct FileImpl *mImpl;
};
// --------------------------------------------------------------------
/// Option flags that can be passed when constructing a Structure.
/// Every enumerator occupies its own bit so values can be tested with operator&.
enum class StructureOpenOptions
{
	SkipHydrogen = 0x01
};

/// Test whether \a a and \a b have at least one bit in common.
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
	const int lhs = static_cast<int>(a);
	const int rhs = static_cast<int>(b);

	return (lhs & rhs) != 0;
}
// --------------------------------------------------------------------
// NOTE(review): presumably a view on one model (modelNr) of the data held by a File -- confirm.
class Structure
{
public:
// modelNr defaults to 1; options defaults to a value-initialised
// StructureOpenOptions, i.e. no flag set.
Structure(File &p, size_t modelNr = 1, StructureOpenOptions options = {});
// Copy assignment is disabled, the copy constructor below is not.
Structure &operator=(const Structure &) = delete;
~Structure();
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
Structure(const Structure &);
File &getFile() const;
const AtomView &atoms() const { return mAtoms; }
AtomView waters() const;
const std::list<Polymer> &polymers() const { return mPolymers; }
std::list<Polymer> &polymers() { return mPolymers; }
const std::vector<Residue> &nonPolymers() const { return mNonPolymers; }
const std::vector<Residue> &branchResidues() const { return mBranchResidues; }
// Note: id is taken by value.
Atom getAtomByID(std::string id) const;
// Atom getAtomByLocation(Point pt, float maxDistance) const;
// altID defaults to the empty string.
Atom getAtomByLabel(const std::string &atomID, const std::string &asymID,
const std::string &compID, int seqID, const std::string &altID = "");
/// \brief Get a residue, if \a seqID is zero, the non-polymers are searched
const Residue &getResidue(const std::string &asymID, const std::string &compID, int seqID = 0) const;
// map between auth and label locations
std::tuple<std::string, int, std::string> MapAuthToLabel(const std::string &asymID,
const std::string &seqID, const std::string &compID, const std::string &insCode = "");
std::tuple<std::string, std::string, std::string, std::string> MapLabelToAuth(
const std::string &asymID, int seqID, const std::string &compID);
// returns chain, seqnr, icode
std::tuple<char, int, char> MapLabelToAuth(
const std::string &asymID, int seqID) const;
// returns chain,seqnr,comp,iCode
std::tuple<std::string, int, std::string, std::string> MapLabelToPDB(
const std::string &asymID, int seqID, const std::string &compID,
const std::string &authSeqID) const;
std::tuple<std::string, int, std::string> MapPDBToLabel(
const std::string &asymID, int seqID, const std::string &compID, const std::string &iCode) const;
// Actions
void removeAtom(Atom &a);
void swapAtoms(Atom &a1, Atom &a2); // swap the labels for these atoms
void moveAtom(Atom &a, Point p); // move atom to a new location
void changeResidue(const Residue &res, const std::string &newCompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
/// \brief Create a new non-polymer entity, returns new ID
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
/// \return The ID of the created entity
std::string createNonPolyEntity(const std::string &mon_id);
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
/// This method assumes you are copying data from one cif file to another.
///
/// \param entity_id The entity ID of the new nonpoly
/// \param atoms The array of atom_site rows containing the data.
/// \return The newly created asym ID
std::string createNonpoly(const std::string &entity_id, const std::vector<mmcif::Atom> &atoms);
/// \brief To sort the atoms in order of model > asym-id > res-id > atom-id
/// Will assign new atom_id's to all atoms. Be careful.
void sortAtoms();
/// \brief Translate the coordinates of all atoms in the structure by \a t
void translate(Point t);
/// \brief Rotate the coordinates of all atoms in the structure by \a t
void rotate(Quaternion t);
// These two return the same members as nonPolymers() and branchResidues() above.
const std::vector<Residue> &getNonPolymers() const { return mNonPolymers; }
const std::vector<Residue> &getBranchResidues() const { return mBranchResidues; }
void cleanupEmptyCategories();
private:
friend Polymer;
friend Residue;
// friend residue_view;
// friend residue_iterator;
// Note: both accessors are const yet hand out non-const references.
cif::Category &category(const char *name) const;
cif::Datablock &datablock() const;
std::string insertCompound(const std::string &compoundID, bool isEntity);
void loadData();
void updateAtomIndex();
// Held by reference. NOTE(review): the File presumably has to outlive this Structure -- confirm.
File &mFile;
size_t mModelNr;
AtomView mAtoms;
// NOTE(review): presumably rebuilt by updateAtomIndex() -- confirm in the definition.
std::vector<size_t> mAtomIndex;
std::list<Polymer> mPolymers;
std::vector<Residue> mNonPolymers, mBranchResidues;
};
} // namespace mmcif

View File

@@ -1,138 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <cstdint>
#include <array>
#include "CifUtils.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Element type of the kSpaceGroups table.
struct Spacegroup
{
const char* name; // NOTE(review): presumably the full space group name -- confirm against the table
const char* xHM; // NOTE(review): presumably the extended Hermann-Mauguin symbol -- confirm
const char* Hall; // NOTE(review): presumably the Hall symbol -- confirm
int nr; // NOTE(review): presumably the space group number -- confirm
};
CIFPP_EXPORT extern const Spacegroup kSpaceGroups[];
CIFPP_EXPORT extern const std::size_t kNrOfSpaceGroups;
// --------------------------------------------------------------------
/// \brief Fifteen small integers packed into the low 36 bits of a 64 bit word.
///
/// The first nine values take two bits each (bits 35..18), the remaining six
/// take three bits each (bits 17..0). Values are masked while packing, so
/// data() hands them back in their masked, non-negative form (e.g. -1 in a
/// two bit field comes back as 3).
struct SymopData
{
	/// \brief Pack the fifteen values in \a data
	constexpr SymopData(const std::array<int, 15> &data)
		: m_packed(((data[0] & 0x03ULL) << 34) |
				   ((data[1] & 0x03ULL) << 32) |
				   ((data[2] & 0x03ULL) << 30) |
				   ((data[3] & 0x03ULL) << 28) |
				   ((data[4] & 0x03ULL) << 26) |
				   ((data[5] & 0x03ULL) << 24) |
				   ((data[6] & 0x03ULL) << 22) |
				   ((data[7] & 0x03ULL) << 20) |
				   ((data[8] & 0x03ULL) << 18) |
				   ((data[9] & 0x07ULL) << 15) |
				   ((data[10] & 0x07ULL) << 12) |
				   ((data[11] & 0x07ULL) << 9) |
				   ((data[12] & 0x07ULL) << 6) |
				   ((data[13] & 0x07ULL) << 3) |
				   ((data[14] & 0x07ULL) << 0))
	{
	}

	/// \brief Two objects are equal when their packed representation is equal
	bool operator==(const SymopData &rhs) const
	{
		return m_packed == rhs.m_packed;
	}

	/// \brief Unpack the fifteen values again, in the same order as passed to the constructor
	std::array<int, 15> data() const
	{
		return {
			field(34, 0x03),
			field(32, 0x03),
			field(30, 0x03),
			field(28, 0x03),
			field(26, 0x03),
			field(24, 0x03),
			field(22, 0x03),
			field(20, 0x03),
			field(18, 0x03),
			field(15, 0x07),
			field(12, 0x07),
			field(9, 0x07),
			field(6, 0x07),
			field(3, 0x07),
			field(0, 0x07),
		};
	}

  private:
	friend struct SymopDataBlock;

	// A static constant instead of a per-object const member: the latter
	// doubled the size of every SymopData and deleted copy assignment.
	static constexpr uint64_t kPackMask = (~0ULL >> (64 - 36));

	/// \brief Construct from an already packed value, anything above bit 35 is dropped
	explicit constexpr SymopData(uint64_t v)
		: m_packed(v & kPackMask)
	{
	}

	// Mask before narrowing so the conversion to int never sees a value
	// that does not fit.
	constexpr int field(unsigned shift, uint64_t mask) const
	{
		return static_cast<int>((m_packed >> shift) & mask);
	}

	uint64_t m_packed;
};
/// \brief One 64 bit word holding a space group number (bits 63..48), a
/// rotational number (bits 47..40) and packed SymopData in the remaining bits.
struct SymopDataBlock
{
	constexpr SymopDataBlock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
		: m_v(((spacegroup & 0xffffULL) << 48) |
			  ((rotational_number & 0xffULL) << 40) |
			  SymopData(rt_data).m_packed)
	{
	}

	/// \brief The space group number, i.e. the upper sixteen bits
	uint16_t spacegroup() const
	{
		return static_cast<uint16_t>(m_v >> 48);
	}

	/// \brief The symmetry operation data; SymopData masks off the upper bits itself
	SymopData symop() const
	{
		return SymopData(m_v);
	}

	/// \brief The rotational number, i.e. bits 47..40
	uint8_t rotational_number() const
	{
		return static_cast<uint8_t>((m_v >> 40) & 0xff);
	}

  private:
	uint64_t m_v;
};
static_assert(sizeof(SymopDataBlock) == sizeof(uint64_t), "Size of SymopData is wrong");
CIFPP_EXPORT extern const SymopDataBlock kSymopNrTable[];
CIFPP_EXPORT extern const std::size_t kSymopNrTableSize;
// --------------------------------------------------------------------
int GetSpacegroupNumber(std::string spacegroup); // alternative for clipper's parsing code
}

340
include/cif++/atom_type.hpp Normal file
View File

@@ -0,0 +1,340 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/** \file atom_type.hpp
*
* This file contains information about all known elements
*/
#pragma once
#include "cif++/exports.hpp"
#include <array>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
namespace cif
{
/** Atom type as an integer. All known elements are available as a constant. */
enum atom_type : uint8_t
{
	// Enumerators are listed in order of their numeric value, which is the
	// atomic number for all but the first and the last entry.
	Nn = 0, ///< Unknown

	H = 1,   ///< Hydrogen
	He = 2,  ///< Helium
	Li = 3,  ///< Lithium
	Be = 4,  ///< Beryllium
	B = 5,   ///< Boron
	C = 6,   ///< Carbon
	N = 7,   ///< Nitrogen
	O = 8,   ///< Oxygen
	F = 9,   ///< Fluorine
	Ne = 10, ///< Neon
	Na = 11, ///< Sodium
	Mg = 12, ///< Magnesium
	Al = 13, ///< Aluminium
	Si = 14, ///< Silicon
	P = 15,  ///< Phosphorus
	S = 16,  ///< Sulfur
	Cl = 17, ///< Chlorine
	Ar = 18, ///< Argon
	K = 19,  ///< Potassium
	Ca = 20, ///< Calcium
	Sc = 21, ///< Scandium
	Ti = 22, ///< Titanium
	V = 23,  ///< Vanadium
	Cr = 24, ///< Chromium
	Mn = 25, ///< Manganese
	Fe = 26, ///< Iron
	Co = 27, ///< Cobalt
	Ni = 28, ///< Nickel
	Cu = 29, ///< Copper
	Zn = 30, ///< Zinc
	Ga = 31, ///< Gallium
	Ge = 32, ///< Germanium
	As = 33, ///< Arsenic
	Se = 34, ///< Selenium
	Br = 35, ///< Bromine
	Kr = 36, ///< Krypton
	Rb = 37, ///< Rubidium
	Sr = 38, ///< Strontium
	Y = 39,  ///< Yttrium
	Zr = 40, ///< Zirconium
	Nb = 41, ///< Niobium
	Mo = 42, ///< Molybdenum
	Tc = 43, ///< Technetium
	Ru = 44, ///< Ruthenium
	Rh = 45, ///< Rhodium
	Pd = 46, ///< Palladium
	Ag = 47, ///< Silver
	Cd = 48, ///< Cadmium
	In = 49, ///< Indium
	Sn = 50, ///< Tin
	Sb = 51, ///< Antimony
	Te = 52, ///< Tellurium
	I = 53,  ///< Iodine
	Xe = 54, ///< Xenon
	Cs = 55, ///< Caesium
	Ba = 56, ///< Barium
	La = 57, ///< Lanthanum
	Ce = 58, ///< Cerium
	Pr = 59, ///< Praseodymium
	Nd = 60, ///< Neodymium
	Pm = 61, ///< Promethium
	Sm = 62, ///< Samarium
	Eu = 63, ///< Europium
	Gd = 64, ///< Gadolinium
	Tb = 65, ///< Terbium
	Dy = 66, ///< Dysprosium
	Ho = 67, ///< Holmium
	Er = 68, ///< Erbium
	Tm = 69, ///< Thulium
	Yb = 70, ///< Ytterbium
	Lu = 71, ///< Lutetium
	Hf = 72, ///< Hafnium
	Ta = 73, ///< Tantalum
	W = 74,  ///< Tungsten
	Re = 75, ///< Rhenium
	Os = 76, ///< Osmium
	Ir = 77, ///< Iridium
	Pt = 78, ///< Platinum
	Au = 79, ///< Gold
	Hg = 80, ///< Mercury
	Tl = 81, ///< Thallium
	Pb = 82, ///< Lead
	Bi = 83, ///< Bismuth
	Po = 84, ///< Polonium
	At = 85, ///< Astatine
	Rn = 86, ///< Radon
	Fr = 87, ///< Francium
	Ra = 88, ///< Radium
	Ac = 89, ///< Actinium
	Th = 90, ///< Thorium
	Pa = 91, ///< Protactinium
	U = 92,  ///< Uranium
	Np = 93, ///< Neptunium
	Pu = 94, ///< Plutonium
	Am = 95, ///< Americium
	Cm = 96, ///< Curium
	Bk = 97, ///< Berkelium
	Cf = 98, ///< Californium
	Es = 99, ///< Einsteinium

	Fm = 100, ///< Fermium
	Md = 101, ///< Mendelevium
	No = 102, ///< Nobelium
	Lr = 103, ///< Lawrencium
	Rf = 104, ///< Rutherfordium
	Db = 105, ///< Dubnium
	Sg = 106, ///< Seaborgium
	Bh = 107, ///< Bohrium
	Hs = 108, ///< Hassium
	Mt = 109, ///< Meitnerium
	Ds = 110, ///< Darmstadtium
	Rg = 111, ///< Roentgenium
	Cn = 112, ///< Copernicium
	Nh = 113, ///< Nihonium
	Fl = 114, ///< Flerovium
	Mc = 115, ///< Moscovium
	Lv = 116, ///< Livermorium
	Ts = 117, ///< Tennessine
	Og = 118, ///< Oganesson

	D = 119, ///< Deuterium
};
// --------------------------------------------------------------------
/// An enum used to select the desired radius for an atom.
/// All values are collected from the wikipedia pages on atom radii
enum class radius_type
{
calculated, ///< Calculated radius from theoretical models
empirical, ///< Empirically measured covalent radii
/// @deprecated It is a bit unclear where these values came from. So, better not use them
covalent_empirical,
single_bond, ///< Bond length for a single covalent bond calculated using statistical analysis
double_bond, ///< Bond length for a double covalent bond calculated using statistical analysis
triple_bond, ///< Bond length for a triple covalent bond calculated using statistical analysis
van_der_waals, ///< Radius of an imaginary hard sphere representing the distance of closest approach for another atom
type_count ///< Number of radii; must remain the last enumerator since kRadiusTypeCount is derived from it
};
/// @brief The number of radii per element which can be requested from atom_type_info
constexpr std::size_t kRadiusTypeCount = static_cast<std::size_t>(radius_type::type_count);
/// An enum used to select either the effective or the crystal radius of an ion.
/// See explanation on Wikipedia: https://en.wikipedia.org/wiki/Ionic_radius
enum class ionic_radius_type
{
	/// Based on distance between ions in a crystal structure as determined by X-ray crystallography
	effective,

	/// Calculated ion radius based on a function of ionic charge and spin
	crystal,
};
/// Requests for an unknown radius value return kNA
constexpr float kNA = std::numeric_limits<float>::quiet_NaN();
/// A struct holding the known information for all elements defined in atom_type
struct atom_type_info
{
/// The type as an atom_type
atom_type type;
/// The official name for this element
std::string name;
/// The official symbol for this element
std::string symbol;
/// The weight of this element
float weight;
/// A flag indicating whether the element is a metal
bool metal;
/// Array containing all known radii for this element. A value of kNA is
/// stored for unknown values. The array has one slot per radius_type
/// (kRadiusTypeCount entries); note that atom_type_traits::radius divides
/// the stored value by 100 before returning it.
std::array<float, kRadiusTypeCount> radii;
};
/// Array of atom_type_info struct for each of the defined elements in atom_type
extern CIFPP_EXPORT const atom_type_info kKnownAtoms[];
// --------------------------------------------------------------------
// AtomTypeTraits
/// A traits class to access information for known elements
class atom_type_traits
{
  public:
	/// Constructor taking an atom_type \a a
	atom_type_traits(atom_type a);

	/// Constructor based on the element as a string in \a symbol
	atom_type_traits(const std::string &symbol);

	[[nodiscard]] atom_type type() const { return m_info->type; }       ///< Returns the atom_type
	[[nodiscard]] std::string name() const { return m_info->name; }     ///< Returns the name of the element
	[[nodiscard]] std::string symbol() const { return m_info->symbol; } ///< Returns the symbol of the element
	[[nodiscard]] float weight() const { return m_info->weight; }       ///< Returns the average weight of the element
	[[nodiscard]] bool is_metal() const { return m_info->metal; }       ///< Returns true if the element is a metal

	/// Return true if the symbol in \a symbol actually exists in the list of known elements in atom_type
	static bool is_element(const std::string &symbol);

	/// Return true if the symbol in \a symbol exists and is a metal
	static bool is_metal(const std::string &symbol);

	/// @brief Return the radius for the element, use \a type to select which radius to return
	///
	/// The value stored in atom_type_info::radii is divided by 100 before it is returned.
	///
	/// @param type The selector for which radius to return
	/// @return The requested radius or kNA if not known (or applicable)
	/// @throws std::invalid_argument if \a type is not one of the radius_type selectors
	[[nodiscard]] float radius(radius_type type = radius_type::single_bond) const
	{
		// Compare as an unsigned index: a radius_type produced by casting a
		// negative integer wraps to a huge value and is rejected too, where a
		// comparison of the enum values alone would let it through.
		const auto index = static_cast<std::size_t>(type);
		if (index >= static_cast<std::size_t>(radius_type::type_count))
			throw std::invalid_argument("invalid radius requested");

		return m_info->radii[index] / 100.f;
	}

	/// \brief Return the radius for a charged version of this atom in a solid crystal
	///
	/// \param charge The charge of the ion
	/// \return The radius of the ion
	[[nodiscard]] float crystal_ionic_radius(int charge) const;

	/// \brief Return the radius for a charged version of this atom in a non-solid environment
	///
	/// \param charge The charge of the ion
	/// \return The radius of the ion
	[[nodiscard]] float effective_ionic_radius(int charge) const;

	/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
	///
	/// \param charge The charge of the ion
	/// \param type The requested ion radius type
	/// \return The radius of the ion
	[[nodiscard]] float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
	{
		return type == ionic_radius_type::effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
	}

	/**
	 * @brief data type encapsulating the scattering factors
	 * in a simplified form (only a and b).
	 */
	struct SFData
	{
		/** @cond */
		double a[6], b[6];
		/** @endcond */
	};

	/// @brief to get the Cval and Siva scattering factor values, use this constant as charge:
	static constexpr int kWKSFVal = -99;

	/// @brief Return the Waasmaier & Kirfel scattering factor values for the element
	///
	/// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
	///
	/// @param charge The charge for which the structure values should be returned, use kWKSFVal to return the *Cval* and *Siva* values
	/// @return The scattering factors as a SFData struct
	[[nodiscard]] const SFData &wksf(int charge = 0) const;

	/// @brief Return the electron scattering factor values for the element
	///
	/// @return The scattering factors as a SFData struct
	[[nodiscard]] const SFData &elsf() const;

	/// Clipper doesn't like atoms with charges that do not have a scattering factor. And
	/// rightly so, but we need to know in advance if this is the case
	[[nodiscard]] bool has_sf(int charge) const;

  private:
	const struct atom_type_info *m_info;
};
} // namespace cif

1334
include/cif++/category.hpp Normal file

File diff suppressed because it is too large Load Diff

353
include/cif++/compound.hpp Normal file
View File

@@ -0,0 +1,353 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/datablock.hpp"
#include "cif++/exports.hpp"
#include "cif++/point.hpp"
#include <cstdint>
#include <filesystem>
#include <map>
#include <memory>
#include <string>
#include <string_view>
#include <vector>
/// \file compound.hpp
/// This file contains the definition for the class compound, encapsulating
/// the information found for compounds in the CCD.
///
/// The data is loaded by default from a file called `components.cif`. This file
/// is located using load_resource. (See documentation on cif::load_resource for more information)
///
/// Note that since version 6 the CCP4 monomer library is no longer used.
/// See also :doc:`/compound` for more information.
namespace cif
{
// --------------------------------------------------------------------
class compound_factory_impl;
class datablock;
class file;
enum atom_type : uint8_t;
/// \brief The bond type or bond order as defined in the CCD, possible values taken from the mmcif_pdbx file
enum class bond_type
{
	/// single bond
	sing,
	/// double bond
	doub,
	/// triple bond
	trip,
	/// quadruple bond
	quad,
	/// aromatic bond
	arom,
	/// polymeric bond
	poly,
	/// delocalized double bond
	delo,
	/// pi bond
	pi
};
/// @brief return the string representation of @a bondType
std::string bond_type_to_string(bond_type bondType);
/// @brief return the cif::bond_type for the string representation @a bondType
bond_type parse_bond_type_from_string(const std::string &bondType);
/// \brief The possible stereo config values for a compound_atom.
///
/// As the site https://psiberg.com/r-s-nomenclature/ states:
///
/// > RS nomenclature is currently the preferred system for assigning absolute
/// > configuration to chiral molecules. The letters R and S come from the Latin
/// > words Rectus and Sinister meaning right and left. Molecules that
/// > rotate the plane of polarized light to right are referred to as R isomers
/// > and the molecules that rotate the plane of polarized light to left are
/// > referred to S isomers.
enum class stereo_config_type : uint8_t
{
	/// Not polarizing
	N = 'N',

	/// Rectus
	R = 'R',

	/// Sinister
	S = 'S',
};
/// @brief return the string representation of @a stereo_config
std::string to_string(stereo_config_type stereo_config);
/// @brief return the cif::stereo_config_type for the string representation @a stereo_config
stereo_config_type parse_stereo_config_from_string(const std::string &stereo_config);
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
struct compound_atom
{
	std::string id; ///< Identifier for each atom in the chemical component

	/// The element type for each atom in the chemical component.
	/// Value-initialised, so a default constructed compound_atom holds the zero value of atom_type.
	atom_type type_symbol{};

	int charge = 0;            ///< The formal charge assigned to each atom in the chemical component.
	bool aromatic = false;     ///< Defines atoms in an aromatic moiety
	bool leaving_atom = false; ///< Flags atoms with "leaving" capability

	stereo_config_type stereo_config = stereo_config_type::N; ///< Defines the stereochemical configuration of the chiral center atom.

	// The coordinates now default to zero like the other members; they used
	// to be left indeterminate in a default constructed compound_atom.
	float x = 0; ///< The x component of the coordinates for each atom specified as orthogonal angstroms.
	float y = 0; ///< The y component of the coordinates for each atom specified as orthogonal angstroms.
	float z = 0; ///< The z component of the coordinates for each atom specified as orthogonal angstroms.

	/// Return the location of the atom as a point
	[[nodiscard]] point get_location() const
	{
		return { x, y, z };
	}
};
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
struct compound_bond
{
	/// The ID's of the two atoms that define the bond.
	std::string atom_id[2];

	/// The bond order of the chemical bond associated with the specified atoms.
	bond_type type;

	/// Defines aromatic bonds.
	bool aromatic = false;

	/// Defines stereochemical bonds.
	bool stereo_config = false;
};
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CCD by default.
///
/// To create compounds, you use the factory method. You can add your own
/// compound definitions by calling the push_dictionary function and
/// pass it a valid CCD formatted file.
class compound
{
  public:
	// accessors

	/// Return the alphanumeric code for the chemical component.
	[[nodiscard]] std::string id() const { return m_id; }

	/// Return the name of the chemical component.
	[[nodiscard]] std::string name() const { return m_name; }

	/// Return the type of monomer.
	[[nodiscard]] std::string type() const { return m_type; }

	/// Return the chemical formula of the chemical component.
	[[nodiscard]] std::string formula() const { return m_formula; }

	/// Return the formula mass of the chemical component in Daltons.
	[[nodiscard]] float formula_weight() const { return m_formula_weight; }

	/// Return the formal charge on the chemical component.
	[[nodiscard]] int formal_charge() const { return m_formal_charge; }

	/// Return the list of atoms for this compound
	[[nodiscard]] const std::vector<compound_atom> &atoms() const { return m_atoms; }

	/// Return the list of bonds for this compound
	[[nodiscard]] const std::vector<compound_bond> &bonds() const { return m_bonds; }

	/// Return the atom with id @a atom_id
	[[nodiscard]] compound_atom get_atom_by_atom_id(const std::string &atom_id) const;

	/// Return true if @a atomId_1 is bonded to @a atomId_2
	[[nodiscard]] bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;

	/// Return the bond length between @a atomId_1 and @a atomId_2
	[[nodiscard]] float bond_length(const std::string &atomId_1, const std::string &atomId_2) const;

	/// Return if the compound is actually a water, i.e. its id is HOH, H2O or WAT
	[[nodiscard]] bool is_water() const
	{
		return m_id == "HOH" || m_id == "H2O" || m_id == "WAT";
	}

	/** \brief Return whether this compound has a type of either 'peptide linking' or 'L-peptide linking' */
	[[nodiscard]] bool is_peptide() const;

	/** \brief Return whether this compound has a type of either 'DNA linking' or 'RNA linking' */
	[[nodiscard]] bool is_base() const;

	/// Return the one letter code to use in a canonical sequence. If unknown the value '\0' is returned
	[[nodiscard]] char one_letter_code() const { return m_one_letter_code; }

	/// Return the parent id code in case a parent is specified (e.g. MET for MSE)
	[[nodiscard]] std::string parent_id() const { return m_parent_id; }

  private:
	friend class compound_factory_impl;
	friend class local_compound_factory_impl;

	// Only the befriended factory implementations can construct a compound.
	compound(datablock &db);

	std::string m_id;
	std::string m_name;
	std::string m_type;
	std::string m_formula;
	char m_one_letter_code = 0;
	std::string m_parent_id;
	float m_formula_weight = 0;
	int m_formal_charge = 0;
	std::vector<compound_atom> m_atoms;
	std::vector<compound_bond> m_bonds;
};
// --------------------------------------------------------------------
// Factory class for compound and Link objects
/// Use the compound_factory singleton instance to create compound objects.
///
/// The factory cannot be copied; access it through instance(). Sources of
/// compound information are added with push_dictionary() and removed again
/// with pop_dictionary().
class compound_factory
{
  public:
    compound_factory(const compound_factory &) = delete;
    compound_factory &operator=(const compound_factory &) = delete;

    /// \brief Initialise a singleton instance.
    ///
    /// If you have a multithreaded application and want to have different
    /// compounds in each thread (e.g. a web service processing user requests
    /// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
    /// flag to true.
    static void init(bool useThreadLocalInstanceOnly);

    /// Return the singleton instance. If initialized with local threads, this is the
    /// instance for the current thread.
    static compound_factory &instance();

    /// Delete and reset the singleton instance. If initialized with local threads, this is the
    /// instance for the current thread.
    static void clear();

    /// Set the default dictionary file to @a inDictFile
    void set_default_dictionary(const std::filesystem::path &inDictFile);

    /// Override any previously loaded dictionary with @a inDictFile
    void push_dictionary(const std::filesystem::path &inDictFile);

    /** @brief Override any previously loaded dictionary with the data in @a file
     *
     * @note experimental feature
     *
     * Load the file @a file as a source for compound information. This may
     * be e.g. a regular mmCIF file with extra files containing compound
     * information.
     *
     * Be careful to remove the block again, best use @ref cif::compound_source
     * as a stack based object.
     */
    void push_dictionary(const file &file);

    /// Remove the last pushed dictionary
    void pop_dictionary();

    /// Return whether @a res_name is a valid and known peptide
    [[deprecated("use is_peptide or is_std_peptide instead")]]
    [[nodiscard]] bool
    is_known_peptide(const std::string &res_name) const;

    /// Return whether @a res_name is a valid and known base
    [[deprecated("use is_base or is_std_base instead")]]
    [[nodiscard]] bool
    is_known_base(const std::string &res_name) const;

    /// Return whether @a res_name is a peptide
    [[nodiscard]] bool is_peptide(std::string_view res_name) const;

    /// Return whether @a res_name is a base
    [[nodiscard]] bool is_base(std::string_view res_name) const;

    /// Return whether @a res_name is one of the standard peptides
    [[nodiscard]] bool is_std_peptide(std::string_view res_name) const;

    /// Return whether @a res_name is one of the standard bases
    [[nodiscard]] bool is_std_base(std::string_view res_name) const;

    /// Return whether @a res_name is a monomer (either base or peptide)
    [[nodiscard]] bool is_monomer(std::string_view res_name) const;

    /// Return whether @a res_name is one of the standard bases or peptides
    [[nodiscard]] bool is_std_monomer(std::string_view res_name) const
    {
        return is_std_base(res_name) or is_std_peptide(res_name);
    }

    /// Return whether @a res_name is water, i.e. exactly "HOH", "H2O" or "WAT"
    [[nodiscard]] bool is_water(std::string_view res_name) const
    {
        return res_name == "HOH" or res_name == "H2O" or res_name == "WAT";
    }

    /// Return whether @a res_name already exists, without creating it.
    [[nodiscard]] bool exists(std::string_view res_name) const;

    /// \brief Create the compound object for \a id
    ///
    /// This will create the compound instance for \a id if it doesn't exist already.
    /// The result is owned by this factory and should not be deleted by the user.
    /// \param id The compound ID, a three letter code usually
    /// \result The compound, or nullptr if it could not be created (missing info)
    const compound *create(std::string_view id);

    ~compound_factory() = default;

    CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids
        kBaseMap;                                                 ///< Globally accessible static list of the default bases

    /// Report that the compound @a compound_id is missing.
    /// NOTE(review): presumably honours the flag set by set_report_missing(); the implementation is not in this header.
    void report_missing_compound(std::string_view compound_id);

    /// Return the current value of the report-missing flag (true by default)
    [[nodiscard]] bool get_report_missing() const { return m_report_missing; }

    /// Set the report-missing flag to @a report
    void set_report_missing(bool report)
    {
        m_report_missing = report;
    }

  private:
    /// Only the factory itself constructs instances
    compound_factory();

    static std::unique_ptr<compound_factory> s_instance;
    static thread_local std::unique_ptr<compound_factory> tl_instance;
    static bool s_use_thread_local_instance;

    std::shared_ptr<compound_factory_impl> m_impl;
    bool m_report_missing = true;
};
// --------------------------------------------------------------------
/**
* @brief Stack based source for compound info.
*
* Use this class to temporarily add a compound source to the
* compound_factory.
*
* @code{.cpp}
* cif::file f("1cbs-with-custom-rea.cif");
* cif::compound_source cs(f);
*
* auto &cf = cif::compound_factory::instance();
* auto rea_compound = cf.create("REA");
* @endcode
*/
class compound_source
{
  public:
    /// Push @a file as a compound source on the compound_factory instance
    compound_source(const file &file)
    {
        compound_factory::instance().push_dictionary(file);
    }

    // Each object stands for exactly one pushed dictionary. A copy would
    // call pop_dictionary() a second time for a single push, so copying
    // is not allowed.
    compound_source(const compound_source &) = delete;
    compound_source &operator=(const compound_source &) = delete;

    /// Pop the last pushed dictionary from the compound_factory instance
    ~compound_source()
    {
        compound_factory::instance().pop_dictionary();
    }
};
} // namespace cif

1201
include/cif++/condition.hpp Normal file

File diff suppressed because it is too large Load Diff

442
include/cif++/cql.hpp Normal file
View File

@@ -0,0 +1,442 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/category.hpp"
#include "cif++/item.hpp"
#include "cif++/iterator.hpp"
#include "cif++/row.hpp"
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <string_view>
#include <utility>
// --------------------------------------------------------------------
namespace cif::cql
{
class connection;
struct result_impl;
// --------------------------------------------------------------------
class field_ref final
{
  public:
    /**
     * @brief Construct a reference to column @a col of the row @a rh
     *
     * @param rh The row this field is part of
     * @param col The index of the column in that row
     * @param impl Shared handle that is stored with the field.
     *             NOTE(review): presumably keeps the result data alive, confirm in the implementation
     */
    field_ref(const_row_handle rh, uint16_t col, std::shared_ptr<result_impl> impl)
        : m_row(std::move(rh))
        , m_index(col)
        , m_result_impl(std::move(impl))
    {
    }

    field_ref(const field_ref &) = default;
    field_ref(field_ref &&) = default;
    field_ref &operator=(const field_ref &) = default;
    field_ref &operator=(field_ref &&) = default;

    /** Return the item name the category of the row has for this column */
    [[nodiscard]] std::string_view name() const &
    {
        return m_row.get_category().get_item_name(m_index);
    }

    /** Return the column index of this field */
    [[nodiscard]] constexpr size_t num() const noexcept
    {
        return m_index;
    }

    /** Return the contents of this item converted to type @tparam T, std::string by default */
    template <typename T = std::string>
    [[nodiscard]] T as() const
    {
        return m_row[m_index].as<T>();
    }

    /** Return whether the item reports itself as null */
    [[nodiscard]] bool is_null() const
    {
        return m_row[m_index].is_null();
    }

    /** Return the contents of this item as type @tparam T or, if not
     * set, use @a dv as the default value.
     */
    template <typename T>
    auto value_or(const T &dv) const
    {
        return m_row[m_index].value_or(dv);
    }

  private:
    const_row_handle m_row;                     ///< The row containing this field
    uint16_t m_index;                           ///< Column index inside the row
    std::shared_ptr<result_impl> m_result_impl; ///< Shared handle stored with the field
};
// --------------------------------------------------------------------
/// A reference to one row of a result; gives access to the fields of that row
class row_ref final
{
  public:
    /// Forward iterator over the fields of a row. Dereferencing yields a
    /// const field_ref that is stored inside the iterator itself.
    class const_field_iterator
    {
      public:
        friend class result;

        using iterator_category = std::forward_iterator_tag;
        using value_type = const field_ref;
        using difference_type = std::ptrdiff_t;
        using pointer = value_type *;
        using reference = value_type &;

        const_field_iterator(const const_field_iterator &) = default;
        const_field_iterator(const_field_iterator &&) = default;
        const_field_iterator &operator=(const const_field_iterator &) = default;
        const_field_iterator &operator=(const_field_iterator &&) = default;

        /// Dereferencing does not modify the iterator, so it is const and
        /// can be used on const iterators as well.
        reference operator*() const
        {
            return m_current;
        }

        pointer operator->() const
        {
            return &m_current;
        }

        /// Advance to the next column. Does nothing when the row handle is empty.
        const_field_iterator &operator++()
        {
            if (m_row)
            {
                ++m_col;
                m_current = field_ref(m_row, m_col, m_result_impl);
            }
            return *this;
        }

        const_field_iterator operator++(int)
        {
            const_field_iterator result(*this);
            this->operator++();
            return result;
        }

        /// Iterators are equal when they point at the same column of the same row
        bool operator==(const const_field_iterator &rhs) const
        {
            return m_row == rhs.m_row and m_col == rhs.m_col;
        }

        bool operator!=(const const_field_iterator &rhs) const
        {
            return m_row != rhs.m_row or m_col != rhs.m_col;
        }

      private:
        friend class row_ref;

        const_field_iterator(const_row_handle row, uint16_t column, std::shared_ptr<result_impl> result_impl)
            : m_row(std::move(row))
            , m_col(column)
            , m_current(m_row, m_col, result_impl)
            // m_result_impl is the last member to be initialised, so the
            // parameter can be moved here after m_current has copied it.
            , m_result_impl(std::move(result_impl))
        {
        }

        const_row_handle m_row;
        uint16_t m_col;
        field_ref m_current;
        std::shared_ptr<result_impl> m_result_impl;
    };

    // --------------------------------------------------------------------

    row_ref() = default;

    /// Construct a reference to row @a rh, storing @a result_impl along with it
    row_ref(const_row_handle rh, std::shared_ptr<result_impl> result_impl)
        : m_row(std::move(rh))
        , m_result_impl(std::move(result_impl))
    {
    }

    row_ref(const row_ref &) = default;
    row_ref &operator=(const row_ref &) = default;

    // --------------------------------------------------------------------

    [[nodiscard]] const_field_iterator cbegin() const noexcept { return { m_row, 0, m_result_impl }; }
    [[nodiscard]] const_field_iterator begin() const noexcept { return { m_row, 0, m_result_impl }; }
    [[nodiscard]] const_field_iterator cend() const noexcept { return { m_row, static_cast<uint16_t>(size()), m_result_impl }; }
    [[nodiscard]] const_field_iterator end() const noexcept { return { m_row, static_cast<uint16_t>(size()), m_result_impl }; }

    /// Return the field at column 0
    [[nodiscard]] field_ref front() const noexcept { return { m_row, 0, m_result_impl }; }

    /// Return the field at column size() - 1; the index wraps around when size() is 0
    [[nodiscard]] field_ref back() const noexcept { return { m_row, static_cast<uint16_t>(size() - 1), m_result_impl }; }

    /// Return the number of fields; defined outside this header
    [[nodiscard]] size_t size() const noexcept;
    [[nodiscard]] bool empty() const noexcept { return size() == 0; }

    /// Return the field at column @a index; no range check is done here
    [[nodiscard]] field_ref operator[](uint16_t index) const noexcept { return { m_row, index, m_result_impl }; }

    /// Return the field named @a name; defined outside this header
    [[nodiscard]] field_ref operator[](std::string_view name) const;

    // --------------------------------------------------------------------

    /// Only the row handles are compared
    bool operator==(const row_ref &rhs) const { return m_row == rhs.m_row; }
    bool operator!=(const row_ref &rhs) const { return m_row != rhs.m_row; }

  private:
    const_row_handle m_row;
    std::shared_ptr<result_impl> m_result_impl;
};
// --------------------------------------------------------------------
/// The result of executing a query: an iterable collection of row_ref objects
class result
{
  public:
    // --------------------------------------------------------------------

    /// Forward iterator over the rows of a result. Dereferencing yields a
    /// const row_ref that is stored inside the iterator itself.
    class iterator
    {
      public:
        friend class view;

        using iterator_category = std::forward_iterator_tag;
        using value_type = const row_ref;
        using difference_type = std::ptrdiff_t;
        using pointer = value_type *;
        using reference = value_type &;

        // NOTE(review): default construction and assignment are not provided,
        // the original declarations were commented out. Confirm that this is
        // intentional.

        iterator(std::shared_ptr<result_impl> result_impl, category::const_iterator cat_iter)
            : m_iter(std::move(cat_iter))
            , m_current(*m_iter, result_impl)
            // m_result_impl is the last member to be initialised, so the
            // parameter can be moved here after m_current has copied it.
            , m_result_impl(std::move(result_impl))
        {
        }

        iterator(const iterator &) = default;
        iterator(iterator &&) = default;

        /// Dereferencing does not modify the iterator, so it is const and
        /// can be used on const iterators as well.
        reference operator*() const
        {
            return m_current;
        }

        pointer operator->() const
        {
            return &m_current;
        }

        /// Advance the underlying category iterator and refresh the cached row
        iterator &operator++()
        {
            ++m_iter;
            m_current = { *m_iter, m_result_impl };
            return *this;
        }

        iterator operator++(int)
        {
            iterator result(*this);
            this->operator++();
            return result;
        }

        /// Iterators are equal when they share the same result and position
        bool operator==(const iterator &rhs) const
        {
            return m_result_impl == rhs.m_result_impl and m_iter == rhs.m_iter;
        }

        bool operator!=(const iterator &rhs) const
        {
            return m_result_impl != rhs.m_result_impl or m_iter != rhs.m_iter;
        }

      private:
        category::const_iterator m_iter;
        row_ref m_current;
        std::shared_ptr<result_impl> m_result_impl;
    };

    // --------------------------------------------------------------------

    result() = delete;
    result(result const &rhs) noexcept = default;
    result(result &&rhs) noexcept = default;
    result &operator=(result const &rhs) noexcept = default;
    result &operator=(result &&rhs) noexcept = default;

    /// Construct a result from the category @a data, optionally passing the @a query text
    result(category &&data, const std::string &query = "");

    ~result() = default;

    /// Return the only row of this result
    /// @throws std::runtime_error if the result does not contain exactly one row
    [[nodiscard]] row_ref one_row() const
    {
        if (size() != 1)
            throw std::runtime_error("Expected one row");
        return front();
    }

    /// Return the only field of the only row of this result
    /// @throws std::runtime_error if there is not exactly one row, or if that
    ///         row does not have exactly one column
    [[nodiscard]] field_ref one_field() const
    {
        expect_columns(1);

        // one_row() does the "exactly one row" check and throws the same error
        return one_row().front();
    }

    // --------------------------------------------------------------------

    [[nodiscard]] iterator begin() const noexcept;
    [[nodiscard]] iterator cbegin() const noexcept;
    [[nodiscard]] iterator end() const noexcept;
    [[nodiscard]] iterator cend() const noexcept;

    [[nodiscard]] row_ref front() const;
    [[nodiscard]] row_ref back() const;

    [[nodiscard]] size_t size() const noexcept;
    [[nodiscard]] bool empty() const noexcept { return size() == 0; }

    [[nodiscard]] size_t column_count() const;

    [[nodiscard]] category &get_category() const;

    /// Check that this result has @a cols columns. An empty result is never rejected.
    /// @throws std::runtime_error if the result has rows and the column count differs
    void expect_columns(size_t cols) const
    {
        if (auto actual = column_count(); size() > 0 and cols != actual)
            throw std::runtime_error("Unexpected number of columns");
    }

    // --------------------------------------------------------------------

    /// Write the category of result @a r to @a os
    friend std::ostream &operator<<(std::ostream &os, const result &r)
    {
        os << r.get_category();
        return os;
    }

  private:
    friend class transaction;
    friend class SelectStatement;

    std::shared_ptr<result_impl> m_impl;
};
// --------------------------------------------------------------------
/// An iterator_proxy over the category of a query result. The result is
/// stored in the proxy itself.
template <typename... Ts>
class cql_iterator_proxy : public cif::iterator_proxy<Ts...>
{
  public:
    /// Take over @a res and iterate over its category.
    /// @throws std::runtime_error (from result::expect_columns) if the result
    ///         has rows and its column count differs from the number of
    ///         requested items
    cql_iterator_proxy(result &&res)
        // The base class is initialised first, while res is still intact
        : cif::iterator_proxy<Ts...>(res.get_category())
        // res is a plain rvalue reference, not a forwarding reference, so
        // std::move is the correct cast here
        , m_result(std::move(res))
    {
        m_result.expect_columns(cif::iterator_proxy<Ts...>::N);
    }

  private:
    result m_result;
};
// --------------------------------------------------------------------
/// A transaction on a connection. Cannot be copied.
class transaction final
{
  public:
    /// Create a transaction that uses the connection @a conn
    transaction(connection &conn);

    transaction(const transaction &) = delete;
    transaction &operator=(const transaction &) = delete;

    ~transaction();

    /// \brief Execute the sql in @a query returning an iterable result
    result exec(std::string query);

    /// \brief Execute the sql in @a query returning an iterable result.
    /// Updates @a tail with what remains after the first statement in @a query
    result exec(std::string query, std::string &tail);

    /// \brief Execute @a sql with exec() and wrap the result in a
    /// cql_iterator_proxy for the types @a Ts
    template <typename... Ts>
    cql_iterator_proxy<Ts...> stream(const std::string &sql)
    {
        return cql_iterator_proxy<Ts...>(exec(sql));
    }

    /// Commit this transaction; defined outside this header
    void commit();

    /// Roll back this transaction; defined outside this header
    void rollback();

  private:
    connection &m_conn;                ///< The connection used by this transaction
    bool m_transaction_active = false; ///< NOTE(review): presumably true between begin and commit/rollback, confirm
};
// --------------------------------------------------------------------
/// A connection to the data in a datablock on which queries can be executed
// NOTE(review): this class keeps a raw pointer and declares a destructor but
// does not declare copy operations. If the destructor releases m_impl,
// copying a connection is unsafe. Confirm in the implementation.
class connection final
{
  public:
    friend class transaction;

    /// Create a connection for the datablock @a db
    connection(datablock &db);
    ~connection();

    /// \brief Return true if the string @a sql contains a complete statement.
    [[nodiscard]] bool is_complete_statement(const std::string &sql) const;

    /// \brief Return true if the underlying data was modified by any query.
    [[nodiscard]] bool is_modified() const;

    /// \brief Execute the sql in @a query returning an iterable result
    result exec(std::string query);

    /// \brief Execute the sql in @a query returning an iterable result.
    /// Updates @a tail with what remains after the first statement in @a query
    result exec(std::string query, std::string &tail);

  private:
    struct connection_impl *m_impl;
};
} // namespace cif::cql

256
include/cif++/datablock.hpp Normal file
View File

@@ -0,0 +1,256 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/category.hpp"
#include <iosfwd>
#include <list>
#include <string>
#include <string_view>
#include <tuple>
#include <utility>
#include <vector>
/** \file datablock.hpp
* Each valid mmCIF file contains at least one @ref cif::datablock.
* A datablock has a name and can contain one or more @ref cif::category "categories"
*/
namespace cif
{
class validator;
// --------------------------------------------------------------------
/**
* @brief A datablock is a list of category objects with some additional features
*
*/
class datablock : public std::list<category>
{
public:
datablock() = default;
/**
* @brief Construct a new datablock object with name @a name
*
* @param name The name for the new datablock
*/
datablock(std::string_view name)
: m_name(name)
{
}
/** @cond */
datablock(const datablock &);
datablock(datablock &&db) noexcept
{
swap_(*this, db);
}
datablock &operator=(datablock db)
{
swap_(*this, db);
return *this;
}
/** @endcond */
friend void swap_(datablock &a, datablock &b) noexcept
{
std::swap(a.m_name, b.m_name);
std::swap(a.m_validator, b.m_validator);
std::swap(static_cast<std::list<category> &>(a), static_cast<std::list<category> &>(b));
}
// --------------------------------------------------------------------
/**
* @brief Return the name of this datablock
*/
[[nodiscard]] const std::string &name() const { return m_name; }
/**
* @brief Set the name of this datablock to @a name
*
* @param name The new name
*/
void set_name(std::string_view name)
{
m_name = name;
}
/**
* @brief Attempt to load the dictionary specified in audit_conform category
*
*/
void load_dictionary();
/**
* @brief Attempt to load the dictionary @a dict
*
*/
void load_dictionary(std::string_view dict);
/**
* @brief Set the validator object to @a v
*
* @param v The new validator object, may be null
*/
void set_validator(const validator *v);
/**
* @brief Get the validator object
*
* @return const validator* The validator or nullptr if there is none
*/
[[nodiscard]] const validator *get_validator() const;
/**
* @brief Validates the content of this datablock and all its content
*
* @return true If the content is valid
* @return false If the content is not valid
*/
[[nodiscard]] bool is_valid() const;
/**
* @brief Validates all contained data for valid links between parents and children
* as defined in the validator
*
* @return true If all links are valid
* @return false If all links are not valid
*/
[[nodiscard]] bool validate_links() const;
/**
* @brief Strip removes all categories and items that are invalid according
* to the assigned validator. Will also add a valid audit_conform block.
*
* @return true if the remaining datablock is valid
*/
bool strip();
// --------------------------------------------------------------------
/**
* @brief Return the category named @a name, will create a new and empty
* category named @a name if it does not exist.
*
* @param name The name of the category to return
* @return category& Reference to the named category
*/
category &operator[](std::string_view name);
/**
* @brief Return the const category named @a name, will return a reference
* to a static empty category if it was not found.
*
* @param name The name of the category to return
* @return category& Reference to the named category
*/
const category &operator[](std::string_view name) const;
/**
* @brief Return a pointer to the category named @a name or nullptr if
* it does not exist.
*
* @param name The name of the category
* @return category* Pointer to the category found or nullptr
*/
category *get(std::string_view name);
/**
* @brief Return a pointer to the category named @a name or nullptr if
* it does not exist.
*
* @param name The name of the category
* @return category* Pointer to the category found or nullptr
*/
[[nodiscard]] const category *get(std::string_view name) const;
/**
* @brief Return true if this datablock contains a non-empty category
*/
[[nodiscard]] bool contains(std::string_view name) const
{
return get(name) != nullptr;
}
/**
* @brief Tries to find a category with name @a name and will create a
* new one if it is not found. The result is a tuple of an iterator
* pointing to the category and a boolean indicating whether the category
* was created or not.
*
* @param name The name for the category
* @return std::tuple<iterator, bool> A tuple containing an iterator pointing
* at the category and a boolean indicating whether the category was newly
* created.
*/
std::tuple<iterator, bool> emplace(std::string_view name);
/**
* @brief Get the preferred order of the categories when writing them
*/
[[nodiscard]] std::vector<std::string> get_item_order() const;
/**
* @brief Write out the contents to @a os
*/
void write(std::ostream &os) const;
/**
* @brief Write out the contents to @a os using the order defined in @a item_name_order
*/
void write(std::ostream &os, const std::vector<std::string> &item_name_order);
/**
* @brief Friend operator<< to write datablock @a db to std::ostream @a os
*/
friend std::ostream &operator<<(std::ostream &os, const datablock &db)
{
db.write(os);
return os;
}
// --------------------------------------------------------------------
/**
* @brief Comparison operator to compare two datablock for equal content
*/
bool operator==(const datablock &rhs) const;
private:
std::string m_name;
const validator *m_validator = nullptr;
};
} // namespace cif

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,14 +26,23 @@
#pragma once
#include "cif++/Cif++.hpp"
#include "cif++/validate.hpp"
#include <iosfwd>
void WritePDBFile(std::ostream& pdbFile, cif::File& cifFile);
/**
* @file validate.hpp
*
* Functions to create and manipulate validator objects
*/
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void WritePDBHeaderLines(std::ostream& os, cif::File& cifFile);
namespace cif
{
std::string GetPDBHEADERLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBCOMPNDLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBSOURCELine(cif::File& cifFile, std::string::size_type truncate_at = 127);
std::string GetPDBAUTHORLine(cif::File& cifFile, std::string::size_type truncate_at = 127);
class validator;
/**
* @brief Parse the contents of @a is and place content in validator @a v
*/
void parse_dictionary(validator &v, std::istream &is);
} // namespace cif

228
include/cif++/file.hpp Normal file
View File

@@ -0,0 +1,228 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/datablock.hpp"
#include <cassert>
#include <cstddef>
#include <filesystem>
#include <istream>
#include <list>
#include <string_view>
#include <tuple>
/** \file file.hpp
*
* The file class defined here encapsulates the contents of an mmCIF file
* It is mainly a list of @ref cif::datablock objects
*
* The class file has methods to load dictionaries. These dictionaries are
* loaded from resources (if available) or from disk from several locations.
*
* See the documentation on load_resource() in file: utilities.hpp for more
* information on how data is loaded.
*/
namespace cif
{
// --------------------------------------------------------------------
/**
* @brief The class file is actually a list of datablock objects
*
*/
class file : public std::list<datablock>
{
  public:
    /** Construct an empty file */
    file() = default;

    /**
     * @brief Construct a new file object using the data in the file @a p as content
     *
     * @param p Path to a file containing the data to load
     */
    explicit file(const std::filesystem::path &p)
    {
        load(p);
    }

    /**
     * @brief Construct a new file object using the data in the std::istream @a is
     *
     * @param is The istream containing the data to load
     */
    explicit file(std::istream &is)
    {
        load(is);
    }

    /**
     * @brief Construct a new file object with data in the constant string defined
     * by @a data and @a length
     *
     * @param data The pointer to the character string with data to load
     * @param length The length of the data
     */
    explicit file(const char *data, std::size_t length)
    {
        // A read-only streambuf directly on top of the caller's memory.
        // setg() takes non-const pointers, hence the const_cast; only the
        // get area is set up here.
        struct membuf : public std::streambuf
        {
            membuf(char *text, std::size_t length)
            {
                this->setg(text, text, text + length);
            }
        } buffer(const_cast<char *>(data), length);

        std::istream is(&buffer);
        load(is);
    }

    /** @cond */
    file(const file &rhs) // NOLINT
        : std::list<datablock>(rhs)
    {
    }

    // noexcept, like the move constructor of datablock: containers only
    // move their elements on reallocation when the move cannot throw.
    file(file &&rhs) noexcept
    {
        this->swap(rhs);
    }

    file &operator=(file f)
    {
        this->swap(f);
        return *this;
    }
    /** @endcond */

    /**
     * @brief Validate the content and return true if everything was valid.
     *
     * Will throw an exception if there is no validator defined.
     *
     * If each category was valid, validate_links will also be called.
     *
     * @return true If the content is valid
     * @return false If the content is not valid
     */
    [[nodiscard]] bool is_valid() const;

    /**
     * @brief Validate the content and return true if everything was valid.
     *
     * Will attempt to load the referenced dictionary if none was specified.
     *
     * If each category was valid, validate_links will also be called.
     *
     * @return true If the content is valid
     * @return false If the content is not valid
     */
    bool is_valid();

    /**
     * @brief Validate the links for all datablocks contained.
     *
     * Will throw an exception if no validator was specified.
     *
     * @return true If all links were valid
     * @return false Otherwise
     */
    [[nodiscard]] bool validate_links() const;

    /**
     * @brief Return true if a datablock with the name @a name is part of this file
     */
    [[nodiscard]] bool contains(std::string_view name) const;

    /**
     * @brief return a reference to the first datablock in the file,
     * the file must not be empty (checked with an assert)
     */
    datablock &front()
    {
        assert(not empty());
        return std::list<datablock>::front();
    }

    /**
     * @brief return a const reference to the first datablock in the file,
     * the file must not be empty (checked with an assert)
     */
    [[nodiscard]] const datablock &front() const
    {
        assert(not empty());
        return std::list<datablock>::front();
    }

    /**
     * @brief return a reference to the datablock named @a name
     */
    datablock &operator[](std::string_view name);

    /**
     * @brief return a const reference to the datablock named @a name
     */
    const datablock &operator[](std::string_view name) const;

    /**
     * @brief Tries to find a datablock with name @a name and will create a
     * new one if it is not found. The result is a tuple of an iterator
     * pointing to the datablock and a boolean indicating whether the datablock
     * was created or not.
     *
     * @param name The name for the datablock
     * @return std::tuple<iterator, bool> A tuple containing an iterator pointing
     * at the datablock and a boolean indicating whether the datablock was newly
     * created.
     */
    std::tuple<iterator, bool> emplace(std::string_view name);

    /** Load the data from the file specified by @a p */
    void load(const std::filesystem::path &p);

    /** Load the data from @a is */
    void load(std::istream &is);

    /** Save the data to the file specified by @a p */
    void save(const std::filesystem::path &p) const;

    /** Save the data to @a os */
    void save(std::ostream &os) const;

    /**
     * @brief Friend operator<< to write file @a f to std::ostream @a os
     */
    friend std::ostream &operator<<(std::ostream &os, const file &f)
    {
        f.save(os);
        return os;
    }
};
} // namespace cif

126
include/cif++/format.hpp Normal file
View File

@@ -0,0 +1,126 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <ostream>
#include <streambuf>
/** \file format.hpp
*
* Now using std::format instead of a home grown rip off
*/
namespace cif
{
// --------------------------------------------------------------------
/// A streambuf that fills out lines with spaces up until a specified width
class fill_out_streambuf : public std::streambuf
{
  public:
    /** @cond */
    using base_type = std::streambuf;
    using int_type = base_type::int_type;
    using char_type = base_type::char_type;
    using traits_type = base_type::traits_type;
    /** @endcond */

    /**
     * @brief Construct a new fill out streambuf object based on ostream @a os and a
     * width to fill out to of @a width
     *
     * The streambuf currently used by @a os is remembered as the upstream
     * buffer. The constructor does not install this object in @a os.
     *
     * @param os The stream whose streambuf receives the output
     * @param width The minimal width of each line, 80 by default
     */
    fill_out_streambuf(std::ostream &os, int width = 80)
        : m_os(os)
        , m_upstream(os.rdbuf())
        , m_width(width)
    {
    }

    /** @cond */
    // Put the upstream streambuf back in the stream
    ~fill_out_streambuf() override
    {
        m_os.rdbuf(m_upstream);
    }
    /** @endcond */

    /**
     * @brief The magic happens here. Write out a couple of spaces when
     * the last character to write is a newline to make the line as
     * wide as the requested width.
     *
     * @param ic The character to write, or eof when there is nothing to write
     * @return eof when writing to the upstream buffer failed, a value
     * other than eof otherwise
     */
    int_type overflow(int_type ic = traits_type::eof()) override
    {
        // No character to write. This is not a failure, and eof should
        // not be converted into a character either.
        if (traits_type::eq_int_type(ic, traits_type::eof()))
            return traits_type::not_eof(ic);

        const char_type ch = traits_type::to_char_type(ic);
        int_type result = ic;

        // Pad the line with spaces before writing the newline itself
        if (ch == '\n')
        {
            for (int i = m_column_count; result != traits_type::eof() and i < m_width; ++i)
                result = m_upstream->sputc(' ');
        }

        if (result != traits_type::eof())
            result = m_upstream->sputc(ch);

        // Only update the counters when the character was really written
        if (result != traits_type::eof())
        {
            if (ch == '\n')
            {
                m_column_count = 0;
                ++m_line_count;
            }
            else
                ++m_column_count;
        }

        return result;
    }

    /** Return the upstream streambuf */
    [[nodiscard]] std::streambuf *get_upstream() const { return m_upstream; }

    /** Return how many lines have been written */
    [[nodiscard]] int get_line_count() const { return m_line_count; }

  private:
    std::ostream &m_os;         ///< The stream that gets its streambuf back in the destructor
    std::streambuf *m_upstream; ///< The streambuf all output is written to
    int m_width;                ///< Lines are filled out with spaces to this width
    int m_line_count = 0;       ///< Number of newlines written so far
    int m_column_count = 0;     ///< Number of characters written on the current line
};
} // namespace cif

1116
include/cif++/gzio.hpp Normal file

File diff suppressed because it is too large Load Diff

887
include/cif++/item.hpp Normal file
View File

@@ -0,0 +1,887 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/text.hpp"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <optional>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <type_traits>
#include <utility>
#include <version>
/** \file item.hpp
*
* This file contains the declaration of item but also the item_value and item_handle
* These handle the storage of and access to the data for a single data item.
*/
namespace cif
{
class category;
class row;
// --------------------------------------------------------------------
/** @brief item is a transient class that is used to pass data into rows
* but it also takes care of formatting data.
*
*
*
* The class cif::item is often used implicitly when creating a row in a category
* using the emplace function.
*
* @code{.cpp}
* cif::category cat("my-cat");
* cat.emplace({
* { "item-1", 1 }, // <- stores an item with value 1
* { "item-2", 1.0, 2 }, // <- stores an item with value 1.00
* { "item-3", std::optional<int>() }, // <- stores an item with value ?
* { "item-4", std::make_optional<int>(42) }, // <- stores an item with value 42
* { "item-5" } // <- stores an item with value .
* });
*
* std::cout << cat << '\n';
* @endcode
*
* Will result in:
*
* @code{.txt}
* _my-cat.item-1 1
* _my-cat.item-2 1.00
* _my-cat.item-3 ?
* _my-cat.item-4 42
* _my-cat.item-5 .
* @endcode
*/
/// The kinds of content an item_value can hold
enum class item_value_type
{
	INT = 0,          ///< a 64 bit signed integer
	FLOAT = 1,        ///< a double precision floating point number
	TEXT = 2,         ///< a character string
	INAPPLICABLE = 3, ///< the null value written as "."
	MISSING = 4       ///< the null value written as "?"
};
/// Satisfied by every integral type, cv qualifiers and references are ignored
template <typename T>
concept IntegralType = std::is_integral_v<std::remove_cvref_t<T>>;

/// Satisfied by every floating point type, cv qualifiers and references are ignored
template <typename T>
concept FloatType = std::is_floating_point_v<std::remove_cvref_t<T>>;

/// Satisfied by types that can be assigned to a std::string and that are
/// not numbers. Numbers are assignable to std::string as well (by way of
/// the assignment of a single char) and so they are excluded explicitly.
/// The exclusion looks at the type without cv qualifiers and references,
/// like IntegralType and FloatType do, to make sure a type such as
/// `const int &` cannot satisfy both this concept and IntegralType.
template <typename T>
concept StringType =
	std::is_assignable_v<std::string, T> and
	not std::is_integral_v<std::remove_cvref_t<T>> and
	not std::is_floating_point_v<std::remove_cvref_t<T>>;
// --------------------------------------------------------------------
/// \cond
// Compile time flag telling whether T is a std::optional, the general
// case is false
template <typename T>
inline constexpr bool is_optional_v = false;
// The partial specialisation that matches std::optional<T> is true.
// Note that a cv qualified or reference type does not match.
template <typename T>
inline constexpr bool is_optional_v<std::optional<T>> = true;
/// \endcond
class item_value
{
public:
item_value() noexcept
{
m_data.m_type = item_value_type::MISSING;
}
item_value(item_value_type type) noexcept
: m_data(type)
{
}
item_value(const item_value &rhs)
{
m_data.m_type = rhs.m_data.m_type;
switch (m_data.m_type)
{
case item_value_type::INT:
m_data.m_value = rhs.m_data.m_value.m_integer;
break;
case item_value_type::FLOAT:
m_data.m_len = rhs.m_data.m_len;
m_data.m_value = rhs.m_data.m_value.m_float;
break;
case item_value_type::TEXT:
m_data.m_len = rhs.m_data.m_len;
m_data.m_value = rhs.m_data.sv();
break;
default: break;
}
}
item_value(std::nullptr_t)
{
m_data.m_type = item_value_type::MISSING;
}
item_value(std::string_view s)
{
if (s == ".")
m_data.m_type = item_value_type::INAPPLICABLE;
else if (s == "?")
m_data.m_type = item_value_type::MISSING;
else
{
m_data.m_type = item_value_type::TEXT;
m_data.m_len = s.length();
m_data.m_value = s;
}
}
template <size_t N>
item_value(const char(s)[N])
: item_value(std::string_view{ s, N })
{
}
item_value(const char *s)
: item_value(std::string_view{ s })
{
}
item_value(const std::string &s)
: item_value(std::string_view{ s })
{
}
template <IntegralType T>
item_value(T v)
{
m_data.m_type = item_value_type::INT;
m_data.m_value = static_cast<int64_t>(v);
}
template <FloatType T>
item_value(T v, int precision = 0)
{
m_data.m_type = item_value_type::FLOAT;
m_data.m_value = static_cast<double>(v);
m_data.m_len = precision;
}
template <typename T>
item_value(std::optional<T> v)
{
if (v.has_value())
{
item_value iv{ *v };
swap(*this, iv);
}
else
m_data.m_type = item_value_type::MISSING;
}
item_value(item_value &&rhs) noexcept
{
swap(*this, rhs);
}
item_value &operator=(item_value rhs) noexcept
{
swap(*this, rhs);
return *this;
}
// --------------------------------------------------------------------
[[nodiscard]] constexpr bool is_inapplicable() const noexcept { return m_data.m_type == item_value_type::INAPPLICABLE; }
[[nodiscard]] constexpr bool is_missing() const noexcept { return m_data.m_type == item_value_type::MISSING; }
[[nodiscard]] constexpr bool is_null() const noexcept { return is_inapplicable() or is_missing(); }
[[nodiscard]] constexpr bool is_string() const noexcept { return m_data.m_type == item_value_type::TEXT; }
[[nodiscard]] constexpr bool is_number_int() const noexcept { return m_data.m_type == item_value_type::INT; }
[[nodiscard]] constexpr bool is_number_float() const noexcept { return m_data.m_type == item_value_type::FLOAT; }
[[nodiscard]] constexpr bool is_number() const noexcept { return is_number_int() or is_number_float(); }
[[nodiscard]] constexpr item_value_type type() const { return m_data.m_type; }
explicit operator bool() const noexcept
{
bool result = false;
switch (m_data.m_type)
{
case item_value_type::INT: result = m_data.m_value.m_integer != 0; break;
case item_value_type::FLOAT: result = m_data.m_value.m_float != 0; break;
case item_value_type::TEXT: result = m_data.m_len != 0; break;
case item_value_type::INAPPLICABLE:
case item_value_type::MISSING: result = false; break;
}
return result;
}
[[nodiscard]] bool empty() const noexcept
{
switch (m_data.m_type)
{
case item_value_type::INAPPLICABLE:
case item_value_type::MISSING:
return true;
case item_value_type::TEXT:
return m_data.sv().empty();
default:
return false;
}
}
// --------------------------------------------------------------------
template <StringType T>
[[nodiscard]] inline std::string get() const
{
return str();
}
template <IntegralType T>
[[nodiscard]] std::remove_cvref_t<T> get() const
{
static_assert(not std::is_same_v<std::remove_cvref_t<T>, bool>, "bool is no longer supported");
switch (m_data.m_type)
{
case item_value_type::INT:
return m_data.m_value.m_integer;
case item_value_type::FLOAT:
return m_data.m_value.m_float;
case item_value_type::TEXT:
{
auto sv = m_data.sv();
int64_t v;
auto &&[ptr, ec] = from_chars(sv.data(), sv.data() + sv.length(), v);
if (ec != std::errc{})
throw std::system_error(std::make_error_code(ec));
if (ptr != sv.data() + sv.length())
throw std::invalid_argument("String value does not contain only an integer");
return v;
}
default:
return not empty();
}
}
template <FloatType T>
[[nodiscard]] std::remove_cvref_t<T> get() const
{
switch (m_data.m_type)
{
case item_value_type::INT:
return m_data.m_value.m_integer;
case item_value_type::FLOAT:
return m_data.m_value.m_float;
case item_value_type::TEXT:
{
auto sv = m_data.sv();
double v;
auto &&[ptr, ec] = from_chars(sv.data(), sv.data() + sv.length(), v);
if (ec != std::errc{})
throw std::system_error(std::make_error_code(ec));
if (ptr != sv.data() + sv.length())
throw std::invalid_argument("String value does not contain only a floating point number");
return v;
}
default:
return not empty();
}
}
template <typename T>
requires is_optional_v<T>
[[nodiscard]] auto get() const
{
switch (m_data.m_type)
{
case item_value_type::INAPPLICABLE:
case item_value_type::MISSING:
return T{};
default:
{
auto v = get<typename T::value_type>();
return T{ v };
}
}
}
[[nodiscard]] std::string str() const;
[[nodiscard]] const std::string_view sv() const
{
assert(m_data.m_type == cif::item_value_type::TEXT);
return m_data.sv();
}
// --------------------------------------------------------------------
friend void swap(item_value &a, item_value &b) noexcept
{
std::swap(a.m_data.m_type, b.m_data.m_type);
std::swap(a.m_data.m_len, b.m_data.m_len);
std::swap(a.m_data.m_value, b.m_data.m_value);
}
// --------------------------------------------------------------------
// std::partial_ordering operator<=>(const item_value &rhs) const
// {
// if (m_data.m_type == rhs.m_data.m_type)
// {
// switch (m_data.m_type)
// {
// case item_value_type::INT: return m_data.m_value.m_integer <=> rhs.m_data.m_value.m_integer;
// case item_value_type::FLOAT: return m_data.m_value.m_float <=> rhs.m_data.m_value.m_float;
// case item_value_type::TEXT: return m_data.sv() <=> rhs.m_data.sv();
// case item_value_type::MISSING:
// case item_value_type::EMPTY: return std::strong_ordering::equivalent;
// }
// }
// else
// return m_data.m_type <=> rhs.m_data.m_type;
// }
bool operator==(const item_value &rhs) const
{
if (m_data.m_type == rhs.m_data.m_type)
{
switch (m_data.m_type)
{
case item_value_type::INT: return m_data.m_value.m_integer == rhs.m_data.m_value.m_integer;
case item_value_type::FLOAT: return m_data.m_value.m_float == rhs.m_data.m_value.m_float;
case item_value_type::TEXT: return m_data.sv() == rhs.m_data.sv();
case item_value_type::INAPPLICABLE:
case item_value_type::MISSING: return true;
}
}
return false;
}
[[nodiscard]] int compare(const item_value &b, bool ignore_case = false) const noexcept;
friend std::ostream &operator<<(std::ostream &os, const item_value &v);
private:
union value
{
int64_t m_integer{};
double m_float;
char m_local_str[8];
char *m_str;
value() = default;
value(int64_t v)
: m_integer(v)
{
}
value(double v)
: m_float(v)
{
}
value(std::string_view s)
{
if (s.length() >= sizeof(m_local_str))
{
m_str = new char[s.length() + 1];
std::copy(s.data(), s.data() + s.length(), m_str);
m_str[s.length()] = 0;
}
else
std::memcpy(m_local_str, s.data(), s.length() + 1);
}
value(item_value_type t)
{
m_integer = 0;
}
void destroy(item_value_type t, size_t len)
{
if (t == item_value_type::TEXT and len >= sizeof(m_local_str))
delete[] m_str;
}
};
struct data
{
item_value_type m_type = item_value_type::MISSING;
uint32_t m_len{};
value m_value{};
data(item_value_type t)
: m_type(t)
, m_value(t)
{
}
data() noexcept = default;
data(data &&rhs) noexcept
{
std::swap(m_type, rhs.m_type);
std::swap(m_len, rhs.m_len);
std::swap(m_value, rhs.m_value);
}
data(const data &) noexcept = delete;
data &operator=(data &&) noexcept = delete;
data &operator=(const data &) noexcept = delete;
~data()
{
m_value.destroy(m_type, m_len);
}
[[nodiscard]] std::string_view sv() const noexcept
{
assert(m_type == item_value_type::TEXT);
return m_type == item_value_type::TEXT ? std::string_view(m_len >= sizeof(m_value.m_local_str) ? m_value.m_str : m_value.m_local_str, m_len) : std::string_view{};
}
[[nodiscard]] const char *c_str() const noexcept
{
assert(m_type == item_value_type::TEXT);
return m_type == item_value_type::TEXT ? (m_len >= sizeof(m_value.m_local_str) ? m_value.m_str : m_value.m_local_str) : nullptr;
}
} m_data{};
};
static_assert(sizeof(item_value) == 16, "item_value should be 16 bytes");
/// \brief The combination of a name and an item_value
class item
{
  public:
	/// \brief Default constructor, empty item
	item() = default;

	/// \brief constructor for an item with name \a name and a value of
	/// type item_value_type::MISSING, the same type the text "?" results in.
	///
	/// NOTE(review): this comment used to promise the character '.', an
	/// inapplicable value. The code stores MISSING; please confirm which
	/// of the two is intended.
	item(std::string name)
		: m_name(std::move(name))
		, m_value(item_value_type::MISSING)
	{
	}

	/// \brief constructor for an item with name \a name and value \a value
	item(std::string name, item_value value)
		: m_name(std::move(name))
		, m_value(std::move(value))
	{
	}

	/** @cond */
	item(const item &rhs) = default;

	// noexcept, since this only swaps with default constructed members.
	// Containers copy instead of move when the move constructor may throw.
	item(item &&rhs) noexcept
	{
		swap(*this, rhs);
	}

	item &operator=(item rhs) noexcept
	{
		swap(*this, rhs);
		return *this;
	}
	/** @endcond */

	/// \brief Swap the name and the value of \a a and \a b
	friend void swap(item &a, item &b) noexcept
	{
		// An unqualified call, this way the swap of item_value itself is
		// used instead of the three moves of the generic std::swap
		using std::swap;
		swap(a.m_name, b.m_name);
		swap(a.m_value, b.m_value);
	}

	[[nodiscard]] const std::string &name() const { return m_name; }    ///< Return the name of the item
	[[nodiscard]] const item_value &value() const & { return m_value; } ///< Return the value of the item
	item_value &value() & { return m_value; }                           ///< Return the value of the item

	/// \brief replace the content of the stored value with \a v
	void value(item_value v) { m_value = std::move(v); }

	/// \brief empty means the value is inapplicable, missing or a text of length zero
	[[nodiscard]] bool empty() const { return m_value.empty(); }

	/// \brief returns true if the value is inapplicable ('.') or missing ('?')
	[[nodiscard]] bool is_null() const { return m_value.is_null(); }

	/// \brief returns true if the value is missing ('?')
	[[nodiscard]] bool is_unknown() const { return m_value.is_missing(); }

	// /// \brief the length of the value string
	// std::size_t length() const { return m_value.length(); }

	/// \brief support for structured binding, index 0 is the name and
	/// index 1 is the value
	template <std::size_t N>
	decltype(auto) get() const
	{
		if constexpr (N == 0)
			return name();
		else if constexpr (N == 1)
			return value();
	}

	// auto operator<=>(const item &rhs) const = default;

  private:
	std::string m_name;
	item_value m_value;
};
// --------------------------------------------------------------------
/// \brief This is item_handle, it is used to access the data stored in
/// item_value's in rows
struct item_handle
{
public:
item_handle() = delete;
/**
* @brief Assign value @a value to the item referenced
*
* @tparam T Type of the value
* @param value The value
* @return reference to this item_handle
*/
item_handle &operator=(item_value value)
{
set(std::move(value), true);
return *this;
}
[[nodiscard]] item_value &value();
[[nodiscard]] const item_value &value() const;
[[nodiscard]] bool is_inapplicable() const noexcept { return value().type() == item_value_type::INAPPLICABLE; }
[[nodiscard]] bool is_missing() const noexcept { return value().type() == item_value_type::MISSING; }
[[nodiscard]] bool is_null() const noexcept { return is_inapplicable() or is_missing(); }
[[nodiscard]] bool is_string() const noexcept { return value().type() == item_value_type::TEXT; }
[[nodiscard]] bool is_number_int() const noexcept { return value().type() == item_value_type::INT; }
[[nodiscard]] bool is_number_float() const noexcept { return value().type() == item_value_type::FLOAT; }
[[nodiscard]] bool is_number() const noexcept { return is_number_int() or is_number_float(); }
[[nodiscard]] auto type() const { return value().type(); }
template <typename T>
[[nodiscard]] auto get() const
{
if (empty())
return T{};
else
return value().template get<T>();
}
template <typename T>
[[nodiscard]] auto as() const
{
if (empty())
return T{};
else
return value().template get<T>();
}
[[nodiscard]] auto str() const
{
return value().str();
}
[[nodiscard]] auto sv() const
{
return value().sv();
}
/** Swap contents of @a a and @a b */
friend void swap(item_handle &a, item_handle &b) noexcept;
/** Return the contents of this item as type @tparam T or, if not
* set, use @a dv as the default value.
*/
template <typename T>
[[nodiscard]] auto value_or(const T &dv) const
{
return empty() ? dv : this->get<T>();
}
/**
* @brief Compare the contents of this item with value @a value
* optionally ignoring character case, if @a icase is true.
* Returns 0 if both are equal, -1 if this sorts before @a value
* and 1 if this sorts after @a value
*
* @tparam T Type of the value @a value
* @param value The value to compare with
* @param icase Flag indicating if we should compare character case sensitive
* @return -1, 0 or 1
*/
[[nodiscard]] int compare(const item_value &value, bool icase = true) const noexcept
{
return this->value().compare(value, icase);
}
[[nodiscard]] int compare(const item_handle &value, bool icase = true) const noexcept
{
return compare(value.value(), icase);
}
/**
* @brief Compare the value contained with the value @a value and
* return true if both are equal.
*/
[[nodiscard]] bool operator==(const item_value &value) const noexcept
{
// TODO: icase or not icase?
return this->value().compare(value) == 0;
}
// We may not have C++20 yet...
/**
* @brief Compare the value contained with the value @a value and
* return true if both are not equal.
*/
template <typename T>
[[nodiscard]] bool operator!=(const T &value) const noexcept
{
return not operator==(value);
}
/**
* @brief Returns true if the content string is empty or
* only contains '.' meaning null or '?' meaning unknown
* in a mmCIF context
*/
[[nodiscard]] bool empty() const;
/** Easy way to test for an empty item */
explicit operator bool() const { return not empty(); }
/** Return a std::string_view for the contents */
[[nodiscard]] std::string_view text_() const;
/**
* @brief Construct a new item handle object
*
* @param item Item index
* @param row Reference to the row
*/
item_handle(category &cat, row &row, uint16_t item_ix)
: m_category(cat)
, m_row(row)
, m_item_ix(item_ix)
{
}
private:
category &m_category;
row &m_row;
uint16_t m_item_ix;
friend class parser;
void set(item_value value, bool updateLinked);
};
struct const_item_handle
{
public:
const_item_handle() = delete;
[[nodiscard]] const item_value &value() const;
[[nodiscard]] bool is_inapplicable() const noexcept { return value().type() == item_value_type::INAPPLICABLE; }
[[nodiscard]] bool is_missing() const noexcept { return value().type() == item_value_type::MISSING; }
[[nodiscard]] bool is_null() const noexcept { return is_inapplicable() or is_missing(); }
[[nodiscard]] bool is_string() const noexcept { return value().type() == item_value_type::TEXT; }
[[nodiscard]] bool is_number_int() const noexcept { return value().type() == item_value_type::INT; }
[[nodiscard]] bool is_number_float() const noexcept { return value().type() == item_value_type::FLOAT; }
[[nodiscard]] bool is_number() const noexcept { return is_number_int() or is_number_float(); }
[[nodiscard]] auto type() const { return value().type(); }
template <typename T>
[[nodiscard]] auto get() const
{
if (empty())
return T{};
else
return value().template get<T>();
}
template <typename T>
[[nodiscard]] auto as() const
{
if (empty())
return T{};
else
return value().template get<T>();
}
[[nodiscard]] auto str() const
{
return value().str();
}
[[nodiscard]] auto sv() const
{
return value().sv();
}
/** Return the contents of this item as type @tparam T or, if not
* set, use @a dv as the default value.
*/
template <typename T>
[[nodiscard]] auto value_or(const T &dv) const
{
return empty() ? dv : this->get<T>();
}
/**
* @brief Compare the contents of this item with value @a value
* optionally ignoring character case, if @a icase is true.
* Returns 0 if both are equal, -1 if this sorts before @a value
* and 1 if this sorts after @a value
*
* @tparam T Type of the value @a value
* @param value The value to compare with
* @param icase Flag indicating if we should compare character case sensitive
* @return -1, 0 or 1
*/
[[nodiscard]] int compare(const item_value &value, bool icase = true) const noexcept
{
return this->value().compare(value, icase);
}
[[nodiscard]] int compare(const const_item_handle &value, bool icase = true) const noexcept
{
if (empty() and value.empty())
return 0;
else if (empty())
return -1;
else if (value.empty())
return 1;
else
return compare(value.value(), icase);
}
/**
* @brief Compare the value contained with the value @a value and
* return true if both are equal.
*/
[[nodiscard]] bool operator==(const item_value &value) const noexcept
{
// TODO: icase or not icase?
return this->value().compare(value) == 0;
}
// We may not have C++20 yet...
/**
* @brief Compare the value contained with the value @a value and
* return true if both are not equal.
*/
template <typename T>
[[nodiscard]] bool operator!=(const T &value) const noexcept
{
return not operator==(value);
}
/**
* @brief Returns true if the content string is empty or
* only contains '.' meaning null or '?' meaning unknown
* in a mmCIF context
*/
[[nodiscard]] bool empty() const;
/** Easy way to test for an empty item */
explicit operator bool() const { return not empty(); }
/**
* @brief Construct a new item handle object
*
* @param item Item index
* @param row Reference to the row
*/
const_item_handle(const category &cat, const row &row, uint16_t item_ix)
: m_category(cat)
, m_row(row)
, m_item_ix(item_ix)
{
}
private:
const category &m_category;
const row &m_row;
uint16_t m_item_ix;
};
} // namespace cif
namespace std
{
/** @cond */
// The tuple protocol for ::cif::item. Together with the member template
// item::get<N>() this is what makes structured binding of an item work.
// An item has two elements.
template <>
struct tuple_size<::cif::item>
: public std::integral_constant<std::size_t, 2>
{
};
// Element 0 has the type returned by item::name()
template <>
struct tuple_element<0, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().name());
};
// Element 1 has the type returned by item::value()
template <>
struct tuple_element<1, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().value());
};
/** @endcond */
} // namespace std

823
include/cif++/iterator.hpp Normal file
View File

@@ -0,0 +1,823 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/condition.hpp"
#include "cif++/row.hpp"
#include <array>
#include <cstdint>
#include <numeric>
#include <type_traits>
/**
* @file iterator.hpp
*
* This file contains several implementations of generic iterators.
*
* Using partial specialization we can have implementation for
* iterators that return row_handles, a single value or tuples of
* multiple values.
*
*/
namespace cif
{
class category;
// --------------------------------------------------------------------
/**
* @brief Implementation of an iterator that can return
* multiple values in a tuple. Of course, that tuple can
* then be used in structured binding to receive the values
* in a for loop e.g.
*
* @tparam Const When true the iterator uses const_row_handle, otherwise row_handle
* @tparam Ts The types this iterator can be dereferenced to
*/
template <bool Const, typename... Ts>
class iterator_impl_base
{
  public:
	/** @cond */
	template <bool, typename...>
	friend class iterator_impl_base;
	friend class category;
	/** @endcond */

	/** variable that contains the number of elements in the tuple */
	static constexpr std::size_t N = sizeof...(Ts);

	/** @cond */
	using tuple_type = std::tuple<Ts...>;
	using row_handle_type = std::conditional_t<Const, const_row_handle, row_handle>;
	using iterator_category = std::forward_iterator_tag;
	using value_type = std::conditional_t<Const, const tuple_type, tuple_type>;
	using difference_type = std::ptrdiff_t;
	using pointer = value_type *;
	using reference = value_type &;

	iterator_impl_base() = default;
	iterator_impl_base(const iterator_impl_base &rhs) = default;
	iterator_impl_base(iterator_impl_base &&rhs) = default;

	// Conversion from an iterator with other template arguments, the
	// row, the cached tuple and the item indices are all copied
	template <bool C, typename... T2s>
	iterator_impl_base(const iterator_impl_base<C, T2s...> &rhs)
		: m_current(rhs.m_current)
		, m_value(rhs.m_value)
		, m_item_ix(rhs.m_item_ix)
	{
	}

	// Construction from a non const iterator for the same types, the
	// cached tuple is fetched again from the row
	template <bool C>
	iterator_impl_base(iterator_impl_base<C, Ts...> &rhs)
		: m_current(rhs.m_current)
		, m_value(rhs.m_value)
		, m_item_ix(rhs.m_item_ix)
	{
		m_value = get(std::make_index_sequence<N>());
	}

	// Construction from a row iterator and the indices of the items to
	// fetch, one index for each type in Ts
	template <bool C>
	iterator_impl_base(const iterator_impl_base<C> &rhs, const std::array<uint16_t, N> &cix)
		: m_current(rhs.m_current)
		, m_item_ix(cix)
	{
		m_value = get(std::make_index_sequence<N>());
	}

	iterator_impl_base &operator=(iterator_impl_base i)
	{
		std::swap(m_current, i.m_current);
		std::swap(m_item_ix, i.m_item_ix);
		std::swap(m_value, i.m_value);
		return *this;
	}

	virtual ~iterator_impl_base() = default;

	// Dereferencing returns a copy of the cached tuple
	auto operator*()
	{
		return m_value;
	}

	auto operator*() const
	{
		return m_value;
	}

	auto operator->()
	{
		return &m_value;
	}

	auto operator->() const
	{
		return &m_value;
	}

	// The handle of the row this iterator currently points to
	operator const_row_handle() const
	{
		return m_current;
	}

	operator row_handle_type()
	{
		return m_current;
	}

	// Move on to the row linked via m_next and fetch the tuple for that
	// row. An iterator whose handle tests false stays where it is.
	iterator_impl_base &operator++()
	{
		if (m_current)
			m_current.m_row = m_current.m_row->m_next;

		m_value = get(std::make_index_sequence<N>());

		return *this;
	}

	iterator_impl_base operator++(int)
	{
		iterator_impl_base result(*this);
		this->operator++();
		return result;
	}

	// Iterators are compared by the row they point to only
	bool operator==(const iterator_impl_base &rhs) const { return m_current == rhs.m_current; }
	bool operator!=(const iterator_impl_base &rhs) const { return m_current != rhs.m_current; }

	template <bool C, typename... ITs>
	bool operator==(const iterator_impl_base<C, ITs...> &rhs) const
	{
		return m_current == rhs.m_current;
	}

	template <bool C, typename... ITs>
	bool operator!=(const iterator_impl_base<C, ITs...> &rhs) const
	{
		return m_current != rhs.m_current;
	}
	/** @endcond */

  private:
	/// Fetch the items at the indices in m_item_ix from the current row,
	/// each converted to its type in Ts. The result is a value initialised
	/// tuple if the current row handle tests false.
	template <std::size_t... Is>
	[[nodiscard]] tuple_type get(std::index_sequence<Is...>) const
	{
		return m_current ? tuple_type{ m_current[m_item_ix[Is]].template as<Ts>()... } : tuple_type{};
	}

	row_handle_type m_current;
	tuple_type m_value;
	// Value initialised: a default constructed iterator is copied and
	// swapped like any other, reading indeterminate indices then would
	// be undefined behaviour.
	std::array<uint16_t, N> m_item_ix{};
};
/**
* @brief Implementation of an iterator that returns
* only row_handles
*
* @tparam Const When true the iterator returns const_row_handle objects, otherwise row_handle
*/
template <bool Const>
class iterator_impl_base<Const>
{
public:
/** @cond */
template <bool, typename...>
friend class iterator_impl_base;
friend class category;
using category_type = std::conditional_t<Const, const category, category>;
using row_type = std::conditional_t<Const, const row, row>;
using row_handle_type = std::conditional_t<Const, const_row_handle, row_handle>;
using iterator_category = std::forward_iterator_tag;
using value_type = std::conditional_t<Const, const_row_handle, row_handle>;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
iterator_impl_base() = default;
iterator_impl_base(const iterator_impl_base &rhs) = default;
iterator_impl_base(iterator_impl_base &&rhs) = default;
template <bool C>
iterator_impl_base(const iterator_impl_base<C> &rhs)
: m_current(rhs.m_current)
{
}
iterator_impl_base(category_type &cat, row_type *current)
: m_current(cat, *current)
{
}
template <bool C>
iterator_impl_base(const iterator_impl_base<C> &rhs, const std::array<uint16_t, 0> &)
: m_current(rhs.m_current)
{
}
iterator_impl_base &operator=(iterator_impl_base i)
{
std::swap(m_current, i.m_current);
return *this;
}
virtual ~iterator_impl_base() = default;
auto operator*()
{
return m_current;
}
auto operator*() const
{
return m_current;
}
auto operator->()
{
return &m_current;
}
auto operator->() const
{
return &m_current;
}
operator const_row_handle() const
{
return m_current;
}
operator row_handle_type()
{
return m_current;
}
[[nodiscard]] int64_t row_id() const
{
return reinterpret_cast<int64_t>(m_current.m_row);
}
iterator_impl_base &operator++()
{
if (m_current)
m_current.m_row = m_current.m_row->m_next;
return *this;
}
iterator_impl_base operator++(int)
{
iterator_impl_base result(*this);
this->operator++();
return result;
}
bool operator==(const iterator_impl_base &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl_base &rhs) const { return m_current != rhs.m_current; }
template <bool C, typename... ITs>
bool operator==(const iterator_impl_base<C, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
template <bool C, typename... ITs>
bool operator!=(const iterator_impl_base<C, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
/** @endcond */
private:
row_handle_type m_current;
};
/**
* @brief Implementation of an iterator that can return
* a single value.
*
* @tparam Const When true the iterator uses const_row_handle, otherwise row_handle
* @tparam T The type this iterator can be dereferenced to
*/
template <bool Const, typename T>
class iterator_impl_base<Const, T>
{
public:
/** @cond */
template <bool, typename...>
friend class iterator_impl_base;
friend class category;
using category_type = std::conditional_t<Const, const category, category>;
using row_handle_type = std::conditional_t<Const, const_row_handle, row_handle>;
using iterator_category = std::forward_iterator_tag;
using value_type = T;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
iterator_impl_base() = default;
iterator_impl_base(const iterator_impl_base &rhs) = default;
iterator_impl_base(iterator_impl_base &&rhs) = default;
template <bool C, typename T2>
iterator_impl_base(const iterator_impl_base<C, T2> &rhs)
: m_current(rhs.m_current)
, m_value(rhs.m_value)
, m_item_ix(rhs.m_item_ix)
{
}
template <bool C>
iterator_impl_base(iterator_impl_base<C, T> &rhs)
: m_current(rhs.m_current)
, m_value(rhs.m_value)
, m_item_ix(rhs.m_item_ix)
{
m_value = get();
}
template <bool C>
iterator_impl_base(const iterator_impl_base<C> &rhs, const std::array<uint16_t, 1> &cix)
: m_current(rhs.m_current)
, m_item_ix(cix[0])
{
m_value = get();
}
iterator_impl_base &operator=(iterator_impl_base i)
{
std::swap(m_current, i.m_current);
std::swap(m_item_ix, i.m_item_ix);
std::swap(m_value, i.m_value);
return *this;
}
virtual ~iterator_impl_base() = default;
auto operator*()
{
return m_value;
}
auto operator*() const
{
return m_value;
}
auto operator->()
{
return &m_value;
}
auto operator->() const
{
return &m_value;
}
operator const_row_handle() const
{
return m_current;
}
operator row_handle_type()
{
return m_current;
}
iterator_impl_base &operator++()
{
if (m_current)
m_current.m_row = m_current.m_row->m_next;
m_value = get();
return *this;
}
iterator_impl_base operator++(int)
{
iterator_impl_base result(*this);
this->operator++();
return result;
}
// Iterators compare equal when their row handles compare equal; the item
// index and the cached value do not take part in the comparison.
bool operator==(const iterator_impl_base &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl_base &rhs) const { return m_current != rhs.m_current; }
/// Compare with an iterator that has other template arguments, again only the row handles are compared.
template <bool C, typename... ITs>
bool operator==(const iterator_impl_base<C, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
/// Inequality with an iterator that has other template arguments.
template <bool C, typename... ITs>
bool operator!=(const iterator_impl_base<C, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
/** @endcond */
private:
/**
 * @brief Fetch the value for the row this iterator currently refers to.
 *
 * @return The item at index m_item_ix of the current row converted to
 * value_type, or a value initialised value_type when there is no current row
 */
[[nodiscard]] value_type get() const
{
	if (not m_current)
		return value_type{};

	return m_current[m_item_ix].template get<value_type>();
}
// The defaulted default constructor performs no initialisation of its own, so
// the members carry initialisers: a default constructed iterator can then be
// copied and compared without reading indeterminate values.
row_handle_type m_current;  ///< Handle to the row the iterator refers to
value_type m_value{};       ///< Cached value of the item in the current row
uint16_t m_item_ix = 0;     ///< Index of the item that is read from each row
};
// --------------------------------------------------------------------
/// Iterator over a non-const category, returning values of the types @a Ts
template<typename ... Ts>
using iterator_impl = iterator_impl_base<false, Ts...>;
/// Iterator over a const category, returning values of the types @a Ts
template<typename ... Ts>
using const_iterator_impl = iterator_impl_base<true, Ts...>;
// --------------------------------------------------------------------
// iterator proxy
/**
 * @brief An iterator_proxy is used as a result type for methods that
 * return a range of values you want to iterate over.
 *
 * E.g. the class cif::category contains the method cif::category::rows()
 * that returns an iterator_proxy that allows you to iterate over
 * all the rows in the category.
 *
 * @tparam Const When true the proxy refers to a const category and uses the const iterator types
 * @tparam Ts The types the iterators return. See class: iterator
 */
template <bool Const, typename... Ts>
class iterator_proxy_base
{
  public:
	/** @cond */
	static constexpr const std::size_t N = sizeof...(Ts);

	using category_type = std::conditional_t<Const, const category, category>;
	using iterator = iterator_impl_base<Const, Ts...>;
	using row_iterator = iterator_impl_base<Const>;

	iterator_proxy_base(category_type &cat, row_iterator pos, char const *const items[N]);
	iterator_proxy_base(category_type &cat, row_iterator pos, std::initializer_list<char const *> items); // NOLINT(modernize-pass-by-value)

	iterator_proxy_base(iterator_proxy_base &&p);
	iterator_proxy_base &operator=(iterator_proxy_base &&p);

	iterator_proxy_base(const iterator_proxy_base &) = delete;
	iterator_proxy_base &operator=(const iterator_proxy_base &) = delete;
	/** @endcond */

	[[nodiscard]] iterator begin() const { return iterator(m_begin, m_item_ix); } ///< Return the iterator pointing to the first row
	[[nodiscard]] iterator end() const { return iterator(m_end, m_item_ix); }     ///< Return the iterator pointing past the last row

	[[nodiscard]] bool empty() const { return m_begin == m_end; }                    ///< Return true if the range is empty
	explicit operator bool() const { return not empty(); }                           ///< Easy way to detect if the range is empty
	[[nodiscard]] std::size_t size() const { return std::distance(begin(), end()); } ///< Return size of the range, this walks the whole range

	// row front() { return *begin(); }
	// row back() { return *(std::prev(end())); }

	[[nodiscard]] category_type &get_category() const { return *m_category; } ///< Return the category the iterator belong to

	/** Exchange the complete state of this proxy with that of @a rhs */
	void swap(iterator_proxy_base &rhs)
	{
		std::swap(m_category, rhs.m_category);
		std::swap(m_begin, rhs.m_begin);
		std::swap(m_end, rhs.m_end);
		std::swap(m_item_ix, rhs.m_item_ix);
	}

  protected:
	/** Construct a proxy for all rows in @a cat, see the definition for the item indices used */
	iterator_proxy_base(category_type &cat);

  private:
	// The members carry initialisers so that no constructor can leave the
	// pointer or (part of) the index array in an indeterminate state, e.g.
	// when fewer than N item names are passed in an initializer_list.
	category_type *m_category = nullptr;
	row_iterator m_begin, m_end;
	std::array<uint16_t, N> m_item_ix{};
};
// --------------------------------------------------------------------
/// Range over the rows of a non-const category, yielding values of the types @a Ts
template <typename... Ts>
using iterator_proxy = iterator_proxy_base<false, Ts...>;
/// Range over the rows of a const category, yielding values of the types @a Ts
template <typename... Ts>
using const_iterator_proxy = iterator_proxy_base<true, Ts...>;
// --------------------------------------------------------------------
// conditional iterator proxy
/**
* @brief A conditional iterator proxy is similar to an iterator_proxy
* in that it can be used to return a range of rows you can iterate over.
* In the case of an conditional_iterator_proxy a cif::condition is used
* to filter out only those rows that match the condition.
*
* @tparam category_type The category the iterators belong to
* @tparam Ts The types to which the iterators can be dereferenced
*/
template <bool Const, typename... Ts>
class conditional_iterator_proxy_base
{
public:
/** @cond */
static constexpr const std::size_t N = sizeof...(Ts);
using category_type = std::conditional_t<Const, const category, category>;
using base_iterator = iterator_impl_base<Const, Ts...>;
using value_type = typename base_iterator::value_type;
using row_iterator = iterator_impl_base<Const>;
class conditional_iterator_impl
{
public:
using iterator_category = std::forward_iterator_tag;
using value_type = conditional_iterator_proxy_base::value_type;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type;
conditional_iterator_impl() = default;
conditional_iterator_impl(category_type &cat, row_iterator pos, const condition &cond, const std::array<uint16_t, N> &cix);
conditional_iterator_impl(const conditional_iterator_impl &i) = default;
conditional_iterator_impl &operator=(const conditional_iterator_impl &i) = default;
virtual ~conditional_iterator_impl() = default;
auto operator*()
{
return *m_begin;
}
auto operator*() const
{
return *m_begin;
}
auto operator->()
{
m_current = *m_begin;
return &m_current;
}
auto operator->() const
{
m_current = *m_begin;
return &m_current;
}
conditional_iterator_impl &operator++()
{
while (m_begin != m_end)
{
if (++m_begin == m_end)
break;
if (m_condition->operator()(m_begin))
break;
}
return *this;
}
conditional_iterator_impl operator++(int)
{
conditional_iterator_impl result(*this);
this->operator++();
return result;
}
bool operator==(const conditional_iterator_impl &rhs) const { return m_begin == rhs.m_begin; }
bool operator!=(const conditional_iterator_impl &rhs) const { return m_begin != rhs.m_begin; }
bool operator==(const row_iterator &rhs) const { return m_begin == rhs; }
bool operator!=(const row_iterator &rhs) const { return m_begin != rhs; }
template <bool C, typename... ITs>
bool operator==(const iterator_impl_base<C, ITs...> &rhs) const { return m_begin == rhs; }
template <bool C, typename... ITs>
bool operator!=(const iterator_impl_base<C, ITs...> &rhs) const { return m_begin != rhs; }
private:
category_type *m_cat = nullptr;
base_iterator m_begin, m_end;
std::remove_cv_t<value_type> m_current;
const condition *m_condition;
};
using iterator = conditional_iterator_impl;
using reference = typename iterator::reference;
template <typename... Ns>
conditional_iterator_proxy_base(category_type &cat, row_iterator pos, condition &&cond, Ns... names); // NOLINT(modernize-pass-by-value)
conditional_iterator_proxy_base(conditional_iterator_proxy_base &&p)
{
swap(*this, p);
}
conditional_iterator_proxy_base &operator=(conditional_iterator_proxy_base &&p)
{
swap(*this, p);
return *this;
}
conditional_iterator_proxy_base(const conditional_iterator_proxy_base &) = delete;
conditional_iterator_proxy_base &operator=(const conditional_iterator_proxy_base &) = delete;
/** @endcond */
[[nodiscard]] iterator begin() const; ///< Return the iterator pointing to the first row
[[nodiscard]] iterator end() const; ///< Return the iterator pointing past the last row
[[nodiscard]] bool empty() const; ///< Return true if the range is empty
explicit operator bool() const { return not empty(); } ///< Easy way to detect if the range is empty
[[nodiscard]] std::size_t size() const { return std::distance(begin(), end()); } ///< Return size of the range
auto front() { return *begin(); } ///< Return reference to the first row
// row_handle back() { return *begin(); }
[[nodiscard]] category_type &get_category() const { return *m_cat; } ///< Category the iterators belong to
/** swap */
template <bool C2, typename ... T2s>
friend void swap(conditional_iterator_proxy_base<C2, T2s...> &lhs, conditional_iterator_proxy_base<C2, T2s...> &rhs);
private:
category_type *m_cat;
condition m_condition;
row_iterator mCBegin, mCEnd;
std::array<uint16_t, N> mCix;
};
// --------------------------------------------------------------------
/// Filtered range over the rows of a non-const category, yielding values of the types @a Ts
template <typename... Ts>
using conditional_iterator_proxy = conditional_iterator_proxy_base<false, Ts...>;
/// Filtered range over the rows of a const category, yielding values of the types @a Ts
template <typename... Ts>
using const_conditional_iterator_proxy = conditional_iterator_proxy_base<true, Ts...>;
// --------------------------------------------------------------------
/** @cond */
/**
 * Construct a proxy for the rows in @a cat starting at @a pos and running
 * up to cat.end(). The array @a items must contain N item names, each name
 * is translated into an item index using the category.
 */
template <bool Const, typename... Ts>
iterator_proxy_base<Const, Ts...>::iterator_proxy_base(category_type &cat, row_iterator pos, char const *const items[N])
	: m_category(&cat)
	, m_begin(pos)
	, m_end(cat.end())
{
	uint16_t ix = 0;
	while (ix < N)
	{
		m_item_ix[ix] = m_category->get_item_ix(items[ix]);
		++ix;
	}
}
/**
 * Construct a proxy for the rows in @a cat starting at @a pos and running
 * up to cat.end(). Each name in @a items is translated into an item index
 * using the category.
 *
 * The list is expected to contain exactly N names. Since the size of an
 * initializer_list cannot be checked at compile time, names beyond the
 * N-th are ignored instead of being written past the end of the index array.
 */
template <bool Const, typename... Ts>
iterator_proxy_base<Const, Ts...>::iterator_proxy_base(category_type &cat, row_iterator pos, std::initializer_list<char const *> items)
	: m_category(&cat)
	, m_begin(pos)
	, m_end(cat.end())
{
	// static_assert(items.size() == N, "The list of item names should be exactly the same as the list of requested items");

	std::size_t i = 0;
	for (auto item : items)
	{
		// m_item_ix has room for N indices only
		if (i == N)
			break;

		m_item_ix[i++] = m_category->get_item_ix(item);
	}
}
/**
 * Construct a proxy covering all rows in @a cat, from cat.begin() up to
 * cat.end(). The item indices are simply numbered 0, 1, 2, ...
 */
template <bool Const, typename... Ts>
iterator_proxy_base<Const, Ts...>::iterator_proxy_base(category_type &cat)
	: m_category(&cat)
	, m_begin(cat.begin())
	, m_end(cat.end())
{
	int next_ix = 0;
	for (auto &ix : m_item_ix)
		ix = next_ix++;
}
// --------------------------------------------------------------------
/**
 * Construct a conditional iterator for category @a cat positioned at @a pos.
 * The address of @a cond is stored, the condition therefore has to stay
 * alive for as long as the iterator is used. Without a usable condition the
 * iterator is positioned at the end.
 */
template <bool Const, typename... Ts>
conditional_iterator_proxy_base<Const, Ts...>::conditional_iterator_impl::conditional_iterator_impl(
	category_type &cat, row_iterator pos, const condition &cond, const std::array<uint16_t, N> &cix)
	: m_cat(&cat)
	, m_begin(pos, cix)
	, m_end(cat.end(), cix)
	, m_condition(&cond)
{
	const bool usable = m_condition != nullptr and not m_condition->empty();

	if (usable)
		m_current = *m_begin; // cache the value for the starting position
	else
		m_begin = m_end;
}
/**
 * Construct a filtered range over the rows of @a cat, starting the search
 * at @a pos. The condition @a cond is moved into the proxy and prepared for
 * the category; the start of the range is then advanced to the first row
 * for which the condition holds. When the condition tests false or cannot
 * be prepared the range is empty. Finally each name in @a names is
 * translated into an item index.
 */
template <bool Const, typename... Ts>
template <typename... Ns>
conditional_iterator_proxy_base<Const, Ts...>::conditional_iterator_proxy_base(category_type &cat, row_iterator pos, condition &&cond, Ns... names)
	: m_cat(&cat)
	, m_condition(std::move(cond))
	, mCBegin(pos)
	, mCEnd(cat.end())
{
	static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of item names should be equal to number of requested value types");

	const bool active = m_condition and m_condition.prepare(cat);

	if (not active)
		mCBegin = mCEnd;
	else
	{
		// skip the rows that do not match
		for (; mCBegin != mCEnd; ++mCBegin)
		{
			if (m_condition(*mCBegin))
				break;
		}
	}

	uint16_t i = 0;
	((mCix[i++] = m_cat->get_item_ix(names)), ...);
}
/// Return an iterator positioned at the start of the filtered range (mCBegin).
/// The iterator refers to the condition stored in this proxy.
template <bool Const, typename... Ts>
auto conditional_iterator_proxy_base<Const, Ts...>::begin() const -> iterator
{
return iterator{ *m_cat, mCBegin, m_condition, mCix };
}
/// Return an iterator positioned at the end of the range (mCEnd).
/// The iterator refers to the condition stored in this proxy.
template <bool Const, typename... Ts>
auto conditional_iterator_proxy_base<Const, Ts...>::end() const -> iterator
{
return iterator{ *m_cat, mCEnd, m_condition, mCix };
}
/// Return true when the range is empty, i.e. the start of the range equals its end.
template <bool Const, typename... Ts>
bool conditional_iterator_proxy_base<Const, Ts...>::empty() const
{
return mCBegin == mCEnd;
}
/**
 * Exchange the complete state of the two proxies @a lhs and @a rhs:
 * category, condition, range and item indices.
 */
template <bool Const, typename... Ts>
void swap(conditional_iterator_proxy_base<Const, Ts...> &lhs, conditional_iterator_proxy_base<Const, Ts...> &rhs)
{
	// the exchanges are independent of each other
	std::swap(lhs.mCix, rhs.mCix);
	std::swap(lhs.mCEnd, rhs.mCEnd);
	std::swap(lhs.mCBegin, rhs.mCBegin);
	std::swap(lhs.m_condition, rhs.m_condition);
	std::swap(lhs.m_cat, rhs.m_cat);
}
// --------------------------------------------------------------------
// template <bool Const, typename... Ts>
/** @endcond */
} // namespace cif

745
include/cif++/matrix.hpp Normal file
View File

@@ -0,0 +1,745 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <array>
#include <cassert>
#include <cmath>
#include <ostream>
#include <type_traits>
#include <vector>
/**
* @file matrix.hpp
*
* Some basic matrix operations and classes to hold matrices.
*
* We're using expression templates for optimal performance.
*
*/
namespace cif
{
// --------------------------------------------------------------------
// We're using expression templates here
/**
 * @brief Base for the matrix expression templates
 * This all uses the Curiously recurring template pattern
 *
 * All calls are forwarded to the derived class @a M. A matrix has
 * dim_m() rows and dim_n() columns, an element is addressed as
 * (i, j) with 0 <= i < dim_m() and 0 <= j < dim_n().
 *
 * @tparam M The type of the derived class
 */
template <typename M>
class matrix_expression // NOLINT(bugprone-crtp-constructor-accessibility)
{
  public:
	[[nodiscard]] constexpr std::size_t dim_m() const { return static_cast<const M &>(*this).dim_m(); } ///< Return the size (dimension) in direction m
	[[nodiscard]] constexpr std::size_t dim_n() const { return static_cast<const M &>(*this).dim_n(); } ///< Return the size (dimension) in direction n

	[[nodiscard]] constexpr bool empty() const { return dim_m() == 0 or dim_n() == 0; } ///< Convenient way to test for empty matrices

	/** Return a reference to element [ @a i, @a j ] */
	[[nodiscard]] constexpr auto &operator()(std::size_t i, std::size_t j)
	{
		return static_cast<M &>(*this).operator()(i, j);
	}

	/** Return the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		return static_cast<const M &>(*this).operator()(i, j);
	}

	/** Swap the contents of rows @a r1 and @a r2 */
	void swap_row(std::size_t r1, std::size_t r2)
	{
		// a row contains one element for each of the dim_n() columns
		for (std::size_t c = 0; c < dim_n(); ++c)
		{
			auto &a = operator()(r1, c);
			auto &b = operator()(r2, c);
			std::swap(a, b);
		}
	}

	/** Swap the contents of columns @a c1 and @a c2 */
	void swap_col(std::size_t c1, std::size_t c2)
	{
		// a column contains one element for each of the dim_m() rows
		for (std::size_t r = 0; r < dim_m(); ++r)
		{
			auto &a = operator()(r, c1);
			auto &b = operator()(r, c2);
			std::swap(a, b);
		}
	}

	/**
	 * write the matrix @a m to std::ostream @a os
	 *
	 * The output has the form [[a, b], [c, d]]
	 */
	friend std::ostream &operator<<(std::ostream &os, const matrix_expression &m)
	{
		os << '[';

		for (std::size_t i = 0; i < m.dim_m(); ++i)
		{
			os << '[';

			for (std::size_t j = 0; j < m.dim_n(); ++j)
			{
				os << m(i, j);
				if (j + 1 < m.dim_n())
					os << ", ";
			}

			os << ']';

			// the separator goes between two rows, after the closing bracket
			if (i + 1 < m.dim_m())
				os << ", ";
		}

		os << ']';

		return os;
	}

	/**
	 * Compare with matrix expression @a m, the result is true when both
	 * have the same dimensions and all elements compare equal
	 */
	template <typename M2>
	constexpr bool operator==(const matrix_expression<M2> &m) const
	{
		bool same = false;

		if (dim_m() == m.dim_m() and dim_n() == m.dim_n())
		{
			same = true;
			for (std::size_t i = 0; same and i < m.dim_m(); ++i)
			{
				for (std::size_t j = 0; same and j < m.dim_n(); ++j)
					same = operator()(i, j) == m(i, j);
			}
		}

		return same;
	}
};
// --------------------------------------------------------------------
/**
 * @brief Storage class implementation of matrix_expression.
 *
 * @tparam F The type of the stored values
 *
 * matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
 * element m i,j is mapped to [i * n + j] and thus storage is row major
 */
template <typename F = float>
class matrix : public matrix_expression<matrix<F>>
{
  public:
	/** The value type */
	using value_type = F;

	/**
	 * @brief Copy construct a new matrix object using @a m
	 *
	 * @tparam M2 Type of @a m
	 * @param m The matrix expression to copy values from
	 */
	template <typename M2>
	matrix(const matrix_expression<M2> &m)
		: m_m(m.dim_m())
		, m_n(m.dim_n())
		, m_data(m_m * m_n)
	{
		for (std::size_t i = 0; i < m_m; ++i)
		{
			for (std::size_t j = 0; j < m_n; ++j)
				operator()(i, j) = m(i, j);
		}
	}

	/**
	 * @brief Construct a new matrix object with dimension @a m and @a n
	 * setting the values to @a v
	 *
	 * @param m Requested dimension M
	 * @param n Requested dimension N
	 * @param v Value to store in each element
	 */
	matrix(std::size_t m, std::size_t n, value_type v = 0)
		: m_m(m)
		, m_n(n)
		, m_data(m_m * m_n, v) // the vector stores m * n copies of v, no separate fill needed
	{
	}

	/** @cond */
	matrix() = default;
	matrix(matrix &&m) = default;
	matrix(const matrix &m) = default;
	matrix &operator=(matrix &&m) = default;
	matrix &operator=(const matrix &m) = default;
	/** @endcond */

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_m; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_n; } ///< Return dimension n

	/** Return the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type operator()(std::size_t i, std::size_t j) const
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

	/** Return a reference to element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type &operator()(std::size_t i, std::size_t j)
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[i * m_n + j];
	}

  private:
	// m_data is sized using m_m and m_n, these must therefore be declared first
	std::size_t m_m = 0, m_n = 0;
	std::vector<value_type> m_data;
};
// --------------------------------------------------------------------
// matrix with dimensions fixed at compile time
/**
 * @brief Storage class implementation of matrix_expression
 * with compile time fixed size.
 *
 * @tparam F The type of the stored values
 * @tparam M The number of rows
 * @tparam N The number of columns
 *
 * matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
 * element m i,j is mapped to [i * n + j] and thus storage is row major
 */
template <typename F, std::size_t M, std::size_t N>
class matrix_fixed : public matrix_expression<matrix_fixed<F, M, N>>
{
  public:
	/** The value type */
	using value_type = F;

	/** The storage size */
	static constexpr std::size_t kSize = M * N;

	/**
	 * Copy the values from matrix expression @a m which should have
	 * dimensions M x N.
	 *
	 * This constructor does not take part in overload resolution for
	 * arithmetic types. Without that restriction an argument like the
	 * int 0 would select this template instead of the constructor
	 * taking a value_type, and then fail to compile.
	 */
	template <typename M2, typename = std::enable_if_t<not std::is_arithmetic_v<M2>>>
	matrix_fixed(const M2 &m)
	{
		assert(M == m.dim_m() and N == m.dim_n());
		for (std::size_t i = 0; i < M; ++i)
		{
			for (std::size_t j = 0; j < N; ++j)
				operator()(i, j) = m(i, j);
		}
	}

	/** default constructor, all elements are set to value @a v */
	matrix_fixed(value_type v = 0)
	{
		m_data.fill(v);
	}

	/** Alternate constructor taking an array of values to store, in row major order */
	matrix_fixed(const F (&v)[kSize])
	{
		fill(v, std::make_index_sequence<kSize>{});
	}

	/** @cond */
	matrix_fixed(matrix_fixed &&m) = default;
	matrix_fixed(const matrix_fixed &m) = default;
	matrix_fixed &operator=(matrix_fixed &&m) = default;
	matrix_fixed &operator=(const matrix_fixed &m) = default;
	/** @endcond */

	/** Store the values in @a a in the matrix, the index sequence should run from 0 up to kSize */
	template<std::size_t... Ixs>
	matrix_fixed& fill(const F (&a)[kSize], std::index_sequence<Ixs...>)
	{
		m_data = { a[Ixs]... };
		return *this;
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return M; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return N; } ///< Return dimension n

	/** Return the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type operator()(std::size_t i, std::size_t j) const
	{
		assert(i < M);
		assert(j < N);
		return m_data[i * N + j];
	}

	/** Return a reference to element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type &operator()(std::size_t i, std::size_t j)
	{
		assert(i < M);
		assert(j < N);
		return m_data[i * N + j];
	}

  private:
	std::array<value_type, M * N> m_data;
};
/** Alias for a fixed size matrix of 3x3 elements of type @a F */
template <typename F>
using matrix3x3 = matrix_fixed<F, 3, 3>;
/** Alias for a fixed size matrix of 4x4 elements of type @a F */
template <typename F>
using matrix4x4 = matrix_fixed<F, 4, 4>;
// --------------------------------------------------------------------
/**
 * @brief Storage class implementation of symmetric matrix_expression
 *
 * @tparam F The type of the stored values
 *
 * matrix is n x n, addressing i,j is 0 <= i < n and 0 <= j < n
 * Only one triangle is stored: the elements (i, j) and (j, i) both map
 * to [(b * (b + 1)) / 2 + a] where a is the smaller and b the larger
 * of i and j.
 */
template <typename F = float>
class symmetric_matrix : public matrix_expression<symmetric_matrix<F>>
{
  public:
	/** The value type */
	using value_type = F;

	/** constructor for a matrix of size @a n x @a n elements with value @a v */
	symmetric_matrix(std::size_t n, value_type v = 0)
		: m_n(n)
		, m_data((m_n * (m_n + 1)) / 2, v) // n * (n + 1) / 2 copies of v
	{
	}

	/** @cond */
	symmetric_matrix() = default;
	symmetric_matrix(symmetric_matrix &&m) = default;
	symmetric_matrix(const symmetric_matrix &m) = default;
	symmetric_matrix &operator=(symmetric_matrix &&m) = default;
	symmetric_matrix &operator=(const symmetric_matrix &m) = default;
	/** @endcond */

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_n; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_n; } ///< Return dimension n

	/** Return the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type operator()(std::size_t i, std::size_t j) const
	{
		assert(i < m_n);
		assert(j < m_n);
		return i < j
		           ? m_data[(j * (j + 1)) / 2 + i]
		           : m_data[(i * (i + 1)) / 2 + j];
	}

	/** Return a reference to element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type &operator()(std::size_t i, std::size_t j)
	{
		// after this swap j is the larger of the two indices
		if (i > j)
			std::swap(i, j);
		assert(j < m_n);
		return m_data[(j * (j + 1)) / 2 + i];
	}

  private:
	// initialised to zero, a default constructed matrix is an empty matrix
	std::size_t m_n = 0;
	std::vector<value_type> m_data;
};
// --------------------------------------------------------------------
/**
 * @brief Storage class implementation of symmetric matrix_expression
 * with compile time fixed size.
 *
 * @tparam F The type of the stored values
 * @tparam M The number of rows, which is equal to the number of columns
 *
 * matrix is M x M, addressing i,j is 0 <= i < M and 0 <= j < M
 * Only one triangle is stored: the elements (i, j) and (j, i) both map
 * to [(b * (b + 1)) / 2 + a] where a is the smaller and b the larger
 * of i and j.
 */
template <typename F, std::size_t M>
class symmetric_matrix_fixed : public matrix_expression<symmetric_matrix_fixed<F, M>>
{
  public:
	/** The value type */
	using value_type = F;

	/** constructor with all elements set to value @a v */
	symmetric_matrix_fixed(value_type v = 0)
	{
		m_data.fill(v);
	}

	/** @cond */
	symmetric_matrix_fixed(symmetric_matrix_fixed &&m) = default;
	symmetric_matrix_fixed(const symmetric_matrix_fixed &m) = default;
	symmetric_matrix_fixed &operator=(symmetric_matrix_fixed &&m) = default;
	symmetric_matrix_fixed &operator=(const symmetric_matrix_fixed &m) = default;
	/** @endcond */

	[[nodiscard]] constexpr std::size_t dim_m() const { return M; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return M; } ///< Return dimension n

	/** Return the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type operator()(std::size_t i, std::size_t j) const
	{
		assert(i < M);
		assert(j < M);
		return i < j
		           ? m_data[(j * (j + 1)) / 2 + i]
		           : m_data[(i * (i + 1)) / 2 + j];
	}

	/** Return a reference to element [ @a i, @a j ] */
	[[nodiscard]] constexpr value_type &operator()(std::size_t i, std::size_t j)
	{
		// after this swap j is the larger of the two indices
		if (i > j)
			std::swap(i, j);
		assert(j < M);
		return m_data[(j * (j + 1)) / 2 + i];
	}

  private:
	std::array<value_type, (M * (M + 1)) / 2> m_data;
};
/** Alias for a fixed size symmetric matrix of 3x3 elements of type @a F */
template <typename F>
using symmetric_matrix3x3 = symmetric_matrix_fixed<F, 3>;
/** Alias for a fixed size symmetric matrix of 4x4 elements of type @a F */
template <typename F>
using symmetric_matrix4x4 = symmetric_matrix_fixed<F, 4>;
// --------------------------------------------------------------------
/**
 * @brief A matrix_expression for the identity matrix: the elements on the
 * diagonal have value 1, all other elements have value 0.
 *
 * Nothing is stored apart from the dimension, the values are
 * generated when requested.
 *
 * @tparam F The type of the values
 */
template <typename F = float>
class identity_matrix : public matrix_expression<identity_matrix<F>>
{
  public:
	/** the value type */
	using value_type = F;

	/** constructor for an identity matrix of size @a n x @a n */
	identity_matrix(std::size_t n)
		: m_n(n)
	{
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_n; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_n; } ///< Return dimension n

	/** Return the value of element [ @a i, @a j ], 1 on the diagonal and 0 elsewhere */
	[[nodiscard]] constexpr value_type operator()(std::size_t i, std::size_t j) const
	{
		const int v = (i == j) ? 1 : 0;
		return static_cast<value_type>(v);
	}

  private:
	std::size_t m_n;
};
// --------------------------------------------------------------------
// matrix functions, implemented as expression templates
/**
 * @brief Implementation of a subtraction operation as a matrix expression
 *
 * Only references to the two operands are stored, the operands should
 * therefore stay alive for as long as the expression is used.
 *
 * @tparam M1 Type of matrix 1
 * @tparam M2 Type of matrix 2
 */
template <typename M1, typename M2>
class matrix_subtraction : public matrix_expression<matrix_subtraction<M1, M2>>
{
  public:
	/** constructor, @a m1 and @a m2 should have equal dimensions */
	matrix_subtraction(const M1 &m1, const M2 &m2)
		: m_m1(m1)
		, m_m2(m2)
	{
		assert(m_m1.dim_m() == m_m2.dim_m());
		assert(m_m1.dim_n() == m_m2.dim_n());
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_m1.dim_m(); } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_m1.dim_n(); } ///< Return dimension n

	/** Return the difference of the elements [ @a i, @a j ] of the two operands */
	[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		const auto lhs = m_m1(i, j);
		const auto rhs = m_m2(i, j);
		return lhs - rhs;
	}

  private:
	const M1 &m_m1;
	const M2 &m_m2;
};
/**
 * Subtract matrix expression @a m2 from @a m1. The result is a
 * matrix_subtraction expression referring to both operands, the
 * elements are calculated when they are requested.
 */
template <typename M1, typename M2>
auto operator-(const matrix_expression<M1> &m1, const matrix_expression<M2> &m2)
{
	using lhs_type = matrix_expression<M1>;
	using rhs_type = matrix_expression<M2>;

	return matrix_subtraction<lhs_type, rhs_type>(m1, m2);
}
/**
 * @brief Implementation of a multiplication operation as a matrix expression
 *
 * Multiplying a matrix of m x k elements with a matrix of k x n elements
 * results in a matrix of m x n elements. Only references to the two
 * operands are stored, the operands should therefore stay alive for
 * as long as the expression is used.
 *
 * @tparam M1 Type of matrix 1
 * @tparam M2 Type of matrix 2
 */
template <typename M1, typename M2>
class matrix_matrix_multiplication : public matrix_expression<matrix_matrix_multiplication<M1, M2>>
{
  public:
	/** constructor, the number of columns in @a m1 should be equal to the number of rows in @a m2 */
	matrix_matrix_multiplication(const M1 &m1, const M2 &m2)
		: m_m1(m1)
		, m_m2(m2)
	{
		assert(m_m1.dim_n() == m_m2.dim_m());
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_m1.dim_m(); } ///< Return dimension m, the number of rows in matrix 1
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_m2.dim_n(); } ///< Return dimension n, the number of columns in matrix 2

	/** Access to the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		// accumulate in the plain value type, also when the operand returns a reference
		using value_type = std::decay_t<decltype(m_m1(0, 0))>;

		value_type result = {};

		// k runs over the columns of matrix 1, which are the rows of matrix 2
		for (std::size_t k = 0; k < m_m1.dim_n(); ++k)
			result += m_m1(i, k) * m_m2(k, j);

		return result;
	}

  private:
	const M1 &m_m1;
	const M2 &m_m2;
};
/**
 * @brief Implementation of a multiplication operation of a matrix and a scalar value as a matrix expression
 *
 * A reference to the matrix and a copy of the scalar are stored, the
 * matrix should therefore stay alive for as long as the expression is used.
 *
 * @tparam M Type of matrix
 * @tparam T Type of scalar value
 */
template <typename M, typename T>
class matrix_scalar_multiplication : public matrix_expression<matrix_scalar_multiplication<M, T>>
{
  public:
	/** value type */
	using value_type = T;

	/** constructor taking the matrix @a m and the scalar @a v */
	matrix_scalar_multiplication(const M &m, value_type v)
		: m_m(m)
		, m_v(v)
	{
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_m.dim_m(); } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_m.dim_n(); } ///< Return dimension n

	/** Return element [ @a i, @a j ] of the matrix multiplied by the scalar */
	[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		const auto element = m_m(i, j);
		return element * m_v;
	}

  private:
	const M &m_m;
	value_type m_v;
};
/**
 * First implementation of operator*, enabled if the second parameter
 * is a floating point scalar. The result is a matrix_scalar_multiplication
 * expression referring to @a m.
 */
template <typename M1, typename T>
auto operator*(const matrix_expression<M1> &m, T v)
	requires (std::is_floating_point_v<T>) {
	using matrix_type = matrix_expression<M1>;

	return matrix_scalar_multiplication<matrix_type, T>(m, v);
}
/**
 * Second implementation of operator*, taking two matrix expressions.
 * The result is a matrix_matrix_multiplication expression referring
 * to both @a m1 and @a m2.
 */
template <typename M1, typename M2>
auto operator*(const matrix_expression<M1> &m1, const matrix_expression<M2> &m2)
	requires (not std::is_floating_point_v<M2>) {
	using lhs_type = matrix_expression<M1>;
	using rhs_type = matrix_expression<M2>;

	return matrix_matrix_multiplication<lhs_type, rhs_type>(m1, m2);
}
// --------------------------------------------------------------------
/**
 * @brief A matrix expression for matrix @a m with one row and one
 * column left out.
 *
 * Only a reference to the matrix is stored, the matrix should therefore
 * stay alive for as long as the expression is used.
 *
 * @tparam M2 Type of the matrix
 */
template <typename M2>
class sub_matrix : public matrix_expression<sub_matrix<M2>>
{
  public:
	/** constructor, leave out row @a i and column @a j of matrix @a m */
	sub_matrix(const M2 &m, int i, int j)
		: m_m(m)
		, m_i(i)
		, m_j(j)
	{
	}

	[[nodiscard]] constexpr std::size_t dim_m() const { return m_m.dim_m() - 1; } ///< Return dimension m
	[[nodiscard]] constexpr std::size_t dim_n() const { return m_m.dim_n() - 1; } ///< Return dimension n

	/** Access to the value of element [ @a i, @a j ] */
	[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
	{
		// indices at or beyond the row or column left out are shifted by one
		const std::size_t row = i < m_i ? i : i + 1;
		const std::size_t col = j < m_j ? j : j + 1;

		return m_m(row, col);
	}

  private:
	const M2 &m_m;
	std::size_t m_i, m_j;
};
// --------------------------------------------------------------------
/** Generic routine to calculate the determinant of a matrix
*
* @note This is currently only implemented for fixed matrices of size 3x3 and 4x4
*/
template <typename M>
auto determinant(const M &m);
/**
 * Implementation of the determinant function for fixed size matrices of size 3x3
 *
 * The determinant is calculated by expanding along the first row of @a m
 */
template <typename F = float>
auto determinant(const matrix3x3<F> &m)
{
	// the cofactors for the three elements in the first row
	const auto c0 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1);
	const auto c1 = m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2);
	const auto c2 = m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0);

	return m(0, 0) * c0 + m(0, 1) * c1 + m(0, 2) * c2;
}
/** Implementation of the determinant function for fixed size matrices of size 4x4 */
template <typename F = float>
F determinant(const matrix4x4<F> &m)
{
return m(0, 0) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 0))) -
m(0, 1) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 1))) +
m(0, 2) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 2))) -
m(0, 3) * determinant(matrix3x3<F>(sub_matrix<decltype(m)>(m, 0, 3)));
}
// --------------------------------------------------------------------
/** Generic routine to calculate the inverse of a matrix
*
* Only the declaration is given here, the result has the same type as @a m.
*
* @note This is currently only implemented for fixed matrices of size 3x3
*/
template <typename M>
M inverse(const M &m);
/**
 * Implementation of the inverse function for fixed size matrices of size 3x3
 *
 * Each element of the result is a cofactor of @a m divided by the
 * determinant of @a m. There is no check for a determinant of zero.
 */
template <typename F = float>
matrix3x3<F> inverse(const matrix3x3<F> &m)
{
	F det = determinant(m);

	matrix3x3<F> result;

	for (std::size_t c = 0; c < 3; ++c)
	{
		// the two rows of m other than row c, in cyclic order
		const std::size_t a1 = (c + 1) % 3, a2 = (c + 2) % 3;

		for (std::size_t r = 0; r < 3; ++r)
		{
			// the two columns of m other than column r, in cyclic order
			const std::size_t b1 = (r + 1) % 3, b2 = (r + 2) % 3;

			// using cyclic indices takes care of the sign of the cofactor
			result(r, c) = (m(a1, b1) * m(a2, b2) - m(a1, b2) * m(a2, b1)) / det;
		}
	}

	return result;
}
// --------------------------------------------------------------------
/**
* @brief Implementation of a cofactor calculation as a matrix expression
*
* Element [ i, j ] of this expression is the cofactor of element [ i, j ]
* of the matrix. The implementation uses a table with indices in the
* range 0 to 3 and 3x3 determinants, it is thus only usable for
* matrices of size 4x4. Only a reference to the matrix is stored.
*
* @tparam M Type of matrix
*/
template <typename M>
class matrix_cofactors : public matrix_expression<matrix_cofactors<M>>
{
public:
/** constructor */
matrix_cofactors(const M &m)
: m_m(m)
{
}
[[nodiscard]] constexpr std::size_t dim_m() const { return m_m.dim_m(); } ///< Return dimension m
[[nodiscard]] constexpr std::size_t dim_n() const { return m_m.dim_n(); } ///< Return dimension n
/** Access to the value of element [ @a i, @a j ] */
[[nodiscard]] constexpr auto operator()(std::size_t i, std::size_t j) const
{
// ixs[k] contains the three indices that remain after leaving out index k
const std::size_t ixs[4][3] = {
{ 1, 2, 3 },
{ 0, 2, 3 },
{ 0, 1, 3 },
{ 0, 1, 2 }
};
// the rows remaining after leaving out row i
const std::size_t *ix = ixs[i];
// the columns remaining after leaving out column j
const std::size_t *iy = ixs[j];
// the determinant of the remaining 3x3 matrix
auto result =
m_m(ix[0], iy[0]) * m_m(ix[1], iy[1]) * m_m(ix[2], iy[2]) +
m_m(ix[0], iy[1]) * m_m(ix[1], iy[2]) * m_m(ix[2], iy[0]) +
m_m(ix[0], iy[2]) * m_m(ix[1], iy[0]) * m_m(ix[2], iy[1]) -
m_m(ix[0], iy[2]) * m_m(ix[1], iy[1]) * m_m(ix[2], iy[0]) -
m_m(ix[0], iy[1]) * m_m(ix[1], iy[0]) * m_m(ix[2], iy[2]) -
m_m(ix[0], iy[0]) * m_m(ix[1], iy[2]) * m_m(ix[2], iy[1]);
// the sign alternates, negative when i + j is odd
return (i + j) % 2 == 1 ? -result : result;
}
private:
const M &m_m;
};
} // namespace cif

1245
include/cif++/model.hpp Normal file

File diff suppressed because it is too large Load Diff

477
include/cif++/parser.hpp Normal file
View File

@@ -0,0 +1,477 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/category.hpp"
#include "cif++/datablock.hpp"
#include "cif++/item.hpp"
#include "cif++/row.hpp"
#include "cif++/text.hpp"
#include "cif++/utilities.hpp"
#include <cstddef>
#include <cstdint>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <string_view>
#include <vector>
/**
* @file parser.hpp
*
* This file contains the declaration of an mmCIF parser
*/
namespace cif
{
class category;
class datablock;
class file;
class validator;
// --------------------------------------------------------------------
/**
 * Exception that is thrown when the mmCIF file contains a parsing error.
 *
 * The text returned by what() has the form
 * "parse error at line <line_nr>: <message>".
 */
class parse_error : public std::runtime_error
{
  public:
	/// \brief constructor, builds the message from line number @a line_nr and text @a message
	parse_error(uint32_t line_nr, const std::string &message)
		: std::runtime_error(std::string("parse error at line ")
								 .append(std::to_string(line_nr))
								 .append(": ")
								 .append(message))
	{
	}
};
// --------------------------------------------------------------------
/**
 * @brief The sac_parser is similar to a SAX parser (Simple API for XML),
 * in our case it is a Simple API for CIF.
 *
 * This is a hand crafted, optimised parser for reading cif files.
 * Both cif 1.0 and cif 1.1 are supported, version 2.0 is not.
 * That means that the content of files strictly contains only
 * ASCII characters. Anything else will generate an error.
 *
 * This class is an abstract base class. Derived classes should
 * implement the produce_ methods.
 */
// TODO: Need to implement support for transformed long lines
class sac_parser
{
  public:
	/** @cond */

	/// Ordering functor for the datablock index, based on icompare
	/// (presumably a case insensitive compare, confirm in text.hpp)
	struct iless_op
	{
		bool operator()(std::string_view a, std::string_view b) const
		{
			const auto order = icompare(a, b);
			return order < 0;
		}
	};

	/// Maps the name of a datablock to its offset
	using datablock_index = std::map<std::string, std::size_t, iless_op>;

	virtual ~sac_parser() = default;

	/** @endcond */

	/// \brief The parser only supports ASCII so we can
	/// create a table with character properties.
	enum CharTraitsMask : uint8_t
	{
		kOrdinaryMask = 1 << 0, ///< The character is in the Ordinary class
		kNonBlankMask = 1 << 1, ///< The character is in the NonBlank class
		kTextLeadMask = 1 << 2, ///< The character is in the TextLead class
		kAnyPrintMask = 1 << 3  ///< The character is in the AnyPrint class
	};

	/// \brief Return true if the character @a ch is a *space* character
	static constexpr bool is_space(int ch)
	{
		switch (ch)
		{
			case ' ':
			case '\t':
			case '\r':
			case '\n':
				return true;

			default:
				return false;
		}
	}

	/// \brief Return true if the character @a ch is a *white* character,
	/// that is a space character or the '#' that starts a comment
	static constexpr bool is_white(int ch)
	{
		return ch == '#' or is_space(ch);
	}

	/// \brief Return true if the character @a ch is a *ordinary* character
	static constexpr bool is_ordinary(int ch)
	{
		// the table only describes 0x20 .. 0x7f
		if (ch < 0x20 or ch > 0x7f)
			return false;

		return (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
	}

	/// \brief Return true if the character @a ch is a *non_blank* character
	static constexpr bool is_non_blank(int ch)
	{
		// the space character (0x20) itself is explicitly excluded here
		if (ch <= 0x20 or ch > 0x7f)
			return false;

		return (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
	}

	/// \brief Return true if the character @a ch is a *text_lead* character
	static constexpr bool is_text_lead(int ch)
	{
		if (ch < 0x20 or ch > 0x7f)
			return false;

		return (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
	}

	/// \brief Return true if the character @a ch is a *any_print* character
	static constexpr bool is_any_print(int ch)
	{
		// a tab is not in the table but counts as printable
		if (ch == '\t')
			return true;

		if (ch < 0x20 or ch > 0x7f)
			return false;

		return (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0;
	}

	/// \brief Return true if the string in @a text can safely be written without quotation
	static bool is_unquoted_string(std::string_view text);

  protected:
	/** @cond */

	// Character properties, a combination of CharTraitsMask bits for each
	// character. The table is indexed with (ch - 0x20), each row below holds
	// the sixteen characters sharing the high nibble given at the end of the
	// row. Only 96 values are listed, the remaining elements are zero.
	static constexpr uint8_t kCharTraitsTable[128] = {
		//  0   1   2   3   4   5   6   7   8   9   a   b   c   d   e   f
		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, // 2
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, // 3
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 4
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, // 5
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, // 6
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  // 7
	};

	/// The kinds of token produced by get_next_token
	enum class CIFToken
	{
		UNKNOWN,

		END_OF_FILE,

		DATA,
		LOOP,
		GLOBAL,
		SAVE_,
		SAVE_NAME,
		STOP,
		ITEM_NAME,

		VALUE_INAPPLICABLE,
		VALUE_UNKNOWN,
		VALUE_NUMERIC_INTEGER,
		VALUE_NUMERIC_FLOAT,
		VALUE_CHARSTRING,
		VALUE_TEXTFIELD
	};

	/// Return a human readable name for @a token, e.g. for use in messages
	static constexpr const char *get_token_name(CIFToken token)
	{
		switch (token)
		{
			case CIFToken::UNKNOWN:
				return "Unknown";
			case CIFToken::END_OF_FILE:
				return "Eof";
			case CIFToken::DATA:
				return "DATA";
			case CIFToken::LOOP:
				return "LOOP";
			case CIFToken::GLOBAL:
				return "GLOBAL";
			case CIFToken::SAVE_:
				return "SAVE";
			case CIFToken::SAVE_NAME:
				return "SAVE+name";
			case CIFToken::STOP:
				return "STOP";
			case CIFToken::ITEM_NAME:
				return "Tag";
			case CIFToken::VALUE_INAPPLICABLE:
				return "Inapplicable value";
			case CIFToken::VALUE_UNKNOWN:
				return "'Unknown' value (=null)";
			case CIFToken::VALUE_NUMERIC_INTEGER:
				return "Integer value";
			case CIFToken::VALUE_NUMERIC_FLOAT:
				return "Float value";
			case CIFToken::VALUE_CHARSTRING:
				return "Charstring value";
			case CIFToken::VALUE_TEXTFIELD:
				return "Textfield value";
			default:
				// a value that is not one of the enumerators
				return "Invalid token parameter";
		}
	}

	// get_next_char takes the next character from the istream.
	// This function also does carriage/linefeed translation.
	int get_next_char();

	// Put the last read character back into the istream
	void retract();

	CIFToken get_next_token();

	void match(CIFToken token);

	/** @endcond */

  public:
	/** \brief Parse only a single datablock, the one given in @a datablock
	 *
	 * The start of the datablock is first located and then data
	 * is parsed up until the next start of a datablock or the end of
	 * the data.
	 */
	bool parse_single_datablock(const std::string &datablock);

	/** \brief Return an index for all the datablocks found, that is
	 * the index will contain the names and offsets for each.
	 */
	datablock_index index_datablocks();

	/**
	 * @brief Parse the datablock named @a datablock
	 *
	 * This will first lookup the datablock's offset in the index @a index
	 * and then start parsing from that location until the next datablock.
	 *
	 * @param datablock Name of the datablock to parse
	 * @param index The index created using index_datablocks
	 * @return true If the datablock was found
	 * @return false If the datablock was not found
	 */
	bool parse_single_datablock(const std::string &datablock, const datablock_index &index);

	/**
	 * @brief Parse the file
	 */
	void parse_file();

  protected:
	/** @cond */

	sac_parser(std::istream &is, bool init = true);

	void parse_global();

	void parse_datablock();

	virtual void parse_save_frame();

	// Report a fatal problem: when VERBOSE is positive the message is
	// written to std::cerr, and in all cases a parse_error carrying the
	// current line number is thrown.
	void error(const std::string &msg)
	{
		if (VERBOSE > 0)
			std::cerr << "Error parsing mmCIF: " << msg << '\n';

		throw parse_error(m_line_nr, msg);
	}

	// Report a non fatal problem, the message is only written to
	// std::cerr when VERBOSE is positive.
	void warning(const std::string &msg)
	{
		if (VERBOSE > 0)
			std::cerr << "parser warning at line " << m_line_nr << ": " << msg << '\n';
	}

	// production methods, these are pure virtual here

	virtual void produce_datablock(std::string_view name) = 0;
	virtual void produce_category(std::string_view name) = 0;
	virtual void produce_row() = 0;
	virtual void produce_item(std::string_view category, std::string_view item, item_value value) = 0;

  protected:
	// The states of the tokenizer
	enum class State
	{
		Start,
		White,
		Esc,
		Comment,
		QuestionMark,
		Dot,
		QuotedString,
		QuotedStringQuote,
		UnquotedString,
		ItemName,
		TextItem,
		TextItemNL,
		Reserved,
		Value,
		TextItemBS,
		TextItemBS2,
		TextItemBSNL,
		Numeric_Zero,
		Numeric_Integer,
		Numeric_Float,
		Numeric_Exponent1,
		Numeric_Exponent2
	};

	std::streambuf &m_source;

	// Parser state
	uint32_t m_line_nr;
	bool m_bol;
	bool m_backslash_strings = false;
	CIFToken m_lookahead;

	// token buffer
	std::vector<char> m_token_buffer;
	std::string_view m_token_value;
	int64_t m_token_value_int;
	double m_token_value_float;
	int m_float_precision;

	/** @endcond */
};
// --------------------------------------------------------------------
/**
 * @brief The concrete sac_parser that stores what it reads in a cif::file
 *
 * This parser will create the cif::file, cif::datablock and cif::category
 * objects required to contain all data
 */
class parser : public sac_parser
{
  public:
	/// \brief constructor, generates data into @a file from @a is using validator @a v
	parser(std::istream &is, file &file, const validator *v)
		: sac_parser(is)
		, m_file(file)
		, m_validator(v)
	{
	}

	/// \brief constructor, generates data into @a file from @a is, no validator is used
	parser(std::istream &is, file &file)
		: parser(is, file, nullptr)
	{
	}

	/** @cond */

	void produce_datablock(std::string_view name) override;
	void produce_category(std::string_view name) override;
	void produce_row() override;
	void produce_item(std::string_view category, std::string_view item, item_value value) override;

  protected:
	file &m_file;                           // destination of the parsed data
	datablock *m_datablock = nullptr;
	category *m_category = nullptr;
	const validator *m_validator = nullptr; // may be null, see the constructors
	row_handle m_row;

	/** @endcond */
};
} // namespace cif

267
include/cif++/pdb.hpp Normal file
View File

@@ -0,0 +1,267 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/file.hpp"
#include "cif++/validate.hpp"
#include <system_error>
/**
* @file pdb.hpp
*
* This file presents the API to read and write files in the
* legacy and ancient PDB format.
*
* The code works on the basis of best effort since it is
* impossible to have correct round trip fidelity.
*
*/
namespace cif::pdb
{
/// --------------------------------------------------------------------
// PDB to mmCIF
/** @brief Read a file in either mmCIF or PDB format from file @a file,
* compressed or not, depending on the content.
*/
file read(const std::filesystem::path &file);
/** @brief Read a file in either mmCIF or PDB format from std::istream @a is,
* compressed or not, depending on the content.
*/
file read(std::istream &is);
/**
* @brief Read a file in legacy PDB format from std::istream @a pdbFile and
* return the data that was read as a file object
*/
file read_pdb_file(std::istream &pdbFile);
// mmCIF to PDB
/** @brief Write out the data in @a db in legacy PDB format
* to std::ostream @a os
*/
void write(std::ostream &os, const datablock &db);
/** @brief Write out the data in @a f in legacy PDB format
* to std::ostream @a os
*/
inline void write(std::ostream &os, const file &f)
{
write(os, f.front());
}
/** @brief Write out the data in @a db to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
void write(const std::filesystem::path &file, const datablock &db);
/** @brief Write out the data in @a f to file @a file
* in legacy PDB format or mmCIF format, depending on the
* filename extension.
*
* If extension of @a file is *.gz* the resulting file will
* be written in gzip compressed format.
*/
inline void write(const std::filesystem::path &p, const file &f)
{
write(p, f.front());
}
// --------------------------------------------------------------------
/**
* @brief Quickly fix a PDB file that lacks some often needed categories
*
* This differs from reconstruct_pdbx which does a much more thorough job
*
* \param pdbx_file The cif::file that hopefully contains some valid data
*/
void fixup_pdbx(file &pdbx_file);
/**
* @brief Quickly fix a PDB file that lacks some often needed categories
*
* This differs from reconstruct_pdbx which does a much more thorough job
*
* \param pdbx_file The cif::file that hopefully contains some valid data
* \param v The validator to use
*/
void fixup_pdbx(file &pdbx_file, const validator &v);
/** \brief Reconstruct all missing categories for an assumed PDBx file.
*
* Some people believe that simply dumping some atom records is enough.
*
* This version uses the audit_conform information and falls back to
* using mmcif_pdbx.dic if not specified.
*
* \param pdbx_file The cif::file that hopefully contains some valid data
* \result Returns true if the resulting file is valid
*/
bool reconstruct_pdbx(file &pdbx_file);
/** \brief Reconstruct all missing categories for an assumed PDBx file.
*
* Some people believe that simply dumping some atom records is enough.
*
* \param pdbx_file The cif::file that hopefully contains some valid data
* \param v The validator to use
* \result Returns true if the resulting file is valid
*/
bool reconstruct_pdbx(file &pdbx_file, const validator &v);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* This function throws a std::system_error in case of an error
*
* \param pdbx_file The input file
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* This function throws a std::system_error in case of an error
*
* \param pdbx_file The input file
* \param v The validator to use
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, const validator &v);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* The dictionary is assumed to be specified in the file or to be the
* default mmcif_pdbx.dic dictionary.
*
* \param pdbx_file The input file
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, std::error_code &ec);
/** \brief This is an extension to cif::validator, use the logic in common
* PDBx files to see if the file is internally consistent.
*
* This function for now checks if the following categories are consistent:
*
* atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
*
* Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
*
* \param pdbx_file The input file
* \param v The validator to use
* \param ec The error_code in case something was wrong
* \result Returns true if the file was valid and consistent
*/
bool is_valid_pdbx_file(const file &pdbx_file, const validator &v,
std::error_code &ec);
// --------------------------------------------------------------------
// Other I/O related routines
/** @brief Return the HEADER line for the data in @a data
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the COMPND line for the data in @a data
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the SOURCE line for the data in @a data
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
/** @brief Return the AUTHOR line for the data in @a data
*
* The line returned should be compatible with the legacy PDB
* format and is e.g. used in the DSSP program.
*
* @param data The datablock to use as source for the requested data
* @param truncate_at The maximum length of the line returned
*/
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace cif::pdb

981
include/cif++/point.hpp Normal file
View File

@@ -0,0 +1,981 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <array>
#include <cmath>
#include <complex>
#include <cstdint>
#include <cstdlib>
#include <format>
#include <functional>
#include <limits>
#include <numbers>
#include <optional>
#include <ostream>
#include <tuple>
#include <type_traits>
#include <utility>
#include <valarray>
#include <vector>
#if __has_include(<clipper/core/coords.h>)
# define HAVE_LIBCLIPPER 1
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Wignored-qualifiers"
# include <clipper/core/clipper_types.h>
# include <clipper/core/coords.h>
# pragma GCC diagnostic pop
#endif
/** \file point.hpp
*
* This file contains the definition for *cif::point* as well as
* lots of routines and classes that can manipulate points.
*/
namespace cif
{
// --------------------------------------------------------------------
/// \brief Our value for Pi
///
/// Declared inline constexpr so that there is a single definition for all
/// translation units including this header and so that the value can be
/// used in constant expressions.
inline constexpr double kPI = std::numbers::pi;
// --------------------------------------------------------------------
/**
 * @brief A stripped down quaternion implementation, based on boost::math::quaternion
 *
 * We use quaternions to do rotations in 3d space. Quaternions are faster than
 * matrix calculations and they also suffer less from drift caused by rounding
 * errors.
 *
 * Like complex number, quaternions do have a meaningful notion of "real part",
 * but unlike them there is no meaningful notion of "imaginary part".
 * Instead there is an "unreal part" which itself is a quaternion, and usually
 * nothing simpler (as opposed to the complex number case).
 * However, for practicality, there are accessors for the other components
 * (these are necessary for the templated copy constructor, for instance).
 *
 * All members taking a quaternion with another value_type use these accessors
 * since the data members of a different specialisation are not accessible.
 *
 * @note Quaternion multiplication is *NOT* commutative;
 * symbolically, "q *= rhs;" means "q = q * rhs;"
 * and "q /= rhs;" means "q = q * inverse_of(rhs);"
 */
template <typename T>
class quaternion_type
{
  public:
	/// \brief the value type of the elements, usually this is float
	using value_type = T;

	/// \brief constructor with the four members
	constexpr explicit quaternion_type(value_type const &value_a = {}, value_type const &value_b = {}, value_type const &value_c = {}, value_type const &value_d = {})
		: a(value_a)
		, b(value_b)
		, c(value_c)
		, d(value_d)
	{
	}

	/// \brief constructor taking two complex values as input
	constexpr explicit quaternion_type(std::complex<value_type> const &z0, std::complex<value_type> const &z1 = std::complex<value_type>())
		: a(z0.real())
		, b(z0.imag())
		, c(z1.real())
		, d(z1.imag())
	{
	}

	constexpr quaternion_type(quaternion_type const &) = default; ///< Copy constructor
	constexpr quaternion_type(quaternion_type &&) = default;      ///< Move constructor

	/// \brief Copy constructor accepting a quaternion with a different value_type
	template <typename X>
	constexpr explicit quaternion_type(quaternion_type<X> const &rhs)
		: a(static_cast<value_type>(rhs.get_a()))
		, b(static_cast<value_type>(rhs.get_b()))
		, c(static_cast<value_type>(rhs.get_c()))
		, d(static_cast<value_type>(rhs.get_d()))
	{
	}

	// accessors

	/// \brief See class description, return the *real* part of the quaternion
	[[nodiscard]] constexpr value_type real() const
	{
		return a;
	}

	/// \brief See class description, return the *unreal* part of the quaternion
	[[nodiscard]] constexpr quaternion_type unreal() const
	{
		// The constructor is explicit, so a braced return value (which is
		// copy-list-initialisation) cannot be used here.
		return quaternion_type(static_cast<value_type>(0), b, c, d);
	}

	/// \brief swap
	constexpr void swap(quaternion_type &o)
	{
		std::swap(a, o.a);
		std::swap(b, o.b);
		std::swap(c, o.c);
		std::swap(d, o.d);
	}

	// assignment operators

	/// \brief Assignment operator accepting a quaternion with optionally another value_type
	template <typename X>
	constexpr quaternion_type &operator=(quaternion_type<X> const &rhs)
	{
		a = static_cast<value_type>(rhs.get_a());
		b = static_cast<value_type>(rhs.get_b());
		c = static_cast<value_type>(rhs.get_c());
		d = static_cast<value_type>(rhs.get_d());
		return *this;
	}

	/// \brief Assignment operator
	constexpr quaternion_type &operator=(quaternion_type const &rhs) = default;

	/// \brief Move assignment operator
	constexpr quaternion_type &operator=(quaternion_type &&rhs) = default;

	/// \brief Assignment operator that sets the *real* part to @a rhs and the *unreal* parts to zero
	constexpr quaternion_type &operator=(value_type const &rhs)
	{
		a = rhs;
		b = c = d = static_cast<value_type>(0);
		return *this;
	}

	/// \brief Assignment operator that sets the *real* part to the real part of @a rhs
	/// and the first *unreal* part to the imaginary part of @a rhs. The other *unreal*
	/// parts are set to zero.
	constexpr quaternion_type &operator=(std::complex<value_type> const &rhs)
	{
		a = rhs.real();
		b = rhs.imag();
		c = d = static_cast<value_type>(0);
		return *this;
	}

	// other assignment-related operators

	/// \brief operator += adding value @a rhs to the *real* part
	constexpr quaternion_type &operator+=(value_type const &rhs)
	{
		a += rhs;
		return *this;
	}

	/// \brief operator += adding the real part of @a rhs to the *real* part
	/// and the imaginary part of @a rhs to the first *unreal* part
	constexpr quaternion_type &operator+=(std::complex<value_type> const &rhs)
	{
		a += std::real(rhs);
		b += std::imag(rhs);
		return *this;
	}

	/// \brief operator += adding the parts of @a rhs to the equivalent part of this
	template <class X>
	constexpr quaternion_type &operator+=(quaternion_type<X> const &rhs)
	{
		a += static_cast<value_type>(rhs.get_a());
		b += static_cast<value_type>(rhs.get_b());
		c += static_cast<value_type>(rhs.get_c());
		d += static_cast<value_type>(rhs.get_d());
		return *this;
	}

	/// \brief operator -= subtracting value @a rhs from the *real* part
	constexpr quaternion_type &operator-=(value_type const &rhs)
	{
		a -= rhs;
		return *this;
	}

	/// \brief operator -= subtracting the real part of @a rhs from the *real* part
	/// and the imaginary part of @a rhs from the first *unreal* part
	constexpr quaternion_type &operator-=(std::complex<value_type> const &rhs)
	{
		a -= std::real(rhs);
		b -= std::imag(rhs);
		return *this;
	}

	/// \brief operator -= subtracting the parts of @a rhs from the equivalent part of this
	template <class X>
	constexpr quaternion_type &operator-=(quaternion_type<X> const &rhs)
	{
		a -= static_cast<value_type>(rhs.get_a());
		b -= static_cast<value_type>(rhs.get_b());
		c -= static_cast<value_type>(rhs.get_c());
		d -= static_cast<value_type>(rhs.get_d());
		return *this;
	}

	/// \brief multiply all parts with value @a rhs
	constexpr quaternion_type &operator*=(value_type const &rhs)
	{
		a *= rhs;
		b *= rhs;
		c *= rhs;
		d *= rhs;
		return *this;
	}

	/// \brief multiply with complex number @a rhs
	constexpr quaternion_type &operator*=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();

		// compute into a temporary, every new part depends on several old parts
		quaternion_type result(a * ar - b * br, a * br + b * ar, c * ar + d * br, -c * br + d * ar);
		swap(result);
		return *this;
	}

	/// \brief multiply @a a with @a b and return the result
	friend constexpr quaternion_type operator*(const quaternion_type &a, const quaternion_type &b)
	{
		auto result = a;
		result *= b;
		return result;
	}

	/// \brief multiply with quaternion @a rhs
	template <typename X>
	constexpr quaternion_type &operator*=(quaternion_type<X> const &rhs)
	{
		auto ar = static_cast<value_type>(rhs.get_a());
		auto br = static_cast<value_type>(rhs.get_b());
		auto cr = static_cast<value_type>(rhs.get_c());
		auto dr = static_cast<value_type>(rhs.get_d());

		quaternion_type result(
			a * ar - b * br - c * cr - d * dr,
			a * br + b * ar + c * dr - d * cr,
			a * cr - b * dr + c * ar + d * br,
			a * dr + b * cr - c * br + d * ar);
		swap(result);
		return *this;
	}

	/// \brief divide all parts by @a rhs
	constexpr quaternion_type &operator/=(value_type const &rhs)
	{
		a /= rhs;
		b /= rhs;
		c /= rhs;
		d /= rhs;
		return *this;
	}

	/// \brief divide by complex number @a rhs
	constexpr quaternion_type &operator/=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();
		value_type denominator = ar * ar + br * br;

		quaternion_type result(
			(+a * ar + b * br) / denominator,
			(-a * br + b * ar) / denominator,
			(+c * ar - d * br) / denominator,
			(+c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	/// \brief divide by quaternion @a rhs
	template <typename X>
	constexpr quaternion_type &operator/=(quaternion_type<X> const &rhs)
	{
		auto ar = static_cast<value_type>(rhs.get_a());
		auto br = static_cast<value_type>(rhs.get_b());
		auto cr = static_cast<value_type>(rhs.get_c());
		auto dr = static_cast<value_type>(rhs.get_d());

		value_type denominator = ar * ar + br * br + cr * cr + dr * dr;

		quaternion_type result(
			(+a * ar + b * br + c * cr + d * dr) / denominator,
			(-a * br + b * ar - c * dr + d * cr) / denominator,
			(-a * cr + b * dr + c * ar - d * br) / denominator,
			(-a * dr - b * cr + c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	/// \brief normalise the values so that the length of the result is 1
	///
	/// A quaternion with a length of 0.001 or less cannot be scaled
	/// reliably, in that case the identity (1, 0, 0, 0) is returned.
	friend constexpr quaternion_type normalize(quaternion_type q)
	{
		// sum of squares, computed directly to avoid a heap allocated temporary
		const value_type length = std::sqrt(q.a * q.a + q.b * q.b + q.c * q.c + q.d * q.d);

		if (length > 0.001)
			q /= length;
		else
			q = quaternion_type(1, 0, 0, 0);

		return q;
	}

	/// \brief return the conjugate of @a q
	friend constexpr quaternion_type conj(quaternion_type q)
	{
		return quaternion_type{ +q.a, -q.b, -q.c, -q.d };
	}

	[[nodiscard]] constexpr value_type get_a() const { return a; } ///< Return part a
	[[nodiscard]] constexpr value_type get_b() const { return b; } ///< Return part b
	[[nodiscard]] constexpr value_type get_c() const { return c; } ///< Return part c
	[[nodiscard]] constexpr value_type get_d() const { return d; } ///< Return part d

	/// \brief compare with @a rhs, true if all parts are equal
	constexpr bool operator==(const quaternion_type &rhs) const
	{
		return a == rhs.a and b == rhs.b and c == rhs.c and d == rhs.d;
	}

	/// \brief compare with @a rhs, true if at least one part differs
	constexpr bool operator!=(const quaternion_type &rhs) const
	{
		return a != rhs.a or b != rhs.b or c != rhs.c or d != rhs.d;
	}

	/// \brief true if at least one of the parts is not zero
	constexpr explicit operator bool() const
	{
		return a != 0 or b != 0 or c != 0 or d != 0;
	}

	/// \brief for debugging e.g.
	friend std::ostream &operator<<(std::ostream &os, const quaternion_type &rhs)
	{
		os << std::format("{{ a: {}, b: {}, c: {}, d: {} }}", rhs.a, rhs.b, rhs.c, rhs.d);
		return os;
	}

  private:
	value_type a, b, c, d;
};
/**
 * @brief Construct a quaternion from a magnitude and three angles.
 *
 * This code is similar to the code in boost so I copy the documentation as well:
 *
 * > spherical is a simple transposition of polar, it takes as inputs a (positive)
 * > magnitude and a point on the hypersphere, given by three angles. The first of
 * > these, theta has a natural range of -pi to +pi, and the other two have natural
 * > ranges of -pi/2 to +pi/2 (as is the case with the usual spherical coordinates in
 * > **R**<sup>3</sup>). Due to the many symmetries and periodicities, nothing untoward happens if
 * > the magnitude is negative or the angles are outside their natural ranges. The
 * > expected degeneracies (a magnitude of zero ignores the angles settings...) do
 * > happen however.
 *
 * @param rho The magnitude, every part of the result is multiplied with it
 * @param theta The first angle
 * @param phi1 The second angle
 * @param phi2 The third angle
 */
template <typename T>
inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1, T const &phi2)
{
	// both cosines are used more than once
	T const cp1 = std::cos(phi1);
	T const cp2 = std::cos(phi2);

	// the point on the unit hypersphere
	T const unit_a = std::cos(theta) * cp1 * cp2;
	T const unit_b = std::sin(theta) * cp1 * cp2;
	T const unit_c = std::sin(phi1) * cp2;
	T const unit_d = std::sin(phi2);

	quaternion_type<T> q(unit_a, unit_b, unit_c, unit_d);

	// scale to the requested magnitude
	q *= rho;
	return q;
}
/// \brief By default we use the float version of a quaternion
using quaternion = quaternion_type<float>;
// --------------------------------------------------------------------
/**
* @brief 3D point: a location with x, y and z coordinates as floating point.
*
* Note that you can simply use structured binding to get access to the
* individual parts like so:
*
* @code{.cpp}
* float x, y, z;
* tie(x, y, z) = atom.get_location();
* @endcode
*/
template <typename F>
struct point_type
{
	/// \brief The scalar type used for the x, y and z members
	using value_type = F;

	value_type m_x; ///< The x part of the location
	value_type m_y; ///< The y part of the location
	value_type m_z; ///< The z part of the location

	/// \brief Default constructor, sets all three members to zero
	constexpr point_type()
		: m_x(0)
		, m_y(0)
		, m_z(0)
	{
	}

	/// \brief Constructor taking the three coordinates @a x, @a y and @a z
	constexpr point_type(value_type x, value_type y, value_type z)
		: m_x(x)
		, m_y(y)
		, m_z(z)
	{
	}

	/// \brief Converting copy constructor, each member of @a pt is
	/// static_cast to the value_type of this point
	template <typename PF>
	constexpr point_type(const point_type<PF> &pt)
		: m_x(static_cast<value_type>(pt.m_x))
		, m_y(static_cast<value_type>(pt.m_y))
		, m_z(static_cast<value_type>(pt.m_z))
	{
	}

	/// \brief Constructor taking a tuple @a pt containing x, y and z in that order
	constexpr point_type(const std::tuple<value_type, value_type, value_type> &pt)
		: point_type(std::get<0>(pt), std::get<1>(pt), std::get<2>(pt))
	{
	}

#if HAVE_LIBCLIPPER
	/// \brief Construct a point using the values in clipper coordinate @a pt
	constexpr point_type(const clipper::Coord_orth &pt)
		: m_x(pt[0])
		, m_y(pt[1])
		, m_z(pt[2])
	{
	}

	/// \brief Assign a point using the values in clipper coordinate @a rhs
	constexpr point_type &operator=(const clipper::Coord_orth &rhs)
	{
		m_x = rhs[0];
		m_y = rhs[1];
		m_z = rhs[2];
		return *this;
	}
#endif

	/// \brief Converting assignment operator, each member of @a rhs is
	/// static_cast to the value_type of this point
	template <typename PF>
	constexpr point_type &operator=(const point_type<PF> &rhs)
	{
		m_x = static_cast<value_type>(rhs.m_x);
		m_y = static_cast<value_type>(rhs.m_y);
		m_z = static_cast<value_type>(rhs.m_z);
		return *this;
	}

	[[nodiscard]] constexpr value_type &get_x() { return m_x; }      ///< Get a reference to x
	[[nodiscard]] constexpr value_type get_x() const { return m_x; } ///< Get the value of x
	constexpr void set_x(value_type x) { m_x = x; }                  ///< Set the value of x to @a x

	[[nodiscard]] constexpr value_type &get_y() { return m_y; }      ///< Get a reference to y
	[[nodiscard]] constexpr value_type get_y() const { return m_y; } ///< Get the value of y
	constexpr void set_y(value_type y) { m_y = y; }                  ///< Set the value of y to @a y

	[[nodiscard]] constexpr value_type &get_z() { return m_z; }      ///< Get a reference to z
	[[nodiscard]] constexpr value_type get_z() const { return m_z; } ///< Get the value of z
	constexpr void set_z(value_type z) { m_z = z; }                  ///< Set the value of z to @a z

	/// \brief Member-wise addition of @a rhs to this point
	constexpr point_type &operator+=(const point_type &rhs)
	{
		m_x += rhs.m_x;
		m_y += rhs.m_y;
		m_z += rhs.m_z;
		return *this;
	}

	/// \brief Add the scalar @a d to each of the three members
	constexpr point_type &operator+=(value_type d)
	{
		m_x += d;
		m_y += d;
		m_z += d;
		return *this;
	}

	/// \brief Return the member-wise sum of @a lhs and @a rhs, the value_type
	/// of the result is the common type of both operands
	template <typename F2>
	friend constexpr auto operator+(const point_type &lhs, const point_type<F2> &rhs)
	{
		using result_type = point_type<std::common_type_t<value_type, F2>>;
		return result_type(lhs.m_x + rhs.m_x, lhs.m_y + rhs.m_y, lhs.m_z + rhs.m_z);
	}

	/// \brief Member-wise subtraction of @a rhs from this point
	constexpr point_type &operator-=(const point_type &rhs)
	{
		m_x -= rhs.m_x;
		m_y -= rhs.m_y;
		m_z -= rhs.m_z;
		return *this;
	}

	/// \brief Subtract the scalar @a d from each of the three members
	constexpr point_type &operator-=(value_type d)
	{
		m_x -= d;
		m_y -= d;
		m_z -= d;
		return *this;
	}

	/// \brief Return the member-wise difference of @a lhs and @a rhs, the value_type
	/// of the result is the common type of both operands
	template <typename F2>
	friend constexpr auto operator-(const point_type &lhs, const point_type<F2> &rhs)
	{
		using result_type = point_type<std::common_type_t<value_type, F2>>;
		return result_type(lhs.m_x - rhs.m_x, lhs.m_y - rhs.m_y, lhs.m_z - rhs.m_z);
	}

	/// \brief Return a copy of @a pt with the sign of each member flipped
	friend constexpr point_type operator-(const point_type &pt)
	{
		return point_type(-pt.m_x, -pt.m_y, -pt.m_z);
	}

	/// \brief Multiply each of the three members with @a rhs
	constexpr point_type &operator*=(value_type rhs)
	{
		m_x *= rhs;
		m_y *= rhs;
		m_z *= rhs;
		return *this;
	}

	/// \brief Return point @a pt scaled by the value @a f, the value_type
	/// of the result is the common type of both operands
	template <typename F2>
	friend constexpr auto operator*(const point_type &pt, F2 f)
	{
		using result_type = point_type<std::common_type_t<value_type, F2>>;
		return result_type(pt.m_x * f, pt.m_y * f, pt.m_z * f);
	}

	/// \brief Return point @a pt scaled by the value @a f, the value_type
	/// of the result is the common type of both operands
	template <typename F2>
	friend constexpr auto operator*(F2 f, const point_type &pt)
	{
		using result_type = point_type<std::common_type_t<value_type, F2>>;
		return result_type(pt.m_x * f, pt.m_y * f, pt.m_z * f);
	}

	/// \brief Divide each of the three members by @a rhs
	constexpr point_type &operator/=(value_type rhs)
	{
		m_x /= rhs;
		m_y /= rhs;
		m_z /= rhs;
		return *this;
	}

	/// \brief Return point @a pt with each member divided by @a f, the value_type
	/// of the result is the common type of both operands
	template <typename F2>
	friend constexpr auto operator/(const point_type &pt, F2 f)
	{
		using result_type = point_type<std::common_type_t<value_type, F2>>;
		return result_type(pt.m_x / f, pt.m_y / f, pt.m_z / f);
	}

	/**
	 * @brief Treat this point as a vector and scale it to unit length by
	 * dividing all members by the current length.
	 *
	 * Nothing is changed when the squared length is not larger than zero.
	 *
	 * @return The length this vector had before normalising, or the
	 *         (not larger than zero) squared length if nothing was changed
	 */
	constexpr value_type normalize()
	{
		auto len = m_x * m_x + m_y * m_y + m_z * m_z;

		// Only a vector with a positive squared length can be scaled
		if (len > 0)
		{
			len = std::sqrt(len);
			*this /= len;
		}

		return len;
	}

	/// \brief Rotate this point using the quaternion @a q, the point is
	/// embedded as the quaternion (0, x, y, z) and replaced by the
	/// b, c and d parts of q * p * conj(q)
	constexpr void rotate(const quaternion &q)
	{
		quaternion_type<value_type> p(0, m_x, m_y, m_z);

		p = q * p * conj(q);

		m_x = p.get_b();
		m_y = p.get_c();
		m_z = p.get_d();
	}

	/// \brief Rotate this point using the quaternion @a q around @a pivot:
	/// the pivot is subtracted first, then the rotation is applied and
	/// finally the pivot is added again
	constexpr void rotate(const quaternion &q, point_type pivot)
	{
		*this -= pivot;
		rotate(q);
		*this += pivot;
	}

#if HAVE_LIBCLIPPER
	/// \brief Make it possible to pass a point to clipper functions expecting a clipper coordinate
	operator clipper::Coord_orth() const
	{
		return clipper::Coord_orth(m_x, m_y, m_z);
	}
#endif

	/// \brief Allow access to this point as if it is a tuple of three
	/// references to const value_type
	constexpr operator std::tuple<const value_type &, const value_type &, const value_type &>() const
	{
		return std::tie(m_x, m_y, m_z);
	}

	/// \brief Allow access to this point as if it is a tuple of three
	/// references to value_type
	constexpr operator std::tuple<value_type &, value_type &, value_type &>()
	{
		return std::tie(m_x, m_y, m_z);
	}

#if defined(__cpp_impl_three_way_comparison)
	/// \brief The defaulted spaceship operator, compares members in declaration order
	constexpr auto operator<=>(const point_type &rhs) const = default;
#else
	/// \brief Equality, true if all three members compare equal
	constexpr bool operator==(const point_type &rhs) const
	{
		return m_x == rhs.m_x and m_y == rhs.m_y and m_z == rhs.m_z;
	}

	/// \brief Inequality, true if at least one of the members differs
	constexpr bool operator!=(const point_type &rhs) const
	{
		return m_x != rhs.m_x or m_y != rhs.m_y or m_z != rhs.m_z;
	}
#endif

	// consider point as a vector... perhaps I should rename point?

	/// \brief Treating the point as a vector, return the squared length
	[[nodiscard]] constexpr value_type length_sq() const
	{
		return m_x * m_x + m_y * m_y + m_z * m_z;
	}

	/// \brief Treating the point as a vector, return the length
	[[nodiscard]] constexpr value_type length() const
	{
		return std::sqrt(length_sq());
	}

	/// \brief Write the point @a pt to @a os formatted as `(x,y,z)`
	friend std::ostream &operator<<(std::ostream &os, const point_type &pt)
	{
		return os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
	}
};
/// \brief By default we use points with float value_type, i.e.
/// cif::point is an alias for point_type<float>
using point = point_type<float>;
// --------------------------------------------------------------------
// several standard 3d operations
/// \brief Return the squared distance between points @a a and @a b
///
/// This is the sum of the squared member-wise differences; no square
/// root is taken.
template <typename F1, typename F2>
constexpr auto distance_squared(const point_type<F1> &a, const point_type<F2> &b)
{
	const auto dx = a.m_x - b.m_x;
	const auto dy = a.m_y - b.m_y;
	const auto dz = a.m_z - b.m_z;

	return dx * dx + dy * dy + dz * dz;
}
/// \brief Return the distance between points @a a and @a b
///
/// This is the square root of the sum of the squared member-wise differences.
template <typename F1, typename F2>
constexpr auto distance(const point_type<F1> &a, const point_type<F2> &b)
{
	const auto dx = a.m_x - b.m_x;
	const auto dy = a.m_y - b.m_y;
	const auto dz = a.m_z - b.m_z;

	return std::sqrt(dx * dx + dy * dy + dz * dz);
}
/// \brief Return the dot product of the vectors @a a and @a b, i.e. the
/// sum of the member-wise products
template <typename F1, typename F2>
inline constexpr auto dot_product(const point_type<F1> &a, const point_type<F2> &b)
{
	const auto xx = a.m_x * b.m_x;
	const auto yy = a.m_y * b.m_y;
	const auto zz = a.m_z * b.m_z;

	return xx + yy + zz;
}
/// \brief return the cross product between the vectors @a a and @a b
template <typename F1, typename F2>
inline constexpr auto cross_product(const point_type<F1> &a, const point_type<F2> &b)
{
return point_type<std::common_type_t<F1, F2>>(
a.m_y * b.m_z - b.m_y * a.m_z,
a.m_z * b.m_x - b.m_z * a.m_x,
a.m_x * b.m_y - b.m_x * a.m_y);
}
/// \brief return the squared norm of point @a p
template <typename F>
constexpr F norm_squared(const point_type<F> &p)
{
return p.m_x * p.m_x + p.m_y * p.m_y + p.m_z * p.m_z;
}
/// \brief return the norm of point @a p
template <typename F>
constexpr point_type<F> norm(const point_type<F> &p)
{
return std::sqrt(norm_squared(p));
}
/// \brief return the point where two lines intersect, or an empty value if they don't intersect at all
template <typename F>
std::optional<cif::point> line_line_intersection(const point_type<F> &p1,
const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
auto p13 = p1 - p3;
auto p43 = p4 - p3;
if (std::abs(p43.m_x) < std::numeric_limits<F>::epsilon() and std::abs(p43.m_y) < std::numeric_limits<F>::epsilon() and std::abs(p43.m_z) < std::numeric_limits<F>::epsilon())
return {};
auto p21 = p2 - p1;
if (std::abs(p21.m_x) < std::numeric_limits<F>::epsilon() and std::abs(p21.m_y) < std::numeric_limits<F>::epsilon() and std::abs(p21.m_z) < std::numeric_limits<F>::epsilon())
return {};
auto d1343 = cif::dot_product(p43, p13);
auto d4321 = cif::dot_product(p43, p21);
auto d1321 = cif::dot_product(p13, p21);
auto d4343 = cif::dot_product(p43, p43);
auto d2121 = cif::dot_product(p21, p21);
auto denom = d2121 * d4343 - d4321 * d4321;
if (std::abs(denom) < std::numeric_limits<F>::epsilon())
return {};
auto numer = d1343 * d4321 - d1321 * d4343;
auto mua = numer / denom;
auto mub = (d1343 + d4321 * mua) / d4343;
auto pa = p1 + mua * p21;
auto pb = p3 + mub * p43;
return { (pa + pb) / 2 };
}
/// \brief Return the angle in degrees between the vectors from point @a p2 to @a p1 and @a p2 to @a p3
///
/// The angle is the arc cosine of the dot product of both vectors divided
/// by the product of their lengths, converted from radians to degrees.
template <typename F>
constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
{
	const point_type<F> v1 = p1 - p2;
	const point_type<F> v2 = p3 - p2;

	const auto cosine = dot_product(v1, v2) / (v1.length() * v2.length());

	return std::acos(cosine) * 180 / kPI;
}
/// \brief Return the dihedral angle in degrees for the four points @a p1, @a p2, @a p3 and @a p4
///
/// See https://en.wikipedia.org/wiki/Dihedral_angle for an explanation of what a dihedral angle is
///
/// The value 360 is returned when the angle cannot be calculated, that is
/// when one of the helper vectors has no positive squared length or when
/// both projections are zero.
template <typename F>
constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
	// The value used to signal that no angle could be calculated
	const F undefined = 360;

	const point_type<F> v12 = p1 - p2; // vector from p2 to p1
	const point_type<F> v43 = p4 - p3; // vector from p3 to p4
	const point_type<F> z = p2 - p3;   // vector from p3 to p2

	const point_type<F> p = cross_product(z, v12);
	const point_type<F> x = cross_product(z, v43);
	const point_type<F> y = cross_product(z, x);

	auto u = dot_product(x, x);
	auto v = dot_product(y, y);

	if (not(u > 0 and v > 0))
		return undefined;

	// Project p on the normalised x and y
	u = dot_product(p, x) / std::sqrt(u);
	v = dot_product(p, y) / std::sqrt(v);

	if (u == 0 and v == 0)
		return undefined;

	const F degrees = std::atan2(v, u) * static_cast<F>(180 / kPI);
	return degrees;
}
/// \brief Return the cosine of the angle between the vectors from @a p2 to @a p1
/// and from @a p4 to @a p3
///
/// The result is zero when the product of the squared lengths of both
/// vectors is not larger than zero.
template <typename F>
constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
	const point_type<F> v12 = p1 - p2;
	const point_type<F> v34 = p3 - p4;

	// product of the squared lengths, its square root is the product of the lengths
	const auto len_sq_product = dot_product(v12, v12) * dot_product(v34, v34);

	return len_sq_product > 0 ? dot_product(v12, v34) / std::sqrt(len_sq_product) : 0;
}
/// \brief Return the distance from point @a p to the line from @a l1 to @a l2
///
/// Calculated as the length of the cross product of the vectors from both
/// line points to @a p, divided by the length of the vector from @a l1 to @a l2.
template <typename F>
constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<F> &l2, const point_type<F> &p)
{
	const auto direction = l2 - l1;
	const auto area_vector = cross_product(p - l1, p - l2);

	return area_vector.length() / direction.length();
}
/// \brief return the smallest sphere around the points in @a pts
///
/// The result is a tuple of a point and a float.
/// NOTE(review): presumably the centre and the radius of the sphere, in
/// that order -- confirm against the implementation.
std::tuple<point, float> smallest_sphere_around_points(std::vector<point> pts);
// --------------------------------------------------------------------
/**
* @brief For e.g. simulated annealing, returns a new point that is moved in
* a random direction with a distance randomly chosen from a normal
* distribution with a stddev of offset.
*
* @param p The point to start from
* @param offset The standard deviation of the distance distribution
* @return The moved point
*/
point nudge(point p, float offset);
// --------------------------------------------------------------------
/// \brief Return a quaternion created from angle @a angle and axis @a axis
///
/// NOTE(review): the unit of @a angle (degrees or radians) is not visible
/// in this header -- confirm against the implementation.
quaternion construct_from_angle_axis(float angle, point axis);
/// \brief Return a tuple of an angle and an axis for quaternion @a q
std::tuple<double, point> quaternion_to_angle_axis(quaternion q);
/// @brief Given four points and an angle, return the quaternion required to rotate
/// point p4 along the p2-p3 axis and around point p3 to obtain the required angle
/// within an accuracy of esd
quaternion construct_for_dihedral_angle(point p1, point p2, point p3, point p4,
float angle, float esd);
/// \brief Return the point that is the centroid of all the points in @a pts
point centroid(const std::vector<point> &pts);
/// \brief Move all the points in @a pts so that their centroid is at the origin
/// (0, 0, 0) and return the offset used (the former centroid)
point center_points(std::vector<point> &pts);
/// \brief Returns how the two sets of points \a a and \a b can be aligned
///
/// \param a The first set of points
/// \param b The second set of points
/// \result The quaternion which should be applied to the points in \a a to
/// obtain the best superposition.
quaternion align_points(const std::vector<point> &a, const std::vector<point> &b);
/// \brief The RMSd for the points in \a a and \a b
///
/// \param a The first set of points
/// \param b The second set of points
double RMSd(const std::vector<point> &a, const std::vector<point> &b);
// --------------------------------------------------------------------
/**
* @brief Helper class to generate evenly divided points on a sphere
*
* We use a fibonacci sphere to calculate even distribution of the dots
*
* @tparam N The number of points on the sphere is 2 * N + 1
*/
template <int N>
class spherical_dots
{
  public:
	/// \brief the number of points, one for each index in the range [-N, N]
	///
	/// This has to be 2 * N + 1: the constructor fills exactly that many
	/// points, anything less would make it write past the end of the array.
	constexpr static int P = 2 * N + 1;

	/// \brief the *weight* of the fibonacci sphere, 4 * pi divided by the number of points
	constexpr static double W = (4 * kPI) / P;

	/// \brief the internal storage type
	using array_type = typename std::array<point, P>;

	/// \brief iterator type
	using iterator = typename array_type::const_iterator;

	/// \brief singleton instance, constructed at first use
	static spherical_dots &instance()
	{
		static spherical_dots sInstance;
		return sInstance;
	}

	/// \brief The number of points
	[[nodiscard]] std::size_t size() const { return P; }

	/// \brief Access a point by index, @a inIx is not range checked
	const point operator[](uint32_t inIx) const { return m_points[inIx]; }

	/// \brief iterator pointing to the first point
	[[nodiscard]] iterator begin() const { return m_points.begin(); }

	/// \brief iterator pointing past the last point
	[[nodiscard]] iterator end() const { return m_points.end(); }

	/// \brief return the *weight*,
	[[nodiscard]] double weight() const { return W; }

	/// \brief Constructor, calculates the P points
	///
	/// For each index i in [-N, N] a latitude and a longitude are derived
	/// from i and the golden ratio, the point is then placed at the
	/// corresponding position on the unit sphere.
	spherical_dots()
	{
		const double
			kGoldenRatio = std::numbers::phi;

		auto p = m_points.begin();

		// 2 * N + 1 iterations, matching the P elements in m_points
		for (int32_t i = -N; i <= N; ++i)
		{
			double lat = std::asin((2.0 * i) / P);
			double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;

			p->m_x = std::sin(lon) * std::cos(lat);
			p->m_y = std::cos(lon) * std::cos(lat);
			p->m_z = std::sin(lat);

			++p;
		}
	}

  private:
	array_type m_points;
};
} // namespace cif

595
include/cif++/row.hpp Normal file
View File

@@ -0,0 +1,595 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/item.hpp"
#include <array>
#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
/**
* @file row.hpp
*
* The class cif::row should be an opaque type. It is used to store the
* internal data per row in a category. You should use cif::row_handle
* to get access to the contents in a row.
*
* One could think of rows as vectors of cif::item. But internally
* that's not the case.
*
* You can access the values of stored items by name or index.
* The return value of operator[] is a reference to a cif::item_value object.
*
* @code {.cpp}
* cif::category &atom_site = my_db["atom_site"];
* cif::row_handle rh = atom_site.front();
*
* // by name:
* std::string name = rh["label_atom_id"].as<std::string>();
*
* // by index:
* uint16_t ix = atom_site.get_item_ix("label_atom_id");
* assert(rh[ix].as<std::string>() == name);
* @endcode
*
* There is some template magic here to allow easy extracting of data
* from rows. This can be done using cif::tie e.g.:
*
* @code {.cpp}
* std::string name;
* float x, y, z;
*
* cif::tie(name, x, y, z) = rh.get("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
* @endcode
*
* However, a more modern way uses structured binding:
*
* @code {.cpp}
* const auto &[name, x, y, z] = rh.get<std::string,float,float,float>("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
* @endcode
*
*
*
*/
namespace cif
{
class category;
namespace cql
{
struct connection_impl;
}
namespace detail
{
template <typename... C>
struct get_row_result;
}
// --------------------------------------------------------------------
/// \brief the row class, this one is not directly accessible from the outside
class row : public std::vector<item_value>
{
public:
row() = default;
private:
/**
* @brief Return the item_value pointer for item at index @a ix
*/
item_value *get(uint16_t ix)
{
if (ix >= size())
resize(ix + 1);
return &data()[ix];
}
/**
* @brief Return the const item_value pointer for item at index @a ix
*/
[[nodiscard]] const item_value *get(uint16_t ix) const
{
return ix < size() ? &data()[ix] : nullptr;
}
void set(uint16_t ix, item_value v)
{
if (ix >= size())
resize(ix + 1);
operator[](ix) = std::move(v);
}
friend class category;
friend class category_index;
template <bool, typename...>
friend class iterator_impl_base;
row *m_next = nullptr;
};
// --------------------------------------------------------------------
/// \brief row_handle is the way to access data stored in rows
class row_handle
{
public:
/** @cond */
template <bool>
friend struct item_handle_base;
friend class category;
friend class category_index;
friend class row_initializer;
friend class const_row_handle;
template <bool, typename...>
friend class iterator_impl_base;
row_handle() = default;
virtual ~row_handle() = default;
row_handle(const row_handle &) = default;
row_handle(row_handle &&) = default;
row_handle &operator=(const row_handle &) = default;
row_handle &operator=(row_handle &&) = default;
/** @endcond */
/// \brief constructor taking a category @a cat and a row @a r
row_handle(category &cat, row &r)
: m_category(&cat)
, m_row(&r)
{
}
/// \brief return the category this row belongs to
[[nodiscard]] category &get_category() const
{
return *m_category;
}
/// \brief return the row ID
[[nodiscard]] int64_t row_id() const
{
return reinterpret_cast<int64_t>(m_row);
}
/// \brief Return true if the row is empty or uninitialised
[[nodiscard]] bool empty() const
{
return m_category == nullptr or m_row == nullptr;
}
/// \brief convenience method to test for empty()
explicit operator bool() const
{
return not empty();
}
/// \brief return the count of the items
[[nodiscard]] size_t size() const { return m_row->size(); }
/// \brief return a cif::item_handle to the item in item @a item_ix
item_handle operator[](uint16_t item_ix)
{
return { *m_category, *m_row, item_ix };
}
/// \brief return a cif::item_handle to the item in item @a item_ix
const_item_handle operator[](uint16_t item_ix) const
{
return { *m_category, *m_row, item_ix };
}
/// \brief return a cif::item_handle to the item in the item named @a item_name
item_handle operator[](std::string_view item_name)
{
return { *m_category, *m_row, add_item(item_name) };
}
/// \brief return a cif::item_handle to the item in the item named @a item_name
const_item_handle operator[](std::string_view item_name) const
{
return { *m_category, *m_row, get_item_ix(item_name) };
}
/// \brief assign each of the items named in @a values to their respective value
void assign(const std::vector<item> &values, bool updateLinked = true)
{
for (auto &value : values)
assign(value, updateLinked);
}
/** \brief assign the value @a value to the item named @a name
*
* If updateLinked it true, linked records are updated as well.
* That means that if item @a name is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
* If validate is true, which is default, the assigned value is
* checked to see if it conforms to the rules defined in the dictionary
*/
void assign(std::string_view name, item_value value, bool updateLinked, bool validate = true)
{
assign(add_item(name), std::move(value), updateLinked, validate);
}
/** \brief assign the value @a value to item at index @a item
*
* If updateLinked it true, linked records are updated as well.
* That means that if item @a item is part of the link definition
* and the link results in a linked record in another category
* this record in the linked category is updated as well.
*
* If validate is true, which is default, the assigned value is
* checked to see if it conforms to the rules defined in the dictionary
*/
void assign(uint16_t item, item_value value, bool updateLinked, bool validate = true);
/// \brief Return an object that can be used in combination with cif::tie
/// to assign the values for the items @a items
template <typename... C>
[[nodiscard]] auto get(C... items) const
{
return detail::get_row_result<C...>(*this, { get_item_ix(items)... });
}
/// \brief Return a tuple of values of types @a Ts for the items @a items
template <typename... Ts, typename... C>
std::tuple<Ts...> get(C... items) const
requires(sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1)
{
return detail::get_row_result<Ts...>(*this, { get_item_ix(items)... });
}
/// \brief Get the value of item @a item cast to type @a T
template <typename T>
[[nodiscard]] T get(std::string_view item) const
{
return operator[](get_item_ix(item)).template get<T>();
}
/// \brief compare two rows
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
/// \brief compare two rows
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
protected:
[[nodiscard]] uint16_t get_item_ix(std::string_view name) const;
[[nodiscard]] std::string_view get_item_name(uint16_t ix) const;
friend cql::connection_impl;
[[nodiscard]] auto get_row() const
{
return m_row;
}
// void swap(uint16_t item, row_handle &r) noexcept(false);
// {
// if (not m_category)
// throw std::runtime_error("uninitialized row");
//
// m_category->swap_item(item, *this, b);
// }
category *m_category = nullptr;
row *m_row = nullptr;
private:
uint16_t add_item(std::string_view name);
void assign(const item &i, bool updateLinked)
{
assign(i.name(), i.value(), updateLinked);
}
};
class const_row_handle
{
public:
/** @cond */
template <bool>
friend struct item_handle_base;
friend class category;
friend class category_index;
friend class row_initializer;
template <bool, typename...>
friend class iterator_impl_base;
const_row_handle() = default;
virtual ~const_row_handle() = default;
const_row_handle(const const_row_handle &) = default;
const_row_handle(const_row_handle &&) = default;
const_row_handle &operator=(const const_row_handle &) = default;
const_row_handle &operator=(const_row_handle &&) = default;
const_row_handle(row_handle rh)
: m_category(rh.m_category)
, m_row(rh.m_row)
{
}
/** @endcond */
/// \brief constructor taking a category @a cat and a row @a r
const_row_handle(const category &cat, const row &r)
: m_category(&cat)
, m_row(&r)
{
}
/// \brief return the category this row belongs to
[[nodiscard]] const category &get_category() const
{
return *m_category;
}
/// \brief return the row ID
[[nodiscard]] int64_t row_id() const
{
return reinterpret_cast<int64_t>(m_row);
}
/// \brief Return true if the row is empty or uninitialised
[[nodiscard]] bool empty() const
{
return m_category == nullptr or m_row == nullptr;
}
/// \brief convenience method to test for empty()
explicit operator bool() const
{
return not empty();
}
/// \brief return the count of the items
[[nodiscard]] size_t size() const { return m_row->size(); }
/// \brief return a cif::item_handle to the item in item @a item_ix
const_item_handle operator[](uint16_t item_ix) const
{
return { *m_category, *m_row, item_ix };
}
/// \brief return a cif::item_handle to the item in the item named @a item_name
const_item_handle operator[](std::string_view item_name) const
{
return operator[](get_item_ix(item_name));
}
/// \brief Return an object that can be used in combination with cif::tie
/// to assign the values for the items @a items
template <typename... C>
[[nodiscard]] auto get(C... items) const
{
return detail::get_row_result<C...>(*this, { get_item_ix(items)... });
}
/// \brief Return a tuple of values of types @a Ts for the items @a items
template <typename... Ts, typename... C>
std::tuple<Ts...> get(C... items) const
requires(sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1)
{
return detail::get_row_result<Ts...>(*this, { get_item_ix(items)... });
}
/// \brief Get the value of item @a item cast to type @a T
template <typename T>
[[nodiscard]] T get(std::string_view item) const
{
return operator[](get_item_ix(item)).template get<T>();
}
/// \brief compare two rows
// bool operator==(const const_row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
friend bool operator==(const_row_handle a, const_row_handle b)
{
return a.m_category == b.m_category and a.m_row == b.m_row;
}
/// \brief compare two rows
bool operator!=(const const_row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
protected:
[[nodiscard]] uint16_t get_item_ix(std::string_view name) const;
[[nodiscard]] std::string_view get_item_name(uint16_t ix) const;
friend cql::connection_impl;
[[nodiscard]] auto get_row() const
{
return m_row;
}
// void swap(uint16_t item, const_row_handle &r) noexcept(false);
// {
// if (not m_category)
// throw std::runtime_error("uninitialized row");
//
// m_category->swap_item(item, *this, b);
// }
const category *m_category = nullptr;
const row *m_row = nullptr;
};
namespace detail
{
// some helper classes to help create tuple result types

/// \brief The result of a get on a row handle: the row together with the
/// indices of the requested items. The values are only extracted once this
/// object is converted into a std::tuple.
///
/// \tparam C The parameter pack used for the get, only its size is used here
template <typename... C>
struct get_row_result
{
/// \brief The number of items requested
static constexpr std::size_t N = sizeof...(C);
/// \brief Constructor taking the row @a r and the indices @a items of the requested items
get_row_result(const_row_handle r, std::array<uint16_t, N> &&items)
: m_row(std::move(r))
, m_items(std::move(items))
{
}
/// \brief Return a handle to the requested item number @a ix, this is an
/// index into the list of requested items, not an item index in the row
// NOTE(review): m_row is a const_row_handle whose operator[] returns a
// const_item_handle while this function returns an item_handle -- confirm
// this conversion exists and is intended.
const item_handle operator[](uint16_t ix) const
{
return m_row[m_items[ix]];
}
/// \brief Conversion to a tuple of values of types @a Ts, only available
/// when the number of types equals the number of requested items
template <typename... Ts>
operator std::tuple<Ts...>() const
requires(N == sizeof...(Ts))
{
return get<Ts...>(std::index_sequence_for<Ts...>{});
}
/// \brief Helper for the tuple conversion, extracts the value of requested
/// item number Is as type Ts, for each pair in the two packs
template <typename... Ts, std::size_t... Is>
[[nodiscard]] std::tuple<Ts...> get(std::index_sequence<Is...>) const
{
return std::tuple<Ts...>{ m_row[m_items[Is]].template get<Ts>()... };
}
const_row_handle m_row; ///< The row to take the values from
std::array<uint16_t, N> m_items; ///< The item indices of the requested items
};
// we want to be able to tie some variables to a get_row_result, for this we use tiewraps

/// \brief Wrapper around a tuple of type std::tuple<Ts...> that can be
/// assigned from the result of a get on a row handle. cif::tie creates
/// these with reference types so that the assignment ends up in the
/// variables of the caller.
template <typename... Ts>
struct tie_wrap
{
/// \brief Constructor, stores @a args in the wrapped tuple
tie_wrap(Ts... args)
: m_value(args...)
{
}
/// \brief Assign the values in @a rr to the wrapped tuple, @a rr is
/// converted to a tuple of the non-reference versions of Ts first.
/// Since the parameter is a const rvalue reference this only accepts rvalues.
template <typename RR>
void operator=(const RR &&rr)
{
// get_row_result will do the conversion, but only if the types
// are compatible. That means the number of parameters to the get()
// of the row should be equal to the number of items in the tuple
// you are trying to tie.
using RType = std::tuple<std::remove_reference_t<Ts>...>;
m_value = static_cast<RType>(rr);
}
std::tuple<Ts...> m_value; ///< The wrapped tuple
};
} // namespace detail
/// \brief similar to std::tie, assign values to each element in @a v from the
/// result of a get on a row_handle.
template <typename... Ts>
auto tie(Ts &...v)
{
return detail::tie_wrap<Ts &...>(std::forward<Ts &>(v)...);
}
// --------------------------------------------------------------------
/**
 * @brief The class row_initializer is a list of cif::item's.
 *
 * It is used when constructing new rows: the item name and value
 * pairs are collected in one object which can then be passed
 * in one go to the constructing function.
 */
class row_initializer : public std::vector<item>
{
  public:
	/** @cond */
	friend class category;

	row_initializer() = default;

	row_initializer(const row_initializer &) = default;
	row_initializer(row_initializer &&) = default;

	row_initializer &operator=(const row_initializer &) = default;
	row_initializer &operator=(row_initializer &&) = default;
	/** @endcond */

	/// \brief constructor taking a std::initializer_list of items
	row_initializer(std::initializer_list<item> items)
		: std::vector<item>(items)
	{
	}

	/// \brief constructor taking the range of items from @a b up to @a e,
	/// the value_type of the iterators should be cif::item
	template <typename ItemIter>
	row_initializer(ItemIter b, ItemIter e)
		requires(std::is_same_v<typename ItemIter::value_type, item>)
		: std::vector<item>(b, e)
	{
	}

	/// \brief constructor taking the values of an existing row, delegates
	/// to the constructor taking a const_row_handle
	row_initializer(row_handle rh)
		: cif::row_initializer(const_row_handle(rh))
	{
	}

	/// \brief constructor taking the values of an existing row
	row_initializer(const_row_handle rh);

	/// \brief set the value for item name @a name to @a value
	void set_value(std::string name, item_value value);

	/// \brief set the value for the item named as @a i to the value of @a i
	void set_value(const item &i)
	{
		set_value(i.name(), i.value());
	}

	/// \brief set the value for item name @a name to @a value, but only if the item did not have a value already
	void set_value_if_empty(std::string name, item_value value);

	/// \brief set the value for the item named as @a i to the value of @a i,
	/// but only if the item did not have a value already
	void set_value_if_empty(const item &i)
	{
		set_value_if_empty(i.name(), i.value());
	}
};
} // namespace cif

543
include/cif++/symmetry.hpp Normal file
View File

@@ -0,0 +1,543 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/exports.hpp"
#include "cif++/matrix.hpp"
#include "cif++/point.hpp"
#include <array>
#include <cstdint>
#include <string>
#if defined(__cpp_impl_three_way_comparison)
# include <utility>
#endif
/** \file cif++/symmetry.hpp
*
* This file contains code to do symmetry operations based on the
* operations as specified in the International Tables.
*/
namespace cif
{
// --------------------------------------------------------------------
/// \brief Multiply the 3x3 matrix @a m with the coordinates of point @a pt
/// (treated as a column vector) and return the resulting point
inline point operator*(const matrix3x3<float> &m, const point &pt)
{
	// Each resulting coordinate is the dot product of one matrix row with pt
	const auto x = m(0, 0) * pt.m_x + m(0, 1) * pt.m_y + m(0, 2) * pt.m_z;
	const auto y = m(1, 0) * pt.m_x + m(1, 1) * pt.m_y + m(1, 2) * pt.m_z;
	const auto z = m(2, 0) * pt.m_x + m(2, 1) * pt.m_y + m(2, 2) * pt.m_z;

	return { x, y, z };
}
// --------------------------------------------------------------------
/// \brief the kinds of spacegroup names we know
///
/// The enumerators mirror the three name fields stored in struct space_group.
/// One of the get_space_group_number overloads takes a value of this type to
/// tell how the given name should be interpreted.
enum class space_group_name
{
full, ///< The *full* spacegroup
xHM, ///< The *xHM* spacegroup
Hall ///< The *Hall* spacegroup
};
/// \brief For each known spacegroup we define a structure like this
///
/// The names are plain C string pointers; this struct does not manage their storage.
/// Entries of this type make up the global kSpaceGroups table.
struct space_group
{
const char *name; ///< The name according to *full*
const char *xHM; ///< The name according to *xHM*
const char *Hall; ///< The name according to *Hall*
int nr; ///< The number for this spacegroup
};
/// \brief Global list of spacegroups
extern CIFPP_EXPORT const space_group kSpaceGroups[];
/// \brief Global for the size of the list of spacegroups
extern CIFPP_EXPORT const std::size_t kNrOfSpaceGroups;
// --------------------------------------------------------------------
/**
 * @brief Helper class to efficiently pack the data that
 * makes up a symmetry operation
 *
 * The 15 integers are stored in the lower 36 bits of a single 64 bit
 * value. The first nine values occupy two bits each (bits 35..18) and
 * can hold 0, 1 or -1 (the latter is stored as binary 11). The last six
 * values occupy three bits each (bits 17..0) and can hold 0..7.
 */
struct symop_data
{
	/// \brief constructor, packs the 15 values in @a data into a single integer
	///
	/// Values outside the range representable in their bit field are silently truncated.
	constexpr symop_data(const std::array<int, 15> &data)
		: m_packed((data[0] bitand 0x03ULL) << 34 bitor
				   (data[1] bitand 0x03ULL) << 32 bitor
				   (data[2] bitand 0x03ULL) << 30 bitor
				   (data[3] bitand 0x03ULL) << 28 bitor
				   (data[4] bitand 0x03ULL) << 26 bitor
				   (data[5] bitand 0x03ULL) << 24 bitor
				   (data[6] bitand 0x03ULL) << 22 bitor
				   (data[7] bitand 0x03ULL) << 20 bitor
				   (data[8] bitand 0x03ULL) << 18 bitor
				   (data[9] bitand 0x07ULL) << 15 bitor
				   (data[10] bitand 0x07ULL) << 12 bitor
				   (data[11] bitand 0x07ULL) << 9 bitor
				   (data[12] bitand 0x07ULL) << 6 bitor
				   (data[13] bitand 0x07ULL) << 3 bitor
				   (data[14] bitand 0x07ULL) << 0)
	{
	}

	/// \brief compare, two objects are equal when their packed values are equal
	constexpr bool operator==(const symop_data &rhs) const
	{
		return m_packed == rhs.m_packed;
	}

	/// \brief sorting order, based on the packed value
	constexpr bool operator<(const symop_data &rhs) const
	{
		return m_packed < rhs.m_packed;
	}

	/// \brief return an int representing the value stored in the two bits at offset @a offset
	///
	/// The stored bit pattern 3 is returned as -1, so the result is one of -1, 0, 1 or 2.
	[[nodiscard]] inline constexpr int unpack3(int offset) const
	{
		int result = static_cast<int>((m_packed >> offset) bitand 0x03);
		return result == 3 ? -1 : result;
	}

	/// \brief return an int representing the value stored in the three bits at offset @a offset
	[[nodiscard]] inline constexpr int unpack7(int offset) const
	{
		return static_cast<int>((m_packed >> offset) bitand 0x07);
	}

	/// \brief return an array of 15 ints representing the values stored,
	/// in the same order as they were passed to the constructor
	[[nodiscard]] constexpr std::array<int, 15> data() const
	{
		return {
			unpack3(34),
			unpack3(32),
			unpack3(30),
			unpack3(28),
			unpack3(26),
			unpack3(24),
			unpack3(22),
			unpack3(20),
			unpack3(18),
			unpack7(15),
			unpack7(12),
			unpack7(9),
			unpack7(6),
			unpack7(3),
			unpack7(0)
		};
	}

  private:
	friend struct symop_datablock;

	/// Mask selecting the 36 bits that are in use. This is a static constant
	/// on purpose: as a per-object const member it would double the size of
	/// every symop_data and implicitly delete the copy assignment operator.
	static constexpr uint64_t kPackMask = (~0ULL >> (64 - 36));

	/// Construct from an already packed value, bits outside kPackMask are discarded
	constexpr symop_data(uint64_t v)
		: m_packed(v bitand kPackMask)
	{
	}

	uint64_t m_packed;
};
/**
 * @brief For each symmetry operator defined in the international tables
 * we have an entry in this struct type. It contains the spacegroup
 * number, the symmetry operations and the rotational number.
 *
 * Everything is stored in one 64 bit value: bits 63..48 hold the
 * spacegroup number, bits 47..40 the rotational number and the lower
 * bits the packed symop_data.
 */
struct symop_datablock
{
	/// \brief constructor
	///
	/// Only the lower 16 bits of @a spacegroup and the lower 8 bits of
	/// @a rotational_number are stored.
	constexpr symop_datablock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
		: m_v((spacegroup bitand 0xffffULL) << 48 bitor
			  (rotational_number bitand 0xffULL) << 40 bitor
			  symop_data(rt_data).m_packed)
	{
	}

	/// Return the spacegroup
	[[nodiscard]] constexpr int spacegroup() const { return static_cast<int>(m_v >> 48); }

	/// Return the symmetry operation. The complete value is handed to the
	/// private symop_data constructor which masks off the bits it does not use.
	[[nodiscard]] symop_data symop() const { return { m_v }; }

	/// Return the rotational_number
	[[nodiscard]] constexpr uint8_t rotational_number() const { return static_cast<uint8_t>((m_v >> 40) bitand 0xff); }

  private:
	uint64_t m_v;
};

static_assert(sizeof(symop_datablock) == sizeof(uint64_t), "Size of symop_datablock is wrong");
/// \brief Global containing the list of known symmetry operations
extern CIFPP_EXPORT const symop_datablock kSymopNrTable[];
/// \brief Size of the list of known symmetry operations
extern CIFPP_EXPORT const std::size_t kSymopNrTableSize;
// --------------------------------------------------------------------
// Some more symmetry related stuff here.
class datablock;
class cell;
class spacegroup;
class rtop;
struct sym_op;
/** @brief A class that encapsulates the symmetry operations as used in PDB files,
 * i.e. a rotational number and a translation vector.
 *
 * The syntax in string format follows the syntax as used in mmCIF files, i.e.
 * rotational number followed by underscore and the three translations where 5 is
 * no movement.
 *
 * So the string 1_555 means no symmetry movement at all since the rotational number
 * 1 always corresponds to the symmetry operation [x, y, z].
 */
struct sym_op
{
  public:
	/// \brief constructor, the default arguments result in the identity operator
	///
	/// The constructor is constexpr so that the constexpr members below
	/// (is_identity, operator bool and the comparisons) can actually be
	/// evaluated at compile time.
	constexpr sym_op(uint8_t nr = 1, uint8_t ta = 5, uint8_t tb = 5, uint8_t tc = 5) noexcept
		: m_nr(nr)
		, m_ta(ta)
		, m_tb(tb)
		, m_tc(tc)
	{
	}

	/// \brief construct a sym_op based on the contents encoded in string @a s
	explicit sym_op(std::string_view s);

	/** @cond */
	sym_op(const sym_op &) = default;
	sym_op(sym_op &&) = default;
	sym_op &operator=(const sym_op &) = default;
	sym_op &operator=(sym_op &&) = default;
	/** @endcond */

	/// \brief return true if this sym_op is the identity operator
	[[nodiscard]] constexpr bool is_identity() const
	{
		return m_nr == 1 and m_ta == 5 and m_tb == 5 and m_tc == 5;
	}

	/// \brief quick test for unequal to identity
	explicit constexpr operator bool() const
	{
		return not is_identity();
	}

	/// \brief return the content encoded in a string
	[[nodiscard]] std::string string() const;

#if defined(__cpp_impl_three_way_comparison)
	/// \brief a default spaceship operator
	constexpr auto operator<=>(const sym_op &rhs) const = default;
#else
	/// \brief a default equals operator
	constexpr bool operator==(const sym_op &rhs) const
	{
		return m_nr == rhs.m_nr and m_ta == rhs.m_ta and m_tb == rhs.m_tb and m_tc == rhs.m_tc;
	}

	/// \brief a default not-equals operator
	constexpr bool operator!=(const sym_op &rhs) const
	{
		return not operator==(rhs);
	}
#endif

	/// @cond
	uint8_t m_nr;             // rotational number
	uint8_t m_ta, m_tb, m_tc; // the three translations, 5 means no movement
	/// @endcond
};

static_assert(sizeof(sym_op) == 4, "Sym_op should be four bytes");
namespace literals
{
	/**
	 * @brief User defined literal turning a string literal into a sym_op
	 *
	 * With this operator you can write:
	 *
	 * @code {.cpp}
	 * using namespace cif::literals;
	 *
	 * cif::sym_op so = "1_555"_symop;
	 * @endcode
	 *
	 * The characters are passed on to the sym_op constructor taking a
	 * std::string_view.
	 */
	inline sym_op operator""_symop(const char *text, std::size_t length)
	{
		const std::string_view encoded(text, length);
		return sym_op(encoded);
	}
} // namespace literals
// --------------------------------------------------------------------
// The transformation class
/**
* @brief A class you can use to apply symmetry transformations on points
*
* Transformations consist of two operations, a matrix transformation which
* is often a rotation followed by a translation.
*
* In case the matrix transformation is a pure rotation a quaternion
* is created to do the actual calculations. That's faster and more
* precise.
*/
class transformation
{
public:
/// \brief constructor taking a symop_data object @a data
transformation(const symop_data &data);
/// \brief constructor taking a rotation matrix @a r and a translation vector @a t
transformation(const matrix3x3<float> &r, const cif::point &t);
/** @cond */
transformation(const transformation &) = default;
transformation(transformation &&) = default;
transformation &operator=(const transformation &) = default;
transformation &operator=(transformation &&) = default;
/** @endcond */
/// \brief operator() to perform the transformation on point @a pt and return the result
///
/// The point is taken by value and modified locally. When m_q tests true
/// the point is rotated using the quaternion, otherwise it is multiplied
/// with the matrix m_rotation. In both cases m_translation is added afterwards.
/// NOTE(review): presumably m_q only tests true when try_create_quaternion succeeded; confirm in the implementation.
point operator()(point pt) const
{
if (m_q)
pt.rotate(m_q);
else
pt = m_rotation * pt;
return pt + m_translation;
}
/// \brief return a transformation object that is the result of applying @a rhs after @a lhs
friend transformation operator*(const transformation &lhs, const transformation &rhs);
/// \brief return the inverse transformation for @a t
friend transformation inverse(const transformation &t);
/// \brief return the inverse transformation for this, equivalent to calling inverse(*this)
transformation operator-() const
{
return inverse(*this);
}
// spacegroup is granted access to the private members of this class
friend class spacegroup;
private:
// Most rotation matrices provided by the International Tables
// are really rotation matrices, in those cases we can construct
// a quaternion. Unfortunately, that doesn't work for all of them
void try_create_quaternion();
matrix3x3<float> m_rotation; // the matrix part of the transformation
quaternion m_q; // used instead of m_rotation in operator() when it tests true
point m_translation; // added after the matrix/quaternion step
};
// --------------------------------------------------------------------
// class cell
/**
* @brief The cell class describes the dimensions and angles of a unit cell
* in a crystal
*/
class cell
{
public:
/// \brief constructor taking the three dimensions and, optionally, the three angles
///
/// The angles default to 90.
/// NOTE(review): presumably the angles are in degrees; confirm against the implementation.
cell(float a, float b, float c, float alpha = 90.f, float beta = 90.f, float gamma = 90.f);
/// \brief constructor that takes the appropriate values from the *cell* category in datablock @a db
cell(const datablock &db);
[[nodiscard]] float get_a() const { return m_a; } ///< return dimension a
[[nodiscard]] float get_b() const { return m_b; } ///< return dimension b
[[nodiscard]] float get_c() const { return m_c; } ///< return dimension c
[[nodiscard]] float get_alpha() const { return m_alpha; } ///< return angle alpha
[[nodiscard]] float get_beta() const { return m_beta; } ///< return angle beta
[[nodiscard]] float get_gamma() const { return m_gamma; } ///< return angle gamma
[[nodiscard]] float get_volume() const; ///< return the calculated volume for this cell
// Both matrix getters return a copy of the stored matrix
[[nodiscard]] matrix3x3<float> get_orthogonal_matrix() const { return m_orthogonal; } ///< return the matrix to use to transform coordinates from fractional to orthogonal
[[nodiscard]] matrix3x3<float> get_fractional_matrix() const { return m_fractional; } ///< return the matrix to use to transform coordinates from orthogonal to fractional
private:
// NOTE(review): presumably init() calculates m_orthogonal and m_fractional from the dimensions and angles; defined elsewhere
void init();
float m_a, m_b, m_c, m_alpha, m_beta, m_gamma;
matrix3x3<float> m_orthogonal, m_fractional;
};
// --------------------------------------------------------------------
/// \brief Return the spacegroup number from the *symmetry* category in datablock @a db
int get_space_group_number(const datablock &db);
/// \brief Return the spacegroup number for spacegroup named @a spacegroup
int get_space_group_number(std::string_view spacegroup);
/// \brief Return the spacegroup number for spacegroup named @a spacegroup assuming space_group_name @a type
int get_space_group_number(std::string_view spacegroup, space_group_name type);
/**
* @brief class to encapsulate the list of transformations making up a spacegroup
*
*/
// Note: the class publicly derives from std::vector<transformation>, so the
// complete vector interface is available on a spacegroup object.
// None of the constructors is explicit, so implicit conversions from
// datablock, std::string_view and int to spacegroup are possible.
class spacegroup : public std::vector<transformation>
{
public:
/// \brief constructor using the information in the *symmetry* category in datablock @a db
///
/// Delegates to the constructor taking a spacegroup number.
spacegroup(const datablock &db)
: spacegroup(get_space_group_number(db))
{
}
/// \brief constructor using the spacegroup named @a name
///
/// Delegates to the constructor taking a spacegroup number.
spacegroup(std::string_view name)
: spacegroup(get_space_group_number(name))
{
}
/// \brief constructor using the spacegroup named @a name assuming space_group_name @a type
///
/// Delegates to the constructor taking a spacegroup number.
spacegroup(std::string_view name, space_group_name type)
: spacegroup(get_space_group_number(name, type))
{
}
/// \brief constructor using the spacegroup number @a nr
spacegroup(int nr);
[[nodiscard]] int get_nr() const { return m_nr; } ///< Return the nr
[[nodiscard]] std::string get_name() const; ///< Return the name
/** \brief perform a spacegroup operation on point @a pt using
* cell @a c and sym_op @a symop
*/
point operator()(const point &pt, const cell &c, sym_op symop) const;
/** \brief perform an inverse spacegroup operation on point @a pt using
* cell @a c and sym_op @a symop
*/
[[nodiscard]] point inverse(const point &pt, const cell &c, sym_op symop) const;
private:
int m_nr; // the spacegroup number, see get_nr()
std::size_t m_index; // NOTE(review): presumably an index into one of the global tables; confirm in the implementation
};
// --------------------------------------------------------------------
/**
* @brief A crystal combines a cell and a spacegroup.
*
* The information in cell and spacegroup together make up all
* information you need to do symmetry calculations in a crystal
*/
class crystal
{
public:
/// \brief constructor using the information found in datablock @a db
///
/// Both the cell and the spacegroup are constructed from the same datablock.
crystal(const datablock &db)
: m_cell(db)
, m_spacegroup(db)
{
}
/// \brief constructor using cell @a c and spacegroup @a sg
///
/// The cell is copied, the spacegroup is taken by value and moved into place.
crystal(const cell &c, spacegroup sg)
: m_cell(c)
, m_spacegroup(std::move(sg))
{
}
/** @cond */
crystal(const crystal &) = default;
crystal(crystal &&) = default;
crystal &operator=(const crystal &) = default;
crystal &operator=(crystal &&) = default;
/** @endcond */
[[nodiscard]] const cell &get_cell() const { return m_cell; } ///< Return the cell
[[nodiscard]] const spacegroup &get_spacegroup() const { return m_spacegroup; } ///< Return the spacegroup
/// \brief Return the symmetry copy of point @a pt using symmetry operation @a symop
///
/// Forwards to spacegroup::operator() using the cell of this crystal.
[[nodiscard]] point symmetry_copy(const point &pt, sym_op symop) const
{
return m_spacegroup(pt, m_cell, symop);
}
/// \brief Return the symmetry copy of point @a pt using the inverse of symmetry operation @a symop
///
/// Forwards to spacegroup::inverse using the cell of this crystal.
[[nodiscard]] point inverse_symmetry_copy(const point &pt, sym_op symop) const
{
return m_spacegroup.inverse(pt, m_cell, symop);
}
/// \brief Return a tuple consisting of distance, new location and symmetry operation
/// for the point @a b with respect to point @a a.
[[nodiscard]] std::tuple<float, point, sym_op> closest_symmetry_copy(point a, point b) const;
private:
cell m_cell;
spacegroup m_spacegroup;
};
// --------------------------------------------------------------------
// Symmetry operations on points
/// \brief convert the fractional point @a pt into orthogonal coordinates,
/// using the orthogonal matrix of cell @a c
inline point orthogonal(const point &pt, const cell &c)
{
	const matrix3x3<float> to_orthogonal = c.get_orthogonal_matrix();
	return to_orthogonal * pt;
}
/// \brief convert the orthogonal point @a pt into fractional coordinates,
/// using the fractional matrix of cell @a c
inline point fractional(const point &pt, const cell &c)
{
	const matrix3x3<float> to_fractional = c.get_fractional_matrix();
	return to_fractional * pt;
}
// --------------------------------------------------------------------
} // namespace cif

392
include/cif++/text.hpp Normal file
View File

@@ -0,0 +1,392 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/exports.hpp"
#include <charconv>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <set>
#include <sstream>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#if __has_include(<experimental/type_traits>)
# include <experimental/type_traits>
namespace std_experimental = std::experimental;
#else
// A quick hack to work around the missing is_detected in MSVC
namespace std_experimental
{
	namespace detail
	{
		/// Primary template, selected when Op<Args...> is not a valid type
		template <class AlwaysVoid, template <class...> class Op, class... Args>
		struct detector
		{
			using value_t = std::false_type;
		};

		/// Specialisation that is only viable when Op<Args...> names a valid type
		template <template <class...> class Op, class... Args>
		struct detector<std::void_t<Op<Args...>>, Op, Args...>
		{
			using value_t = std::true_type;
		};
	} // namespace detail

	/// std::true_type when Op<Args...> is well-formed, std::false_type otherwise
	template <template <class...> class Op, class... Args>
	using is_detected = typename detail::detector<void, Op, Args...>::value_t;

	/// The value of is_detected as a compile time boolean. Declared
	/// inline constexpr since this lives in a header and is used as
	/// a template argument.
	template <template <class...> class Op, class... Args>
	inline constexpr bool is_detected_v = is_detected<Op, Args...>::value;
} // namespace std_experimental
#endif
/**
* \file text.hpp
*
* Various text manipulating routines
*/
namespace cif
{
// --------------------------------------------------------------------
// some basic utilities: Since we're using ASCII input only, we define for optimisation
// our own case conversion routines.
/// \brief return whether string @a a is equal to string @a b ignoring changes in character case
bool iequals(std::string_view a, std::string_view b) noexcept;
/// \brief compare string @a a to string @a b ignoring changes in character case
int icompare(std::string_view a, std::string_view b) noexcept;
/// \brief return whether string @a a is equal to string @a b ignoring changes in character case
bool iequals(const char *a, const char *b) noexcept;
/// \brief compare string @a a to string @a b ignoring changes in character case
int icompare(const char *a, const char *b) noexcept;
/// \brief convert the string @a s to lower case in situ
void to_lower(std::string &s);
/// \brief return a lower case copy of string @a s
std::string to_lower_copy(std::string_view s);
/// \brief convert the string @a s to upper case in situ
void to_upper(std::string &s);
/**
 * @brief Write the elements in the range [ @a b, @a e ) to a string,
 * placing @a sep between each pair of consecutive elements
 *
 * The elements are written using operator<< on a std::ostringstream.
 * An empty range results in an empty string.
 *
 * Example usage:
 *
 * @code {.cpp}
 * std::vector<std::string> v{ "aap", "noot", "mies" };
 *
 * assert(cif::join(v.begin(), v.end(), ", ") == "aap, noot, mies");
 * @endcode
 *
 */
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
	std::ostringstream joined;
	bool first = true;

	for (auto it = b; it != e; ++it)
	{
		// The separator goes in between elements, so skip it for the very first one
		if (not first)
			joined << sep;
		first = false;

		joined << *it;
	}

	return joined.str();
}
/**
 * @brief Write all elements of container @a arr to a string, placing
 * @a sep between each pair of consecutive elements
 *
 * This simply calls the iterator based join with the begin and end
 * of @a arr.
 *
 * Example usage:
 *
 * @code {.cpp}
 * std::list<std::string> v{ "aap", "noot", "mies" };
 *
 * assert(cif::join(v, ", ") == "aap, noot, mies");
 * @endcode
 *
 */
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
	const auto first = arr.begin();
	const auto last = arr.end();
	return join(first, last, sep);
}
/**
 * @brief Cut the string @a s into pieces at each character that
 * occurs in @a separators
 *
 * Every single separator character induces a split, so two adjacent
 * separators result in an empty piece in between. Likewise, a separator
 * at the very start or end of @a s results in an empty first or last piece.
 *
 * When @a suppress_empty is true, those empty pieces are left out of
 * the result.
 *
 * The pieces are constructed from a pointer into @a s and a length, so
 * with the default StringType of std::string_view the result refers to
 * the characters of @a s and is valid only as long as those are.
 *
 * Example:
 *
 * @code {.cpp}
 * auto v = cif::split("aap:noot,,mies", ":,", true);
 *
 * assert(v == std::vector{"aap", "noot", "mies"});
 * @endcode
 *
 */
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
	std::vector<StringType> parts;
	std::string_view::size_type start = 0;

	for (;;)
	{
		// Locate the next separator, if there is none the piece runs to the end of s
		const auto hit = s.find_first_of(separators, start);
		const bool at_end = hit == std::string_view::npos;

		const char *first = s.data() + start;
		const char *last = s.data() + (at_end ? s.length() : hit);

		if (last > first or not suppress_empty)
			parts.emplace_back(first, last - first);

		if (at_end)
			break;

		// continue right after the separator
		start = hit + 1;
	}

	return parts;
}
/**
* @brief Replace all occurrences of @a what in string @a s with the string @a with
*
* The string @a with may be empty in which case each occurrence of @a what is simply
* deleted.
*/
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
#if defined(__cpp_lib_starts_ends_with)
/// \brief return whether string @a s starts with @a with
///
/// Thin wrapper around std::string::starts_with.
/// NOTE(review): @a s is taken by value as std::string, so each call copies
/// the string; ends_with takes a std::string_view instead.
inline bool starts_with(std::string s, std::string_view with)
{
return s.starts_with(with);
}
/// \brief return whether string @a s ends with @a with
///
/// Thin wrapper around std::string_view::ends_with.
inline bool ends_with(std::string_view s, std::string_view with)
{
return s.ends_with(with);
}
#else
/// \brief return whether the first characters of string @a s are equal to @a with
///
/// An empty @a with always matches.
/// NOTE(review): @a s is taken by value as std::string, so each call copies the string.
inline bool starts_with(std::string s, std::string_view with)
{
	// compare at most with.length() characters at the start of s with the prefix
	const int order = s.compare(0, with.length(), with);
	return order == 0;
}
/// \brief return whether the last characters of string @a s are equal to @a with
///
/// An empty @a with always matches.
inline bool ends_with(std::string_view s, std::string_view with)
{
	// A suffix that is longer than the string can never match
	if (s.length() < with.length())
		return false;

	const auto offset = s.length() - with.length();
	return s.compare(offset, with.length(), with) == 0;
}
#endif
#if defined(__cpp_lib_string_contains)
/// \brief return whether string @a s contains @a q
///
/// Thin wrapper around std::string_view::contains.
inline bool contains(std::string_view s, std::string_view q)
{
return s.contains(q);
}
#else
/// \brief return whether @a q occurs somewhere in string @a s
///
/// An empty @a q is found in every string.
inline bool contains(std::string_view s, std::string_view q)
{
	const auto pos = s.find(q);
	return pos != std::string_view::npos;
}
#endif
/// \brief return whether string @a s contains @a q ignoring character case
bool icontains(std::string_view s, std::string_view q);
/// \brief trim white space at the start of string @a s in situ
void trim_left(std::string &s);
/// \brief trim white space at the end of string @a s in situ
void trim_right(std::string &s);
/// \brief trim white space at both the start and the end of string @a s in situ
void trim(std::string &s);
/// \brief return a string trimmed of white space at the start of string @a s
std::string trim_left_copy(std::string_view s);
/// \brief return a string trimmed of white space at the end of string @a s
std::string trim_right_copy(std::string_view s);
/// \brief return a string trimmed of white space at both the start and the end of string @a s
std::string trim_copy(std::string_view s);
// To make life easier, we also define iless and iset using iequals
/// \brief a function object that orders strings while ignoring their character case,
/// for use as the comparator of ordered containers
struct iless
{
	/// \brief return true when @a a sorts before @a b according to icompare
	bool operator()(const std::string &a, const std::string &b) const
	{
		const int order = icompare(a, b);
		return order < 0;
	}
};
/// iset is a std::set of std::string but with a comparator that
/// ignores character case.
using iset = std::set<std::string, iless>;
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
/// \brief global list containing the lower case version of each ASCII character
extern CIFPP_EXPORT const uint8_t kCharToLowerMap[256];
/// \brief a very fast tolower implementation using the table kCharToLowerMap
///
/// Only the lowest eight bits of @a ch are used to index the table.
inline char tolower(int ch)
{
	const auto index = static_cast<uint8_t>(ch);
	return static_cast<char>(kCharToLowerMap[index]);
}
// --------------------------------------------------------------------
/** \brief return a tuple consisting of the category and item name for @a item_name
*
* The category name is stripped of its leading underscore character.
*
* If no dot character was found, the category name is empty. That's for
* cif 1.0 formatted data.
*/
[[deprecated("use split_item_name instead")]]
std::tuple<std::string, std::string> split_tag_name(std::string_view item_name);
/** \brief return a tuple consisting of the category and item name for @a item_name
*
* The category name is stripped of its leading underscore character.
*
* If no dot character was found, the category name is empty. That's for
* cif 1.0 formatted data.
*/
std::tuple<std::string, std::string> split_item_name(std::string_view item_name);
// --------------------------------------------------------------------
/// \brief generate a cif name, used e.g. to generate asym_id's
std::string cif_id_for_number(int number);
// --------------------------------------------------------------------
/** \brief custom word wrapping routine.
*
* Wrap the text in @a text based on a maximum line width @a width using
* a dynamic programming approach to get the most efficient filling of
* the space.
*/
std::vector<std::string> word_wrap(const std::string &text, std::size_t width);
// --------------------------------------------------------------------
/// \brief The type of the expression std::from_chars(const char*, const char*, T&)
///
/// This alias is only well-formed when such a call is valid for type T,
/// which is what makes it usable with is_detected below.
template <typename T>
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
/// \brief Implementation of from_chars that forwards to std::from_chars
template <typename T>
struct std_charconv
{
/// \brief parse the characters in [ @a a, @a b ) and store the result in @a d
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
{
return std::from_chars(a, b, d);
}
};
/// \brief Fallback implementation of from_chars, only declared here;
/// the second template parameter is used to enable the specialisation below
template <typename T, typename = void>
struct ff_charconv;
/// \brief The fallback is available for floating point types only;
/// from_chars is declared here and defined elsewhere
template <typename T>
struct ff_charconv<T, typename std::enable_if_t<std::is_floating_point_v<T>>>
{
static std::from_chars_result from_chars(const char *a, const char *b, T &v);
};
/// \brief Selects std_charconv<T> when std::from_chars can be called for type T,
/// and ff_charconv<T> otherwise
template <typename T>
using charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, ff_charconv<T>>;
/// \brief parse the characters in [ @a s, @a e ) into @a v using the
/// implementation selected by charconv<T>, returns what that implementation returns
template <typename T>
constexpr auto from_chars(const char *s, const char *e, T &v)
{
return charconv<T>::from_chars(s, e, v);
}
} // namespace cif

382
include/cif++/utilities.hpp Normal file
View File

@@ -0,0 +1,382 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/exports.hpp"
#include <cstdint>
#include <filesystem>
#include <iostream>
#include <memory>
#include <string>
#include <string_view>
#include <type_traits>
#ifndef STDOUT_FILENO
/// @brief For systems that lack this value
# define STDOUT_FILENO 1
#endif
#ifndef STDERR_FILENO
/// @brief For systems that lack this value
# define STDERR_FILENO 2
#endif
#if _WIN32
# include <io.h>
# define isatty _isatty
#else
# include <unistd.h>
#endif
#if _MSC_VER
# pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
# pragma warning(disable : 4068) // unknown pragma
# pragma warning(disable : 4100) // unreferenced formal parameter
# pragma warning(disable : 4101) // unreferenced local variable
# pragma warning(disable : 4702) // unreachable code (too bad, this one. Happens in for loops)
// Truncation warnings: yes, perhaps, but I think they are okay
# pragma warning(disable : 4244)
# pragma warning(disable : 4267)
# pragma warning(disable : 4305)
# define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
#endif
/** \file utilities.hpp
*
* This file contains code that is very generic in nature like a progress_bar
* and classes you can use to colourise output text.
*/
namespace cif
{
/**
* @brief The global variable VERBOSE contains the level of verbosity
* requested. A value of 0 is normal, with some output on error conditions.
* A value > 0 will result in more output, the higher the value, the more
* output. A value < 0 will make the library silent, even in error
* conditions.
*/
extern CIFPP_EXPORT int VERBOSE;
/// return the git 'build' number
[[nodiscard]] std::string get_version_nr();
/// return the width of the current output terminal, or 80 if it cannot be determined
[[nodiscard]] uint32_t get_terminal_width();
// --------------------------------------------------------------------
namespace colour
{
	/// @brief The defined colours
	///
	/// The numerical value of a colour is added to 30 to form the
	/// foreground code and to 40 to form the background code that
	/// is written in the escape sequence, so the order matters.
	enum colour_type
	{
		black,
		red,
		green,
		yellow,
		blue,
		magenta,
		cyan,
		white,
		_unused, // placeholder, makes sure none has value 9
		none
	};

	/// @brief The defined styles, the values are written as is in the escape sequence
	enum style_type
	{
		bold = 1,
		underlined = 4,
		blink = 5,
		inverse = 7,
		regular = 22,
	};

	namespace detail
	{
		/**
		 * @brief Struct holding a string along with the colour and
		 * style codes to use when writing it to a terminal.
		 *
		 * The string is stored as a std::string_view, the characters
		 * themselves are not copied and should therefore remain valid
		 * for as long as this object is used.
		 */
		struct coloured_string_t
		{
			/**
			 * @brief Construct a new coloured string t object
			 *
			 * @param s The text to write
			 * @param fc The foreground colour, stored as value + 30
			 * @param bc The background colour, stored as value + 40
			 * @param st The text style
			 */
			coloured_string_t(std::string_view s, colour_type fc, colour_type bc, style_type st)
				: m_str(s)
				, m_fore_colour(static_cast<int>(fc) + 30)
				, m_back_colour(static_cast<int>(bc) + 40)
				, m_style(static_cast<int>(st))
			{
			}

			// Objects of this type are neither copyable nor movable
			coloured_string_t(const coloured_string_t &) = delete;
			coloured_string_t &operator=(const coloured_string_t &) = delete;

			/**
			 * @brief Write out the string, either coloured or not
			 *
			 * The escape sequences are only written when @a os shares its
			 * stream buffer with std::cout or std::cerr and the corresponding
			 * file descriptor is a terminal according to isatty. In all
			 * other cases only the text itself is written.
			 */
			template <typename char_type, typename traits_type>
			friend std::basic_ostream<char_type, traits_type> &operator<<(
				std::basic_ostream<char_type, traits_type> &os, const coloured_string_t &cs)
			{
				const bool to_stdout_tty = os.rdbuf() == std::cout.rdbuf() and isatty(STDOUT_FILENO);
				const bool to_stderr_tty = os.rdbuf() == std::cerr.rdbuf() and isatty(STDERR_FILENO);

				if (to_stdout_tty or to_stderr_tty)
				{
					os << "\033[" << cs.m_fore_colour << ';' << cs.m_style << ';' << cs.m_back_colour << 'm'
					   << cs.m_str
					   << "\033[0m";
				}
				else
					os << cs.m_str;

				return os;
			}

			/// @cond
			std::string_view m_str;
			int m_fore_colour, m_back_colour;
			int m_style;
			/// @endcond
		};
	} // namespace detail
} // namespace colour
/**
 * @brief Manipulator for coloured strings.
 *
 * When writing out text to the terminal it is often useful to have
 * some of the text colourised. But only if the output is really a
 * terminal since colouring text is done using escape sequences
 * and if output is redirected to a file, these escape sequences end up
 * in the file making the real text less easy to read.
 *
 * The code presented here is rather basic. It mimics the std::quoted
 * manipulator in that it will colour a string with optionally
 * requested colours and text style.
 *
 * Example:
 *
 * @code {.cpp}
 * using namespace cif::colour;
 * std::cout << cif::coloured("Hello, world!", white, red, bold) << '\n';
 * @endcode
 *
 * The returned object only holds a std::string_view on the characters
 * of @a str. The argument is therefore taken by reference: taking it by
 * value would, for owning types like std::string, leave the view pointing
 * into a function parameter that no longer exists after the call. The
 * string passed in should remain valid for as long as the result is used.
 *
 * @param str String to quote.
 * @param fg Foreground (=text) colour to use
 * @param bg Background colour to use
 * @param st Text style to use
 */
template <typename T>
	requires std::is_assignable_v<std::string_view, T>
inline auto coloured(const T &str,
	colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
	colour::style_type st = colour::style_type::regular)
{
	return colour::detail::coloured_string_t(str, fg, bg, st);
}
// --------------------------------------------------------------------
// A progress bar
/**
* @brief A simple progress bar class for terminal based output
*
* Using a progress bar is very convenient for the end user when
* you have long running code. It gives feed back on how fast an
* operation is performed and may give an indication how long it
* will take before it is finished.
*
* Using this cif::progress_bar implementation is straightforward:
*
* @code {.cpp}
* using namespace std::chrono_literals;
*
* cif::progress_bar pb(10, "counting to ten");
*
* for (int i = 1; i <= 10; ++i)
* {
* pb.consumed(1);
* std::this_thread::sleep_for(1s);
* }
*
* @endcode
*
* When the progress_bar is created, it first checks
* to see if stdout is to a real TTY and if the VERBOSE
* flag is not less than zero (quiet mode). If this passes
* a thread is started that waits for updates.
*
* The first two seconds, nothing is written to the screen
* so if the work is finished within those two seconds
* the screen stays clean.
*
* After this time, a progress bar is printed that may look
* like this:
*
* @code
* step 3 ========================-------------------------------- 40% ⢁
* @endcode
*
* The first characters contain the initial action name or
* the message text if it was used afterwards.
*
* The thermometer is made up with '=' and '-' characters.
*
* A percentage is also shown and at the end there is a spinner
* that gives feedback that the program is really still working.
*
* The progress bar is removed if the max has been reached
* or if the progress bar is destructed. If any output has
* been generated, the initial action is printed out along
* with the total time spent.
*/
class progress_bar
{
public:
/// @brief Progress bars cannot be copied
progress_bar(const progress_bar &) = delete;
/// @brief Progress bars cannot be copy-assigned
progress_bar &operator=(const progress_bar &) = delete;
/**
* @brief Construct a new progress bar object
*
* Progress ranges from 0 (zero) to @a inMax
*
* The action in @a inAction is used for display
*
* @param inMax The maximum value
* @param inAction The description of what is
* going on
*/
progress_bar(int64_t inMax, const std::string &inAction);
/**
* @brief Destroy the progress bar object
*
*/
~progress_bar();
/**
* @brief Notify the progress bar that @a inConsumed
* should be added to the internal progress counter
*
* @param inConsumed The amount of work done since the previous update
*/
void consumed(int64_t inConsumed); // consumed is relative
/**
* @brief Notify the progress bar that the internal
* progress counter should be updated to @a inProgress
*
* @param inProgress The new absolute value of the progress counter
*/
void progress(int64_t inProgress); // progress is absolute
/**
* @brief Replace the action string in the progress bar
* with @a inMessage
*
* @param inMessage The text to display from now on
*/
void message(const std::string &inMessage);
/**
* @brief Flush the progress bar to the output stream
*/
void flush();
private:
/// Pointer to the implementation object; progress_bar_impl is only
/// declared here, its definition lives elsewhere.
struct progress_bar_impl *m_impl = nullptr;
};
// --------------------------------------------------------------------
// Resources
/**
* @brief Load a resource from disk or the compiled in resources
*
* @verbatim embed:rst
.. note::
See the :doc:`documentation on resources </resources>` for more information.
@endverbatim
*
* @param name The named resource to load
* @return std::unique_ptr<std::istream> A pointer to the std::istream or empty if not found
*/
std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
/**
* @brief Add a file specified by @a dataFile as the data for resource @a name
*
* @verbatim embed:rst
.. note::
See the :doc:`documentation on resources </resources>` for more information.
@endverbatim
*
* @param name The name of the resource to specify
* @param dataFile Path to a file containing the data
*/
void add_file_resource(const std::string &name, std::filesystem::path dataFile);
/**
* @brief List all the file resources added with cif::add_file_resource.
*
* @param os The std::ostream to write the list of file resources to
*/
void list_file_resources(std::ostream &os);
/**
* @brief Add a directory to the list of search directories. This list is
* searched in a last-in-first-out order.
*
* @verbatim embed:rst
.. note::
See the :doc:`documentation on resources </resources>` for more information.
@endverbatim
*
* @param dataDir The directory to add to the list of search directories
*/
void add_data_directory(std::filesystem::path dataDir);
/**
* @brief List all the data directories, for error reporting on missing resources.
*
* @param os The std::ostream to write the directories to
*/
void list_data_directories(std::ostream &os);
} // namespace cif

586
include/cif++/validate.hpp Normal file
View File

@@ -0,0 +1,586 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include "cif++/category.hpp"
#include "cif++/item.hpp"
#include "cif++/text.hpp"
#include <cassert>
#include <iosfwd>
#include <list>
#include <memory>
#include <mutex>
#include <optional>
#include <set>
#include <stdexcept>
#include <string>
#include <string_view>
#include <system_error>
#include <utility>
#include <vector>
/**
* @file validate.hpp
*
* Support for validating mmCIF files based on a dictionary. These dictionaries
* contain information about the categories and items therein, what they may
* contain and how this should be formatted. There's also information on links
* between parent and child categories.
*
*/
namespace cif
{
class category;
struct category_validator;
// --------------------------------------------------------------------
// New: error_code
/**
* @enum validation_error
*
* @brief A strongly typed enum containing the error codes reported by @ref cif::validator and friends
*
* Values start at 1, the value zero is not used for any error.
*/
enum class validation_error
{
value_does_not_match_rx = 1, /**< The value of an item does not conform to the regular expression specified for it */
value_is_not_in_enumeration_list, /**< The value of an item is not in the list of values allowed */
value_is_not_a_number, /**< The value is not a number */
value_is_not_a_char_string, /**< The value is not a character string */
not_a_known_primitive_type, /**< The type is not a known primitive type */
undefined_category, /**< Category has no definition in the dictionary */
unknown_item, /**< The item is not defined to be part of the category */
incorrect_item_validator, /**< Incorrectly specified validator for item */
missing_mandatory_items, /**< Missing mandatory items */
missing_key_items, /**< An index could not be constructed due to missing key items */
item_not_allowed_in_category, /**< Requested item not allowed in category according to dictionary */
empty_file, /**< The file contains no datablocks */
empty_datablock, /**< The datablock contains no categories */
empty_category, /**< The category is empty */
not_valid_pdbx, /**< The file is not a valid PDBx file */
};
/**
 * @brief The std::error_category implementation that provides the name and
 * the message texts for @ref validation_error codes
 */
class validation_category_impl : public std::error_category
{
  public:
    /**
     * @brief The name identifying this error category
     *
     * @return const char* Always the string "cif::validation"
     */
    [[nodiscard]] const char *name() const noexcept override
    {
        return "cif::validation";
    }

    /**
     * @brief Translate the error code @a ev into a human readable message
     *
     * @param ev The numerical value of a @ref validation_error
     * @return std::string The message for @a ev. For a value that is not a
     * known @ref validation_error an assertion fires and, when assertions
     * are disabled, the text "unknown error code" is returned.
     */
    [[nodiscard]] std::string message(int ev) const override
    {
        // Fallback text, only returned when no case below matches
        const char *text = "unknown error code";

        switch (static_cast<validation_error>(ev))
        {
            case validation_error::value_does_not_match_rx:
                text = "Value in item does not match regular expression";
                break;
            case validation_error::value_is_not_in_enumeration_list:
                text = "Value is not in the enumerated list of valid values";
                break;
            case validation_error::value_is_not_a_number:
                text = "Value is not a number";
                break;
            case validation_error::value_is_not_a_char_string:
                text = "Value is not a character string";
                break;
            case validation_error::not_a_known_primitive_type:
                text = "The type is not a known primitive type";
                break;
            case validation_error::undefined_category:
                text = "Category has no definition in the dictionary";
                break;
            case validation_error::unknown_item:
                text = "Item is not defined to be part of the category";
                break;
            case validation_error::incorrect_item_validator:
                text = "Incorrectly specified validator for item";
                break;
            case validation_error::missing_mandatory_items:
                text = "Missing mandatory items";
                break;
            case validation_error::missing_key_items:
                text = "An index could not be constructed due to missing key items";
                break;
            case validation_error::item_not_allowed_in_category:
                text = "Requested item not allowed in category according to dictionary";
                break;
            case validation_error::empty_file:
                text = "The file contains no datablocks";
                break;
            case validation_error::empty_datablock:
                text = "The datablock contains no categories";
                break;
            case validation_error::empty_category:
                text = "The category is empty";
                break;
            case validation_error::not_valid_pdbx:
                text = "The file is not a valid PDBx file";
                break;
            default:
                // Not a value of validation_error: a programming error
                assert(false);
                break;
        }

        return text;
    }

    /**
     * @brief Equivalence test between an error code and a condition value,
     * this implementation never reports two values as equivalent
     *
     * @return bool Always false
     */
    [[nodiscard]] bool equivalent(const std::error_code & /*code*/, int /*condition*/) const noexcept override
    {
        return false;
    }
};
/**
* @brief Return the error category instance used for @ref validation_error codes
*
* The instance is a function-local static, so every call returns a
* reference to the same object.
*
* @return std::error_category& Reference to the single category instance
*/
inline std::error_category &validation_category()
{
static validation_category_impl instance;
return instance;
}
/// @brief Create a std::error_code in the validation category for @a e
inline std::error_code make_error_code(validation_error e)
{
    return std::error_code(static_cast<int>(e), validation_category());
}
/// @brief Create a std::error_condition in the validation category for @a e
inline std::error_condition make_error_condition(validation_error e)
{
    return std::error_condition(static_cast<int>(e), validation_category());
}
// --------------------------------------------------------------------
/**
* @brief Exception type for validation errors, derived from std::runtime_error
*
* Each constructor taking a @ref validation_error converts the value with
* make_error_code and delegates to the corresponding std::error_code
* constructor. Those constructors are only declared here.
*/
class validation_exception : public std::runtime_error
{
public:
/// @brief Construct from the error @a err
validation_exception(validation_error err)
: validation_exception(make_error_code(err))
{
}
/// @brief Construct from the error @a err that was found in category @a category
validation_exception(validation_error err, std::string_view category)
: validation_exception(make_error_code(err), category)
{
}
/// @brief Construct from the error @a err that was found in item @a item of category @a category
validation_exception(validation_error err, std::string_view category, std::string_view item)
: validation_exception(make_error_code(err), category, item)
{
}
/// @brief Construct from the error code @a ec
validation_exception(std::error_code ec);
/// @brief Construct from the error code @a ec and the name of the category involved
validation_exception(std::error_code ec, std::string_view category);
/// @brief Construct from the error code @a ec and the names of the category and item involved
validation_exception(std::error_code ec, std::string_view category, std::string_view item);
};
// --------------------------------------------------------------------
/** @brief the primitive types known */
enum class DDL_PrimitiveType
{
Char, ///< Text
UChar, ///< Text that is compared ignoring the character case
Numb ///< Numeric values
};
/// @brief Return the DDL_PrimitiveType encoded in @a s
/// NOTE(review): presumably throws when @a s is not a known type name, the
/// definition is not visible here -- confirm.
DDL_PrimitiveType map_to_primitive_type(std::string_view s);
/// @brief Return the DDL_PrimitiveType encoded in @a s, error reporting variant
/// This overload is noexcept, errors are reported through @a ec.
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept;
// The regular expression implementation, only declared here
struct regex_impl;
/**
* @brief For each defined type in a dictionary a type_validator is created
*
* A type validator can check if the contents of an item are conforming the
* specification. The check is done using regular expressions.
*
* A type_validator can also be used to compare two values that conform to
* this type. Comparison is of course based on the primitive type.
*
*/
struct type_validator
{
std::string m_name; ///< The name of the type
DDL_PrimitiveType m_primitive_type; ///< The primitive_type of the type
std::shared_ptr<regex_impl> m_rx; ///< The regular expression for the type
type_validator() = delete;
/// @brief Constructor
type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx);
/// @brief Copy constructor
type_validator(const type_validator &tv) = default;
/// @brief Move constructor
type_validator(type_validator &&rhs)
{
swap(*this, rhs);
}
/// @brief Move constructor
type_validator &operator=(type_validator rhs)
{
swap(*this, rhs);
return *this;
}
/// @brief Destructor
~type_validator() = default;
friend void swap(type_validator &a, type_validator &b)
{
std::swap(a.m_name, b.m_name);
std::swap(a.m_primitive_type, b.m_primitive_type);
std::swap(a.m_rx, b.m_rx);
}
/// @brief Return the sorting order
bool operator<(const type_validator &rhs) const
{
return icompare(m_name, rhs.m_name) < 0;
}
/// @brief Compare the contents of @a a and @a b based on the
/// primitive type of this type. A value of zero indicates the
/// values are equal. Less than zero means @a a sorts before @a b
/// and a value larger than zero likewise means the opposite
[[nodiscard]] int compare(const item_value &a, const item_value &b) const;
};
/** @brief Item alias, items can be renamed over time
 *
 * Stores the former name of an item together with the dictionary and the
 * dictionary version in which that name was known.
 */
struct item_alias
{
    /// @brief Construct an alias from its name, dictionary and dictionary version
    item_alias(std::string alias_name, std::string dictionary, std::string version)
        : m_name(std::move(alias_name))
        , m_dict(std::move(dictionary))
        , m_vers(std::move(version))
    {
    }

    item_alias(const item_alias &) = default;
    item_alias &operator=(const item_alias &) = default;

    // Declaring the copy operations suppresses the implicit move operations,
    // so these are requested explicitly. Without them every move, e.g. when
    // a std::vector<item_alias> grows, would copy all three strings.
    item_alias(item_alias &&) noexcept = default;
    item_alias &operator=(item_alias &&) noexcept = default;

    std::string m_name; ///< The alias_name
    std::string m_dict; ///< The dictionary in which it was known
    std::string m_vers; ///< The version of the dictionary
};
/**
 * @brief An item_validator binds a type_validator to an item in
 * a category along with other information found in the dictionary.
 *
 * mmCIF dictionaries may indicate an item is e.g. mandatory or
 * consists of a certain list of allowed values. Even default
 * values can be provided.
 *
 */
struct item_validator
{
    std::string m_item_name;                 ///< The item name
    bool m_mandatory = false;                ///< Flag indicating this item is mandatory, false unless set
    const type_validator *m_type = nullptr;  ///< The type for this item, nullptr unless set
    cif::iset m_enums;                       ///< If filled, the set of allowed values
    std::string m_default;                   ///< If filled, a default value for this item
    std::string m_category;                  ///< The category this item_validator belongs to
    std::vector<item_alias> m_aliases;       ///< The aliases for this item

    /// @brief Compare based on the name
    bool operator<(const item_validator &rhs) const
    {
        return icompare(m_item_name, rhs.m_item_name) < 0;
    }

    /// @brief Compare based on the name
    bool operator==(const item_validator &rhs) const
    {
        return iequals(m_item_name, rhs.m_item_name);
    }

    /// @brief Validate value @a value, throws if invalid
    void validate_value(const item_value &value) const;

    /// @brief Validate value @a value and return potential error in @a ec
    bool validate_value(const item_value &value, std::error_code &ec) const noexcept;

    /// @brief Validate the text @a value and return potential error in @a ec
    bool validate_value(std::string_view value, std::error_code &ec) const noexcept;
};
/**
 * @brief Validation information for one category
 *
 * A category can have a key: a set of items whose values,
 * taken together, should be unique.
 */
struct category_validator
{
    std::string m_name;                            ///< The name of the category
    std::vector<std::string> m_keys;               ///< The list of items that make up the key
    cif::iset m_groups;                            ///< The category groups this category belongs to
    cif::iset m_mandatory_items;                   ///< The mandatory items for this category
    std::vector<item_validator> m_item_validators; ///< The item validators for the items in this category

    /// @brief Ordering of category validators, true if the name of this
    /// category compares less than the name of @a rhs using icompare
    bool operator<(const category_validator &rhs) const
    {
        const auto order = icompare(m_name, rhs.m_name);
        return order < 0;
    }

    /// @brief Store item_validator @a v in the list of item validators
    void add_item_validator(item_validator &&v);

    /// @brief Look up the item_validator for item @a item_name, the result may be nullptr
    [[nodiscard]] const item_validator *get_validator_for_item(std::string_view item_name) const;

    /// @brief Look up the item_validator for the item that has @a item_name as alias name, the result may be nullptr
    [[nodiscard]] const item_validator *get_validator_for_aliased_item(std::string_view item_name) const;
};
/**
 * @brief A validator for links between categories
 *
 * Links are defined as a set of pairs of item names in a
 * parent category and a corresponding item in a child
 * category. This means that the size of m_parent_keys
 * is always equal to the size of m_child_keys.
 *
 * Multiple links may be defined between two categories.
 *
 */
struct link_validator
{
    int m_link_group_id = 0;                ///< The link group ID, zero unless set explicitly
    std::string m_parent_category;          ///< The name of the parent category
    std::vector<std::string> m_parent_keys; ///< The items in the parent category making up the set of linked items
    std::string m_child_category;           ///< The name of the child category
    std::vector<std::string> m_child_keys;  ///< The items in the child category making up the set of linked items
    std::string m_link_group_label;         ///< The group label assigned to this link
};
// --------------------------------------------------------------------
/**
* @brief The validator class combines all the link, category and item validator classes
*
*/
class validator
{
public:
/**
* @brief Construct a new, empty validator object
*
* Only the audit_conform category is set up, no dictionary is parsed.
*/
validator()
: m_audit_conform("audit_conform")
{
}
/**
* @brief Construct a new validator object from a dictionary
*
* @param is The stream containing the dictionary data to parse
*/
validator(std::istream &is)
: m_audit_conform("audit_conform")
{
parse(is);
}
/// @brief destructor
~validator() = default;
/// @brief copy constructor
validator(const validator &rhs) = default;
/// @brief move constructor, the members of the new object are
/// default-initialised first and then swap is called with @a rhs
validator(validator &&rhs)
{
swap(*this, rhs);
}
/// @brief assignment operator, implemented as copy-and-swap. Since the
/// argument is taken by value this serves both copy and move assignment.
validator &operator=(validator rhs)
{
swap(*this, rhs);
return *this;
}
/// @brief Swap function for validators, only declared here
friend void swap(validator &a, validator &b) noexcept;
friend class dictionary_parser;
friend class validator_factory;
/// @brief Parse dictionary in @a is and put content in this validator, optionally extending it
/// @param is The stream containing a valid cif dictionary
void parse(std::istream &is);
/// @brief Add type_validator @a v to the list of type validators
void add_type_validator(type_validator &&v);
/// @brief Return the type validator for @a type_code, may return nullptr
[[nodiscard]] const type_validator *get_validator_for_type(std::string_view type_code) const;
/// @brief Add category_validator @a v to the list of category validators
void add_category_validator(category_validator &&v);
/// @brief Return the category validator for @a category, may return nullptr
[[nodiscard]] const category_validator *get_validator_for_category(std::string_view category) const;
/// @brief Add link_validator @a v to the list of link validators
void add_link_validator(link_validator &&v);
/// @brief Return the list of link validators for which the parent is @a category
[[nodiscard]] std::vector<const link_validator *> get_links_for_parent(std::string_view category) const;
/// @brief Return the list of link validators for which the child is @a category
[[nodiscard]] std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
/// @brief Bottleneck function to report an error in validation
/// @param err The error, converted with make_error_code and passed on to the std::error_code overload
/// @param fatal Passed on unchanged, defaults to true
void report_error(validation_error err, bool fatal = true) const
{
report_error(make_error_code(err), fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, bool fatal = true) const;
/// @brief Bottleneck function to report an error in validation
/// @param err The error, converted with make_error_code and passed on to the std::error_code overload
/// @param category The name of the category involved
/// @param item The name of the item involved
/// @param fatal Passed on unchanged, defaults to true
void report_error(validation_error err, std::string_view category,
std::string_view item, bool fatal = true) const
{
report_error(make_error_code(err), category, item, fatal);
}
/// @brief Bottleneck function to report an error in validation
void report_error(std::error_code ec, std::string_view category,
std::string_view item, bool fatal = true) const;
/// @brief Write out the audit_conform data for this validator
/// @param audit_conform The category to write the data to
void fill_audit_conform(category &audit_conform) const;
/// @brief Return true if this validator matches @a audit_conform
[[nodiscard]] bool matches_audit_conform(const category &audit_conform) const;
/// @brief Add info
/// @param name A name, presumably of a dictionary -- confirm against the implementation
/// @param version An optional version belonging to @a name
void append_audit_conform(const std::string &name, const std::optional<std::string> &version);
private:
// name is fully qualified here:
[[nodiscard]] item_validator *get_validator_for_item(std::string_view name) const;
category m_audit_conform; // constructed with the name "audit_conform" by the non-move constructors
bool m_strict = false;
std::set<type_validator> m_type_validators;
std::set<category_validator> m_category_validators;
std::vector<link_validator> m_link_validators;
};
// --------------------------------------------------------------------
/**
 * @brief The validator_factory hands out validator objects. Validators are
 * globally unique, so they should be obtained through this class,
 * which is a singleton.
 */
class validator_factory
{
  public:
    /// @brief Access the one and only instance of this class
    static validator_factory &instance();

    /// @brief Return validator with info recorded in @a audit_conform
    const validator *get(const category &audit_conform);

    /// @brief Return the single-file validator with name @a dictionary_name
    /// and the dictionary name may be a set of dictionaries separated by comma
    const validator *get(std::string_view dictionary_name);

    /// @brief Return validator with info recorded in @a audit_conform
    const validator &operator[](const category &audit_conform);

    /// @brief Return the single-file validator with name @a dictionary_name
    /// and the dictionary name may be a set of dictionaries separated by comma
    const validator &operator[](std::string_view dictionary_name);

    /// @brief Return true if the version @a found is equal or higher than @a expected for dictionary @a name
    static bool check_version(std::string_view name, std::string_view expected, std::string_view found);

    /// @brief Move validator @a v into the list of known validators and
    /// return a reference to the stored object. The list is modified
    /// while holding the mutex of this factory.
    const validator &add(validator &&v)
    {
        const std::lock_guard<std::mutex> guard(m_mutex);

        m_validators.push_back(std::move(v));
        return m_validators.back();
    }

    // #if __cplusplus >= 202302L
    // /// @brief Return validator with info recorded in @a audit_conform
    // static validator &operator[](const category &audit_conform)
    // {
    // return instance()[audit_conform];
    // }
    // /// @brief Return the single-file validator with name @a dictionary_name
    // /// and the dictionary name may be a set of dictionaries separated by comma
    // static validator &operator[](std::string_view dict)
    // {
    // return instance()[dict];
    // }
    // #endif

  private:
    validator_factory() = default;

    /// @brief Construct a new validator with name @a name with at least version @a version if specified
    validator construct_validator(std::string_view name, std::optional<std::string> version);

    std::mutex m_mutex;                ///< Held by add() while the list is modified
    std::list<validator> m_validators; ///< The validators known to this factory
};
} // namespace cif

View File

@@ -1,12 +0,0 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
datalibdir=@datarootdir@/libcifpp
Name: libcifpp
Description: C++ library for the manipulation of mmCIF files.
Version: @PACKAGE_VERSION@
Libs: -L${libdir} -lcifpp -lboost_regex -lboost_iostreams
Cflags: -I${includedir} -pthread

316
pcre2-simple/CMakeLists.txt Normal file
View File

@@ -0,0 +1,316 @@
# SPDX-License-Identifier: BSD-2-Clause
#
# Copyright (c) 2025 Maarten L. Hekkelman
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# A simplified wrapper CMakeLists.txt file for PCRE2
#
# This will generate an OBJECT library so it can be linked into another library
cmake_minimum_required(VERSION 3.25)

include(FetchContent)

project(pcre2s VERSION 1.0.0 LANGUAGES C CXX)

# The PCRE2 release that is wrapped. The download URL and the name of the
# extracted directory are derived from these values, so a version bump only
# needs a change here, in PCRE2_DATE and in the EXPECTED_HASH below.
set(PCRE2_MAJOR 10)
set(PCRE2_MINOR 46)
set(PCRE2_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")

# Release date of the version above (10.46 was released on 27 August 2025).
# NOTE(review): presumably substituted into the generated pcre2.h -- confirm
# against src/pcre2.h.in.
set(PCRE2_DATE "2025-08-27")

# The original code:
file(DOWNLOAD https://github.com/PCRE2Project/pcre2/releases/download/pcre2-${PCRE2_VERSION}/pcre2-${PCRE2_VERSION}.tar.gz
    ${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
    EXPECTED_HASH SHA256=8d28d7f2c3b970c3a4bf3776bcbb5adfc923183ce74bc8df1ebaad8c1985bd07)

file(ARCHIVE_EXTRACT INPUT ${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
    DESTINATION ${CMAKE_CURRENT_BINARY_DIR})

# The archive unpacks into a directory named after the release
set(PCRE2_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/pcre2-${PCRE2_VERSION})
# Some needed configuration options
# option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
# option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
# option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)

# FORCE makes the cached value always "LF", regardless of what the user set
set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL)." FORCE)
set_property(CACHE PCRE2_NEWLINE PROPERTY STRINGS "CR" "LF" "CRLF" "ANY" "ANYCRLF" "NUL")

set(PCRE2_LINK_SIZE "2" CACHE STRING "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
set_property(CACHE PCRE2_LINK_SIZE PROPERTY STRINGS "2" "3" "4")

set(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
set(PCRE2_HEAP_LIMIT "20000000" CACHE STRING "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")
set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
set(PCRE2_MATCH_LIMIT "10000000" CACHE STRING "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
set(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.")
set(PCRE2GREP_BUFSIZE "20480" CACHE STRING "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
set(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")

# The variable is named without ${} and the pattern is quoted: an unquoted
# ${CMAKE_SYSTEM_NAME} is expanded first and the resulting text is then
# looked up as a variable name again by if().
if(CMAKE_SYSTEM_NAME MATCHES "Linux|NetBSD")
    set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
else()
    set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
endif()

set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
set(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")

if(MINGW)
    option(NON_STANDARD_LIB_PREFIX "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." OFF)
    option(NON_STANDARD_LIB_SUFFIX "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." OFF)
endif()
#
# Translate the newline convention in PCRE2_NEWLINE into the numeric code
# stored in NEWLINE_DEFAULT. Any other value is a configuration error.
set(NEWLINE_DEFAULT "")

if(PCRE2_NEWLINE STREQUAL "CR")
    set(NEWLINE_DEFAULT "1")
elseif(PCRE2_NEWLINE STREQUAL "LF")
    set(NEWLINE_DEFAULT "2")
elseif(PCRE2_NEWLINE STREQUAL "CRLF")
    set(NEWLINE_DEFAULT "3")
elseif(PCRE2_NEWLINE STREQUAL "ANY")
    set(NEWLINE_DEFAULT "4")
elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
    set(NEWLINE_DEFAULT "5")
elseif(PCRE2_NEWLINE STREQUAL "NUL")
    set(NEWLINE_DEFAULT "6")
else()
    # The list of values in this message matches the branches above
    message(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"CR\", \"LF\", \"CRLF\", \"ANY\", \"ANYCRLF\", \"NUL\".")
endif()
# Some tests: probe the platform and compiler, each check stores its
# outcome in the HAVE_* variable named as its last argument.
include(CheckCSourceCompiles)
include(CheckFunctionExists)
include(CheckSymbolExists)
include(CheckIncludeFile)
# Availability of header files
check_include_file(assert.h HAVE_ASSERT_H)
check_include_file(dirent.h HAVE_DIRENT_H)
check_include_file(sys/stat.h HAVE_SYS_STAT_H)
check_include_file(sys/types.h HAVE_SYS_TYPES_H)
check_include_file(unistd.h HAVE_UNISTD_H)
check_include_file(windows.h HAVE_WINDOWS_H)
# Availability of library functions, looked up in the header given
check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
check_symbol_exists(strerror "string.h" HAVE_STRERROR)
# Does the compiler accept __attribute__((uninitialized))?
check_c_source_compiles(
"int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
HAVE_ATTRIBUTE_UNINITIALIZED
)
# Does the compiler accept the visibility attribute?
check_c_source_compiles(
[=[
extern __attribute__ ((visibility ("default"))) int f(void);
int main(void) { return f(); }
int f(void) { return 42; }
]=]
HAVE_VISIBILITY
)
# PCRE2_EXPORT is the visibility attribute when supported, empty otherwise
if(HAVE_VISIBILITY)
set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
else()
set(PCRE2_EXPORT)
endif()
# Availability of compiler builtins
check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
check_c_source_compiles(
[=[
#include <stddef.h>
int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
]=]
HAVE_BUILTIN_MUL_OVERFLOW
)
check_c_source_compiles(
"int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
HAVE_BUILTIN_UNREACHABLE
)
# # Check whether Intel CET is enabled, and if so, adjust compiler flags. This
# # code was written by PH, trying to imitate the logic from the autotools
# # configuration.
# check_c_source_compiles(
# [=[
# #ifndef __CET__
# #error CET is not enabled
# #endif
# int main() { return 0; }
# ]=]
# INTEL_CET_ENABLED
# )
# if(INTEL_CET_ENABLED)
# set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
# endif()
# Set up some dependencies first
# The distributed character tables are copied unchanged into the build tree
configure_file(
${PCRE2_SOURCE_DIR}/src/pcre2_chartables.c.dist
${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
COPYONLY
)
# Generate config.h from the template shipped with PCRE2, @ONLY restricts
# the substitution to @VAR@ references.
# NOTE(review): this file only defines the PCRE2_SUPPORT_* cache options. If
# the template tests other names (upstream's CMakeLists.txt sets e.g.
# SUPPORT_UNICODE from PCRE2_SUPPORT_UNICODE) those stay undefined here --
# confirm against config-cmake.h.in.
configure_file(
${PCRE2_SOURCE_DIR}/config-cmake.h.in
${CMAKE_CURRENT_BINARY_DIR}/interface/config.h
@ONLY
)
# Generate the public header pcre2.h in the interface directory
configure_file(
${PCRE2_SOURCE_DIR}/src/pcre2.h.in
${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h
@ONLY
)
# Define our library
# The only header in the file set is the generated pcre2.h
list(APPEND PCRE2_HEADERS
${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h)
# The sources come from the extracted archive, except for the character
# tables which were copied into the build tree above
list(APPEND PCRE2_SOURCES
${PCRE2_SOURCE_DIR}/src/pcre2_auto_possess.c
${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
${PCRE2_SOURCE_DIR}/src/pcre2_chkdint.c
${PCRE2_SOURCE_DIR}/src/pcre2_compile.c
${PCRE2_SOURCE_DIR}/src/pcre2_compile_class.c
${PCRE2_SOURCE_DIR}/src/pcre2_config.c
${PCRE2_SOURCE_DIR}/src/pcre2_context.c
${PCRE2_SOURCE_DIR}/src/pcre2_convert.c
${PCRE2_SOURCE_DIR}/src/pcre2_dfa_match.c
${PCRE2_SOURCE_DIR}/src/pcre2_error.c
${PCRE2_SOURCE_DIR}/src/pcre2_extuni.c
${PCRE2_SOURCE_DIR}/src/pcre2_find_bracket.c
${PCRE2_SOURCE_DIR}/src/pcre2_jit_compile.c
${PCRE2_SOURCE_DIR}/src/pcre2_maketables.c
${PCRE2_SOURCE_DIR}/src/pcre2_match.c
${PCRE2_SOURCE_DIR}/src/pcre2_match_data.c
${PCRE2_SOURCE_DIR}/src/pcre2_newline.c
${PCRE2_SOURCE_DIR}/src/pcre2_ord2utf.c
${PCRE2_SOURCE_DIR}/src/pcre2_pattern_info.c
${PCRE2_SOURCE_DIR}/src/pcre2_script_run.c
${PCRE2_SOURCE_DIR}/src/pcre2_serialize.c
${PCRE2_SOURCE_DIR}/src/pcre2_string_utils.c
${PCRE2_SOURCE_DIR}/src/pcre2_study.c
${PCRE2_SOURCE_DIR}/src/pcre2_substitute.c
${PCRE2_SOURCE_DIR}/src/pcre2_substring.c
${PCRE2_SOURCE_DIR}/src/pcre2_tables.c
${PCRE2_SOURCE_DIR}/src/pcre2_ucd.c
${PCRE2_SOURCE_DIR}/src/pcre2_valid_utf.c
${PCRE2_SOURCE_DIR}/src/pcre2_xclass.c
)
# An OBJECT library, so the object files can be linked into another library
add_library(pcre2s OBJECT)
target_sources(pcre2s
PRIVATE ${PCRE2_SOURCES}
PUBLIC
FILE_SET pcre2_headers TYPE HEADERS
BASE_DIRS ${PCRE2_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/interface
FILES ${PCRE2_HEADERS}
)
# These definitions are PUBLIC and thus also apply to targets using pcre2s
target_compile_definitions(pcre2s PUBLIC PCRE2_CODE_UNIT_WIDTH=8 HAVE_CONFIG_H)
if(NOT BUILD_SHARED_LIBS)
target_compile_definitions(pcre2s PUBLIC PCRE2_STATIC)
endif()
# The generated config.h and the internal PCRE2 headers are only needed
# to compile the library itself
target_include_directories(pcre2s PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/interface ${PCRE2_SOURCE_DIR}/src)
if(PCRE2_STATIC_PIC)
set_target_properties(pcre2s PROPERTIES POSITION_INDEPENDENT_CODE 1)
endif()
# # Installation and config files
# include(CMakePackageConfigHelpers)
# include(GenerateExportHeader)
# # Install rules
# install(TARGETS pcre2s
# EXPORT pcre2s
# FILE_SET pcre2_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
# if(MSVC AND BUILD_SHARED_LIBS)
# install(
# FILES $<TARGET_PDB_FILE:pcre2s>
# DESTINATION ${CMAKE_INSTALL_LIBDIR}
# OPTIONAL)
# endif()
# install(EXPORT pcre2s
# NAMESPACE pcre2s::
# FILE "pcre2s-targets.cmake"
# DESTINATION lib/cmake/pcre2s)
# configure_package_config_file(
# ${CMAKE_CURRENT_SOURCE_DIR}/pcre2s-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake
# INSTALL_DESTINATION lib/cmake/pcre2s)
# install(
# FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake"
# "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
# DESTINATION lib/cmake/pcre2s)
# set_target_properties(
# pcre2s
# PROPERTIES VERSION ${PCRE2_VERSION}
# SOVERSION ${PCRE2_VERSION}
# INTERFACE_pcre2s_MAJOR_VERSION ${PCRE2_MAJOR})
# set_property(
# TARGET pcre2s
# APPEND
# PROPERTY COMPATIBLE_INTERFACE_STRING pcre2s_MAJOR_VERSION)
# write_basic_package_version_file(
# "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
# VERSION "${PCRE2_VERSION}"
# COMPATIBILITY AnyNewerVersion)
# # Testing
# if(PROJECT_IS_TOP_LEVEL)
# include(CTest)
# if(BUILD_TESTING)
# add_subdirectory(test)
# endif()
# endif()

View File

@@ -2394,3 +2394,414 @@ VAL "Create component" 1999-07-08 RCSB
VAL "Modify descriptor" 2011-06-04 RCSB
#
data_NAG
#
_chem_comp.id NAG
_chem_comp.name 2-acetamido-2-deoxy-beta-D-glucopyranose
_chem_comp.type "D-saccharide, beta linking"
_chem_comp.pdbx_type ATOMS
_chem_comp.formula "C8 H15 N O6"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms
;N-acetyl-beta-D-glucosamine; 2-acetamido-2-deoxy-beta-D-glucose; 2-acetamido-2-deoxy-D-glucose;
2-acetamido-2-deoxy-glucose; N-ACETYL-D-GLUCOSAMINE
;
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2020-07-17
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces ?
_chem_comp.formula_weight 221.208
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code NAG
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 8PCH
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_pdbx_chem_comp_synonyms.ordinal
_pdbx_chem_comp_synonyms.comp_id
_pdbx_chem_comp_synonyms.name
_pdbx_chem_comp_synonyms.provenance
_pdbx_chem_comp_synonyms.type
1 NAG N-acetyl-beta-D-glucosamine PDB ?
2 NAG 2-acetamido-2-deoxy-beta-D-glucose PDB ?
3 NAG 2-acetamido-2-deoxy-D-glucose PDB ?
4 NAG 2-acetamido-2-deoxy-glucose PDB ?
5 NAG N-ACETYL-D-GLUCOSAMINE PDB ?
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
NAG C1 C1 C 0 1 N N R 7.396 28.163 26.662 0.185 1.082 -0.421 C1 NAG 1
NAG C2 C2 C 0 1 N N R 6.973 29.233 27.644 0.790 -0.220 0.112 C2 NAG 2
NAG C3 C3 C 0 1 N N R 7.667 29.055 29.000 -0.124 -1.390 -0.265 C3 NAG 3
NAG C4 C4 C 0 1 N N S 7.573 27.588 29.490 -1.526 -1.129 0.294 C4 NAG 4
NAG C5 C5 C 0 1 N N R 7.902 26.592 28.373 -2.042 0.207 -0.246 C5 NAG 5
NAG C6 C6 C 0 1 N N N 7.599 25.173 28.797 -3.417 0.504 0.355 C6 NAG 6
NAG C7 C7 C 0 1 N N N 6.291 31.299 26.595 3.197 0.157 0.076 C7 NAG 7
NAG C8 C8 C 0 1 N N N 6.684 32.649 26.036 4.559 -0.052 -0.533 C8 NAG 8
NAG N2 N2 N 0 1 N N N 7.268 30.545 27.089 2.114 -0.422 -0.480 N2 NAG 9
NAG O1 O1 O 0 1 N Y N 6.676 28.363 25.419 1.003 2.185 -0.024 O1 NAG 10
NAG O3 O3 O 0 1 N N N 7.038 29.909 29.947 0.395 -2.600 0.291 O3 NAG 11
NAG O4 O4 O 0 1 N N N 8.494 27.358 30.574 -2.405 -2.180 -0.114 O4 NAG 12
NAG O5 O5 O 0 1 N N N 7.104 26.875 27.206 -1.130 1.248 0.113 O5 NAG 13
NAG O6 O6 O 0 1 N N N 6.232 25.040 29.165 -3.949 1.691 -0.236 O6 NAG 14
NAG O7 O7 O 0 1 N N N 5.114 30.936 26.562 3.074 0.845 1.067 O7 NAG 15
NAG H1 H1 H 0 1 N N N 8.477 28.257 26.481 0.133 1.040 -1.509 H1 NAG 16
NAG H2 H2 H 0 1 N N N 5.888 29.146 27.803 0.879 -0.163 1.197 H2 NAG 17
NAG H3 H3 H 0 1 N N N 8.729 29.321 28.892 -0.174 -1.478 -1.350 H3 NAG 18
NAG H4 H4 H 0 1 N N N 6.544 27.403 29.831 -1.483 -1.091 1.382 H4 NAG 19
NAG H5 H5 H 0 1 N N N 8.971 26.674 28.128 -2.123 0.154 -1.332 H5 NAG 20
NAG H61 H61 H 0 1 N N N 7.816 24.492 27.961 -4.088 -0.333 0.157 H61 NAG 21
NAG H62 H62 H 0 1 N N N 8.232 24.910 29.657 -3.320 0.645 1.431 H62 NAG 22
NAG H81 H81 H 0 1 N N N 5.791 33.159 25.646 4.560 0.320 -1.558 H81 NAG 23
NAG H82 H82 H 0 1 N N N 7.136 33.258 26.833 5.305 0.490 0.050 H82 NAG 24
NAG H83 H83 H 0 1 N N N 7.411 32.511 25.222 4.799 -1.115 -0.532 H83 NAG 25
NAG HN2 HN2 H 0 1 N N N 8.210 30.881 27.079 2.212 -0.973 -1.273 HN2 NAG 26
NAG HO1 HO1 H 0 1 N Y N 6.933 27.696 24.793 0.679 3.044 -0.328 HO1 NAG 27
NAG HO3 HO3 H 0 1 N Y N 7.459 29.809 30.793 -0.135 -3.384 0.091 HO3 NAG 28
NAG HO4 HO4 H 0 1 N Y N 8.425 26.456 30.863 -3.312 -2.079 0.206 HO4 NAG 29
NAG HO6 HO6 H 0 1 N Y N 6.060 24.143 29.428 -4.822 1.940 0.099 HO6 NAG 30
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
NAG C1 C2 SING N N 1
NAG C1 O1 SING N N 2
NAG C1 O5 SING N N 3
NAG C1 H1 SING N N 4
NAG C2 C3 SING N N 5
NAG C2 N2 SING N N 6
NAG C2 H2 SING N N 7
NAG C3 C4 SING N N 8
NAG C3 O3 SING N N 9
NAG C3 H3 SING N N 10
NAG C4 C5 SING N N 11
NAG C4 O4 SING N N 12
NAG C4 H4 SING N N 13
NAG C5 C6 SING N N 14
NAG C5 O5 SING N N 15
NAG C5 H5 SING N N 16
NAG C6 O6 SING N N 17
NAG C6 H61 SING N N 18
NAG C6 H62 SING N N 19
NAG C7 C8 SING N N 20
NAG C7 N2 SING N N 21
NAG C7 O7 DOUB N N 22
NAG C8 H81 SING N N 23
NAG C8 H82 SING N N 24
NAG C8 H83 SING N N 25
NAG N2 HN2 SING N N 26
NAG O1 HO1 SING N N 27
NAG O3 HO3 SING N N 28
NAG O4 HO4 SING N N 29
NAG O6 HO6 SING N N 30
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
NAG SMILES ACDLabs 12.01 "O=C(NC1C(O)C(O)C(OC1O)CO)C"
NAG InChI InChI 1.03 "InChI=1S/C8H15NO6/c1-3(11)9-5-7(13)6(12)4(2-10)15-8(5)14/h4-8,10,12-14H,2H2,1H3,(H,9,11)/t4-,5-,6-,7-,8-/m1/s1"
NAG InChIKey InChI 1.03 OVRNDRQMDRJTHS-FMDGEEDCSA-N
NAG SMILES_CANONICAL CACTVS 3.370 "CC(=O)N[C@H]1[C@H](O)O[C@H](CO)[C@@H](O)[C@@H]1O"
NAG SMILES CACTVS 3.370 "CC(=O)N[CH]1[CH](O)O[CH](CO)[CH](O)[CH]1O"
NAG SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC(=O)N[C@@H]1[C@H]([C@@H]([C@H](O[C@H]1O)CO)O)O"
NAG SMILES "OpenEye OEToolkits" 1.7.6 "CC(=O)NC1C(C(C(OC1O)CO)O)O"
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
NAG "SYSTEMATIC NAME" ACDLabs 12.01 "2-(acetylamino)-2-deoxy-beta-D-glucopyranose"
NAG "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "N-[(2R,3R,4R,5S,6R)-6-(hydroxymethyl)-2,4,5-tris(oxidanyl)oxan-3-yl]ethanamide"
NAG "CONDENSED IUPAC CARBOHYDRATE SYMBOL" GMML 1.0 DGlcpNAcb
NAG "COMMON NAME" GMML 1.0 N-acetyl-b-D-glucopyranosamine
NAG "IUPAC CARBOHYDRATE SYMBOL" PDB-CARE 1.0 b-D-GlcpNAc
NAG "SNFG CARBOHYDRATE SYMBOL" GMML 1.0 GlcNAc
# #
loop_
_pdbx_chem_comp_feature.comp_id
_pdbx_chem_comp_feature.type
_pdbx_chem_comp_feature.value
_pdbx_chem_comp_feature.source
_pdbx_chem_comp_feature.support
NAG "CARBOHYDRATE ISOMER" D PDB ?
NAG "CARBOHYDRATE RING" pyranose PDB ?
NAG "CARBOHYDRATE ANOMER" beta PDB ?
NAG "CARBOHYDRATE PRIMARY CARBONYL GROUP" aldose PDB ?
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
NAG "Create component" 1999-07-08 RCSB
NAG "Modify descriptor" 2011-06-04 RCSB
NAG "Modify leaving atom flag" 2011-07-01 RCSB
NAG "Modify leaving atom flag" 2012-11-26 RCSB
NAG "Other modification" 2019-08-12 RCSB
NAG "Other modification" 2019-12-19 RCSB
NAG "Other modification" 2020-07-03 RCSB
NAG "Modify name" 2020-07-17 RCSB
NAG "Modify synonyms" 2020-07-17 RCSB
##
data_HIS
#
_chem_comp.id HIS
_chem_comp.name HISTIDINE
_chem_comp.type "L-PEPTIDE LINKING"
_chem_comp.pdbx_type ATOMP
_chem_comp.formula "C6 H10 N3 O2"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 1
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2011-06-04
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces ?
_chem_comp.formula_weight 156.162
_chem_comp.one_letter_code H
_chem_comp.three_letter_code HIS
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details "OpenEye/OEToolkits V1.4.2"
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code ?
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site EBI
#
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
HIS N N N 0 1 N N N 33.472 42.685 -4.610 -0.040 -1.210 0.053 N HIS 1
HIS CA CA C 0 1 N N S 33.414 41.686 -5.673 1.172 -1.709 0.652 CA HIS 2
HIS C C C 0 1 N N N 33.773 42.279 -7.040 1.083 -3.207 0.905 C HIS 3
HIS O O O 0 1 N N N 33.497 43.444 -7.337 0.040 -3.770 1.222 O HIS 4
HIS CB CB C 0 1 N N N 32.005 41.080 -5.734 1.484 -0.975 1.962 CB HIS 5
HIS CG CG C 0 1 Y N N 31.888 39.902 -6.651 2.940 -1.060 2.353 CG HIS 6
HIS ND1 ND1 N 1 1 Y N N 32.539 38.710 -6.414 3.380 -2.075 3.129 ND1 HIS 7
HIS CD2 CD2 C 0 1 Y N N 31.199 39.734 -7.804 3.960 -0.251 2.046 CD2 HIS 8
HIS CE1 CE1 C 0 1 Y N N 32.251 37.857 -7.382 4.693 -1.908 3.317 CE1 HIS 9
HIS NE2 NE2 N 0 1 Y N N 31.439 38.453 -8.237 5.058 -0.801 2.662 NE2 HIS 10
HIS OXT OXT O 0 1 N Y N 34.382 41.455 -7.879 2.247 -3.882 0.744 OXT HIS 11
HIS H H H 0 1 N N N 33.485 42.227 -3.721 -0.102 -1.155 -0.950 H HIS 12
HIS H2 HN2 H 0 1 N Y N 34.301 43.234 -4.714 -0.715 -0.741 0.634 H2 HIS 13
HIS HA HA H 0 1 N N N 34.155 40.908 -5.439 1.965 -1.558 -0.089 HA HIS 14
HIS HB2 1HB H 0 1 N N N 31.733 40.750 -4.721 1.215 0.087 1.879 HB2 HIS 15
HIS HB3 2HB H 0 1 N N N 31.337 41.860 -6.127 0.859 -1.368 2.775 HB3 HIS 16
HIS HD1 HD1 H 0 1 N N N 33.135 38.521 -5.633 2.828 -2.838 3.511 HD1 HIS 17
HIS HD2 HD2 H 0 1 N N N 30.577 40.470 -8.292 4.108 0.647 1.479 HD2 HIS 18
HIS HE1 HE1 H 0 1 N N N 32.618 36.844 -7.461 5.340 -2.550 3.892 HE1 HIS 19
HIS HE2 HE2 H 0 1 N N N 31.061 38.039 -9.065 6.002 -0.428 2.627 HE2 HIS 20
HIS HXT HXT H 0 1 N Y N 34.553 41.905 -8.698 2.188 -4.848 0.901 HXT HIS 21
#
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
HIS N CA SING N N 1
HIS N H SING N N 2
HIS N H2 SING N N 3
HIS CA C SING N N 4
HIS CA CB SING N N 5
HIS CA HA SING N N 6
HIS C O DOUB N N 7
HIS C OXT SING N N 8
HIS CB CG SING N N 9
HIS CB HB2 SING N N 10
HIS CB HB3 SING N N 11
HIS CG ND1 SING Y N 12
HIS CG CD2 DOUB Y N 13
HIS ND1 CE1 DOUB Y N 14
HIS ND1 HD1 SING N N 15
HIS CD2 NE2 SING Y N 16
HIS CD2 HD2 SING N N 17
HIS CE1 NE2 SING Y N 18
HIS CE1 HE1 SING N N 19
HIS NE2 HE2 SING N N 20
HIS OXT HXT SING N N 21
#
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
HIS SMILES ACDLabs 10.04 "O=C(O)C(N)Cc1cnc[nH+]1"
HIS SMILES_CANONICAL CACTVS 3.341 "N[C@@H](Cc1c[nH]c[nH+]1)C(O)=O"
HIS SMILES CACTVS 3.341 "N[CH](Cc1c[nH]c[nH+]1)C(O)=O"
HIS SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "c1c([nH+]c[nH]1)C[C@@H](C(=O)O)N"
HIS SMILES "OpenEye OEToolkits" 1.5.0 "c1c([nH+]c[nH]1)CC(C(=O)O)N"
HIS InChI InChI 1.03 "InChI=1S/C6H9N3O2/c7-5(6(10)11)1-4-2-8-3-9-4/h2-3,5H,1,7H2,(H,8,9)(H,10,11)/p+1/t5-/m0/s1"
HIS InChIKey InChI 1.03 HNDVDQJCIGZPNO-YFKPBYRVSA-O
#
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
HIS "SYSTEMATIC NAME" ACDLabs 10.04 "3-(1H-imidazol-3-ium-4-yl)-L-alanine"
HIS "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 "(2S)-2-amino-3-(1H-imidazol-3-ium-4-yl)propanoic acid"
#
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
HIS "Create component" 1999-07-08 EBI
HIS "Modify descriptor" 2011-06-04 RCSB
#
data_HOH
#
_chem_comp.id HOH
_chem_comp.name WATER
_chem_comp.type NON-POLYMER
_chem_comp.pdbx_type HETAS
_chem_comp.formula "H2 O"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2011-06-04
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces MTO
_chem_comp.formula_weight 18.015
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code HOH
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details ?
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 1NHE
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
HOH O O O 0 1 N N N -23.107 18.401 -21.626 -0.064 0.000 0.000 O HOH 1
HOH H1 1H H 0 1 N N N -22.157 18.401 -21.626 0.512 0.000 -0.776 H1 HOH 2
HOH H2 2H H 0 1 N N N -23.424 18.401 -20.730 0.512 0.000 0.776 H2 HOH 3
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
HOH O H1 SING N N 1
HOH O H2 SING N N 2
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
HOH SMILES ACDLabs 10.04 O
HOH SMILES_CANONICAL CACTVS 3.341 O
HOH SMILES CACTVS 3.341 O
HOH SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 O
HOH SMILES "OpenEye OEToolkits" 1.5.0 O
HOH InChI InChI 1.03 InChI=1S/H2O/h1H2
HOH InChIKey InChI 1.03 XLYOFNOQVPJJNP-UHFFFAOYSA-N
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
HOH "SYSTEMATIC NAME" ACDLabs 10.04 water
HOH "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.5.0 oxidane
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
HOH "Create component" 1999-07-08 RCSB
HOH "Modify descriptor" 2011-06-04 RCSB
##

150867
rsrc/mmcif_ma.dic Normal file

File diff suppressed because it is too large Load Diff

174427
rsrc/mmcif_pdbx.dic Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

2080
sql-92.bnf Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,627 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <fstream>
#include <algorithm>
#include <mutex>
#include "cif++/Cif++.hpp"
#include "cif++/Compound.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/BondMap.hpp"
namespace mmcif
{
namespace
{
	// Compact key for atom and compound identifiers of at most four
	// characters.
	//
	// The characters are stored in id_s, unused positions stay zero. Copying
	// and all comparisons operate on the overlapping 32 bit integer id_n, so
	// the ordering is that of the integer value and not necessarily
	// alphabetical.
	//
	// NOTE(review): reading id_n after writing id_s relies on the compiler
	// tolerating type punning through a union -- confirm for new toolchains.
	union IDType
	{
		IDType() : id_n(0) {}
		IDType(const IDType& rhs) : id_n(rhs.id_n) {}

		// Construct from a string; throws BondMapException when s is longer
		// than four characters.
		IDType(const std::string& s)
			: IDType()
		{
			store(s);
		}

		IDType& operator=(const IDType& rhs)
		{
			id_n = rhs.id_n;
			return *this;
		}

		// Replace the stored identifier with s; throws BondMapException when
		// s is longer than four characters (the value is cleared in that case).
		IDType& operator=(const std::string& s)
		{
			id_n = 0;
			store(s);
			return *this;
		}

		bool operator<(const IDType& rhs) const
		{
			return id_n < rhs.id_n;
		}

		bool operator<=(const IDType& rhs) const
		{
			return id_n <= rhs.id_n;
		}

		bool operator==(const IDType& rhs) const
		{
			return id_n == rhs.id_n;
		}

		bool operator!=(const IDType& rhs) const
		{
			return id_n != rhs.id_n;
		}

		char id_s[4];
		uint32_t id_n;

	  private:
		// Copy s into id_s, the caller must have zeroed the storage already.
		// The identifiers come from input data, so an over-long one is
		// reported with an exception in every build type; the former assert
		// on the same condition aborted debug builds before the throw could
		// ever be reached.
		void store(const std::string& s)
		{
			if (s.length() > sizeof(id_s))
				throw BondMapException("Atom ID '" + s + "' is too long");

			std::copy(s.begin(), s.end(), id_s);
		}
	};

	static_assert(sizeof(IDType) == 4, "atom_id_type should be 4 bytes");
}
// // --------------------------------------------------------------------
// void createBondInfoFile(const fs::path& components, const fs::path& infofile)
// {
// std::ofstream outfile(infofile.string() + ".tmp", std::ios::binary);
// if (not outfile.is_open())
// throw BondMapException("Could not create bond info file " + infofile.string() + ".tmp");
// cif::File infile(components);
// std::set<atom_id_type> atomIDs;
// std::vector<atom_id_type> compoundIDs;
// for (auto& db: infile)
// {
// auto chem_comp_bond = db.get("chem_comp_bond");
// if (not chem_comp_bond)
// {
// if (cif::VERBOSE > 1)
// std::cerr << "Missing chem_comp_bond category in data block " << db.getName() << std::endl;
// continue;
// }
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
// {
// atomIDs.insert(atom_id_1);
// atomIDs.insert(atom_id_2);
// }
// compoundIDs.push_back({ db.getName() });
// }
// if (cif::VERBOSE)
// std::cout << "Number of unique atom names is " << atomIDs.size() << std::endl
// << "Number of unique residue names is " << compoundIDs.size() << std::endl;
// CompoundBondInfoFileHeader header = {};
// header.indexEntries = compoundIDs.size();
// header.atomEntries = atomIDs.size();
// outfile << header;
// for (auto atomID: atomIDs)
// outfile << atomID;
// auto dataOffset = outfile.tellp();
// std::vector<CompoundBondInfo> entries;
// entries.reserve(compoundIDs.size());
// std::map<atom_id_type, uint16_t> atomIDMap;
// for (auto& atomID: atomIDs)
// atomIDMap[atomID] = atomIDMap.size();
// for (auto& db: infile)
// {
// auto chem_comp_bond = db.get("chem_comp_bond");
// if (not chem_comp_bond)
// continue;
// std::set<uint16_t> bondedAtoms;
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
// {
// bondedAtoms.insert(atomIDMap[atom_id_1]);
// bondedAtoms.insert(atomIDMap[atom_id_2]);
// }
// std::map<uint16_t, int32_t> bondedAtomMap;
// for (auto id: bondedAtoms)
// bondedAtomMap[id] = static_cast<int32_t>(bondedAtomMap.size());
// CompoundBondInfo info = {
// db.getName(),
// static_cast<uint32_t>(bondedAtomMap.size()),
// outfile.tellp() - dataOffset
// };
// entries.push_back(info);
// // And now first write the array of atom IDs in this compound
// for (uint16_t id: bondedAtoms)
// write(outfile, id);
// // And then the symmetric matrix with bonds
// size_t N = bondedAtoms.size();
// size_t M = (N * (N - 1)) / 2;
// size_t K = M / 8;
// if (M % 8)
// K += 1;
// std::vector<uint8_t> m(K);
// for (const auto& [atom_id_1, atom_id_2]: chem_comp_bond->rows<std::string,std::string>({"atom_id_1", "atom_id_2"}))
// {
// auto a = bondedAtomMap[atomIDMap[atom_id_1]];
// auto b = bondedAtomMap[atomIDMap[atom_id_2]];
// assert(a != b);
// assert((int)b < (int)N);
// if (a > b)
// std::swap(a, b);
// size_t ix = ((b - 1) * b) / 2 + a;
// assert(ix < M);
// auto Bix = ix / 8;
// auto bix = ix % 8;
// m[Bix] |= 1 << bix;
// }
// outfile.write(reinterpret_cast<char*>(m.data()), m.size());
// }
// header.dataSize = outfile.tellp() - dataOffset;
// std::sort(entries.begin(), entries.end(), [](CompoundBondInfo& a, CompoundBondInfo& b)
// {
// return a.id < b.id;
// });
// for (auto& info: entries)
// outfile << info;
// outfile.seekp(0);
// outfile << header;
// // compress
// outfile.close();
// std::ifstream in(infofile.string() + ".tmp", std::ios::binary);
// std::ofstream out(infofile, std::ios::binary);
// {
// io::filtering_stream<io::output> os;
// os.push(io::gzip_compressor());
// os.push(out);
// io::copy(in, os);
// }
// in.close();
// out.close();
// fs::remove(infofile.string() + ".tmp");
// }
// --------------------------------------------------------------------
// Bond table for a single compound.
struct CompoundBondInfo
{
	// identifier of the compound this table belongs to
	IDType mID;

	// pairs of atom indices that are bonded
	std::set<std::tuple<uint32_t,uint32_t>> mBonded;

	// Returns true when exactly the pair (a1, a2), in this order, is stored
	// in mBonded.
	bool bonded(uint32_t a1, uint32_t a2) const
	{
		const std::tuple<uint32_t,uint32_t> wanted(a1, a2);
		return mBonded.find(wanted) != mBonded.end();
	}
};
// --------------------------------------------------------------------
class CompoundBondMap
{
public:
static CompoundBondMap &instance()
{
static std::unique_ptr<CompoundBondMap> s_instance(new CompoundBondMap);
return *s_instance;
}
bool bonded(const std::string& compoundID, const std::string& atomID1, const std::string& atomID2);
private:
CompoundBondMap() {}
uint32_t getAtomID(const std::string& atomID)
{
IDType id(atomID);
uint32_t result;
auto i = mAtomIDIndex.find(id);
if (i == mAtomIDIndex.end())
{
result = uint32_t(mAtomIDIndex.size());
mAtomIDIndex[id] = result;
}
else
result = i->second;
return result;
}
std::map<IDType,uint32_t> mAtomIDIndex;
std::vector<CompoundBondInfo> mCompounds;
std::mutex mMutex;
};
bool CompoundBondMap::bonded(const std::string &compoundID, const std::string& atomID1, const std::string& atomID2)
{
std::lock_guard lock(mMutex);
using namespace std::literals;
IDType id(compoundID);
uint32_t a1 = getAtomID(atomID1);
uint32_t a2 = getAtomID(atomID2);
if (a1 > a2)
std::swap(a1, a2);
for (auto &bi: mCompounds)
{
if (bi.mID != id)
continue;
return bi.bonded(a1, a2);
}
bool result = false;
// not found in our cache, calculate
CompoundBondInfo bondInfo{ id };
auto compound = mmcif::CompoundFactory::instance().create(compoundID);
if (not compound)
std::cerr << "Missing compound bond info for " << compoundID << std::endl;
else
{
for (auto &atom: compound->bonds())
{
uint32_t ca1 = getAtomID(atom.atomID[0]);
uint32_t ca2 = getAtomID(atom.atomID[1]);
if (ca1 > ca2)
std::swap(ca1, ca2);
bondInfo.mBonded.insert({ca1, ca2});
result = result or (a1 == ca1 and a2 == ca2);
}
}
mCompounds.push_back(bondInfo);
return result;
}
// --------------------------------------------------------------------
// Construct the bond map for structure p.
//
// The constructor fills four members:
//  - index:    atom id -> dense number, in the order p.atoms() returns them
//  - bond:     keys of atom pairs that are directly bonded
//  - link:     per atom id the set of atom ids it is connected to through a
//              struct_conn record
//  - bond_1_4: keys of atom pairs collected in the last loop below, i.e. a
//              bonded neighbour of an atom combined with a 1-3 partner of
//              that same atom
//
// Bonds come from three sources: consecutive residues in
// pdbx_poly_seq_scheme (atom C to atom N), struct_conn records and the
// per-compound bond tables served by CompoundBondMap.
BondMap::BondMap(const Structure& p)
{
auto& compoundBondInfo = CompoundBondMap::instance();
auto atoms = p.atoms();
dim = uint32_t(atoms.size());
// bond = std::vector<bool>(dim * (dim - 1), false);
// Number the atoms. Since C++17 the right hand side of an assignment is
// evaluated first, so index.size() is taken before the new entry is created.
for (auto& atom: atoms)
index[atom.id()] = uint32_t(index.size());
// register a bond between the atoms with id a and b
auto bindAtoms = [this](const std::string& a, const std::string& b)
{
uint32_t ixa = index[a];
uint32_t ixb = index[b];
bond.insert(key(ixa, ixb));
};
// register a bond and additionally remember both atoms as linked to each other
auto linkAtoms = [this,&bindAtoms](const std::string& a, const std::string& b)
{
bindAtoms(a, b);
link[a].insert(b);
link[b].insert(a);
};
cif::Datablock& db = p.getFile().data();
// collect all compounds first
std::set<std::string> compounds;
for (auto c: db["chem_comp"])
compounds.insert(c["id"].as<std::string>());
// make sure we also have all residues in the polyseq
for (auto m: db["entity_poly_seq"])
{
std::string c = m["mon_id"].as<std::string>();
if (compounds.count(c))
continue;
if (cif::VERBOSE > 1)
std::cerr << "Warning: mon_id " << c << " is missing in the chem_comp category" << std::endl;
compounds.insert(c);
}
// NOTE(review): nothing in this constructor touches 'progress' after it is
// created -- confirm whether it was meant to be advanced per compound.
cif::Progress progress(compounds.size(), "Creating bond map");
// some helper indices to speed things up a bit
std::map<std::tuple<std::string,int,std::string>,std::string> atomMapByAsymSeqAndAtom;
for (auto& a: p.atoms())
{
auto key = make_tuple(a.labelAsymID(), a.labelSeqID(), a.labelAtomID());
atomMapByAsymSeqAndAtom[key] = a.id();
}
// first link all residues in a polyseq
// (assumes the rows of pdbx_poly_seq_scheme are grouped by asym_id and
// ordered by seq_id -- TODO confirm)
std::string lastAsymID;
int lastSeqID = 0;
for (auto r: db["pdbx_poly_seq_scheme"])
{
std::string asymID;
int seqID;
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
if (asymID != lastAsymID) // first in a new sequence
{
lastAsymID = asymID;
lastSeqID = seqID;
continue;
}
// bond atom C of the previous residue to atom N of this one; a lookup
// that finds nothing yields an empty id and the pair is skipped
auto c = atomMapByAsymSeqAndAtom[make_tuple(asymID, lastSeqID, "C")];
auto n = atomMapByAsymSeqAndAtom[make_tuple(asymID, seqID, "N")];
if (not (c.empty() or n.empty()))
bindAtoms(c, n);
lastSeqID = seqID;
}
// next the connections listed in struct_conn, these end up in both
// 'bond' and 'link'
for (auto l: db["struct_conn"])
{
std::string asym1, asym2, atomId1, atomId2;
int seqId1 = 0, seqId2 = 0;
cif::tie(asym1, asym2, atomId1, atomId2, seqId1, seqId2) =
l.get("ptnr1_label_asym_id", "ptnr2_label_asym_id",
"ptnr1_label_atom_id", "ptnr2_label_atom_id",
"ptnr1_label_seq_id", "ptnr2_label_seq_id");
std::string a = atomMapByAsymSeqAndAtom[make_tuple(asym1, seqId1, atomId1)];
std::string b = atomMapByAsymSeqAndAtom[make_tuple(asym2, seqId2, atomId2)];
if (not (a.empty() or b.empty()))
linkAtoms(a, b);
}
// then link all atoms in the compounds
for (auto c: compounds)
{
if (c == "HOH" or c == "H2O" or c == "WAT")
{
if (cif::VERBOSE)
std::cerr << "skipping water in bond map calculation" << std::endl;
continue;
}
// ask the compound bond table whether two atoms are bonded in compound c,
// based on their label atom ids
auto bonded = [c, &compoundBondInfo](const Atom& a, const Atom& b)
{
auto label_a = a.labelAtomID();
auto label_b = b.labelAtomID();
return compoundBondInfo.bonded(c, label_a, label_b);
};
// loop over poly_seq_scheme
for (auto r: db["pdbx_poly_seq_scheme"].find(cif::Key("mon_id") == c))
{
std::string asymID;
int seqID;
cif::tie(asymID, seqID) = r.get("asym_id", "seq_id");
// the atoms of this residue: same asym and same seq id
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto& a) { return a.labelAsymID() == asymID and a.labelSeqID() == seqID; });
// test every unordered pair once
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
bindAtoms(rAtoms[i].id(), rAtoms[j].id());
}
}
}
// loop over pdbx_nonpoly_scheme
for (auto r: db["pdbx_nonpoly_scheme"].find(cif::Key("mon_id") == c))
{
std::string asymID;
cif::tie(asymID) = r.get("asym_id");
// here the atoms are selected on asym id alone
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto& a) { return a.labelAsymID() == asymID; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
{
// same effect as bindAtoms above
uint32_t ixa = index[rAtoms[i].id()];
uint32_t ixb = index[rAtoms[j].id()];
bond.insert(key(ixa, ixb));
}
}
}
}
// loop over pdbx_branch_scheme
for (auto r: db["pdbx_branch_scheme"].find(cif::Key("mon_id") == c))
{
std::string asymID;
cif::tie(asymID) = r.get("asym_id");
// NOTE(review): selection is on asym id only, so when an asym holds more
// than one residue the atoms of all of them are paired using the bond table
// of compound c -- verify this cannot bind atoms of different residues.
std::vector<Atom> rAtoms;
copy_if(atoms.begin(), atoms.end(), back_inserter(rAtoms),
[&](auto& a) { return a.labelAsymID() == asymID; });
for (uint32_t i = 0; i + 1 < rAtoms.size(); ++i)
{
for (uint32_t j = i + 1; j < rAtoms.size(); ++j)
{
if (bonded(rAtoms[i], rAtoms[j]))
{
uint32_t ixa = index[rAtoms[i].id()];
uint32_t ixb = index[rAtoms[j].id()];
bond.insert(key(ixa, ixb));
}
}
}
}
}
// start by creating an index for single bonds
// (b1_2 maps an atom number to each of its bonded neighbours, both directions)
std::multimap<uint32_t,uint32_t> b1_2;
for (auto& bk: bond)
{
uint32_t a, b;
std::tie(a, b) = dekey(bk);
b1_2.insert({ a, b });
b1_2.insert({ b, a });
}
// b1_3: pairs of atoms that share a bonded neighbour i but are not bonded
// to each other
std::multimap<uint32_t,uint32_t> b1_3;
for (uint32_t i = 0; i < dim; ++i)
{
auto a = b1_2.equal_range(i);
// s = all neighbours of atom i
std::vector<uint32_t> s;
for (auto j = a.first; j != a.second; ++j)
s.push_back(j->second);
for (size_t si1 = 0; si1 + 1 < s.size(); ++si1)
{
for (size_t si2 = si1 + 1; si2 < s.size(); ++si2)
{
uint32_t x = s[si1];
uint32_t y = s[si2];
if (isBonded(x, y))
continue;
b1_3.insert({ x, y });
b1_3.insert({ y, x });
}
}
}
// finally combine, for every atom i, each bonded neighbour b1 with each
// 1-3 partner b2; pairs that are not directly bonded go into bond_1_4
for (uint32_t i = 0; i < dim; ++i)
{
auto a1 = b1_2.equal_range(i);
auto a2 = b1_3.equal_range(i);
for (auto ai1 = a1.first; ai1 != a1.second; ++ai1)
{
for (auto ai2 = a2.first; ai2 != a2.second; ++ai2)
{
uint32_t b1 = ai1->second;
uint32_t b2 = ai2->second;
if (isBonded(b1, b2))
continue;
bond_1_4.insert(key(b1, b2));
}
}
}
}
// Returns the ids of the atoms recorded in 'link' for atom a, or an empty
// vector when nothing is recorded for it.
std::vector<std::string> BondMap::linked(const Atom& a) const
{
	const auto entry = link.find(a.id());

	if (entry == link.end())
		return std::vector<std::string>();

	return std::vector<std::string>(entry->second.begin(), entry->second.end());
}
// Returns the id of every atom of compound compoundID, in the order the
// compound lists them.
// Throws BondMapException when the CompoundFactory cannot deliver the
// compound.
std::vector<std::string> BondMap::atomIDsForCompound(const std::string& compoundID)
{
	auto* compound = mmcif::CompoundFactory::instance().create(compoundID);

	if (compound == nullptr)
		throw BondMapException("Missing bond information for compound " + compoundID);

	std::vector<std::string> atomIDs;

	for (auto& compAtom: compound->atoms())
		atomIDs.push_back(compAtom.id);

	return atomIDs;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,351 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <boost/algorithm/string.hpp>
#include "cif++/Cif++.hpp"
#include "cif++/CifParser.hpp"
#include "cif++/CifValidator.hpp"
namespace ba = boost::algorithm;
extern int VERBOSE;
namespace cif
{
// Create an error carrying msg as is.
ValidationError::ValidationError(const std::string& msg)
	: mMsg(msg)
{
}

// Create an error for item 'item' in category 'cat'; the stored message is
// msg prefixed with the full item name.
ValidationError::ValidationError(const std::string& cat, const std::string& item, const std::string& msg)
	: ValidationError("When validating _" + cat + '.' + item + ": " + msg)
{
}
// --------------------------------------------------------------------
// Translate a textual primitive type code ("char", "uchar" or "numb", matched
// using iequals) into the corresponding DDL_PrimitiveType value.
// Throws ValidationError when the string is none of these.
DDL_PrimitiveType mapToPrimitiveType(const std::string& s)
{
	if (iequals(s, "char"))
		return DDL_PrimitiveType::Char;

	if (iequals(s, "uchar"))
		return DDL_PrimitiveType::UChar;

	if (iequals(s, "numb"))
		return DDL_PrimitiveType::Numb;

	throw ValidationError("Not a known primitive type");
}
// --------------------------------------------------------------------
// Three-way comparison of two zero-terminated values, interpreted according
// to the primitive type of this validator.
//
// Returns a negative value when a sorts before b, zero when they are
// considered equal and a positive value when a sorts after b.
//
//  - An empty string sorts before every non-empty string.
//  - Numb:  both values are converted with strtod; differences not larger
//           than the double epsilon are treated as equal.
//  - Char:  character by character comparison in which a run of spaces is
//           treated as a single space.
//  - UChar: as Char, but both sides are lower-cased first.
int ValidateType::compare(const char* a, const char* b) const
{
	// Handle empty input up front, the code below assumes both are non-empty
	if (*a == 0)
		return *b == 0 ? 0 : -1;
	if (*b == 0)
		return +1;

	int result = 0;

	// Note: none of the calls below (strtod, tolower, std::abs) can throw,
	// so no exception handling is needed here.
	switch (mPrimitiveType)
	{
		case DDL_PrimitiveType::Numb:
		{
			const double da = strtod(a, nullptr);
			const double db = strtod(b, nullptr);
			const double d = da - db;

			// a NaN difference fails this test and thus compares as equal
			if (std::abs(d) > std::numeric_limits<double>::epsilon())
				result = d > 0 ? 1 : -1;
			break;
		}

		case DDL_PrimitiveType::UChar:
		case DDL_PrimitiveType::Char:
		{
			// CIF is guaranteed to have ascii only, therefore this primitive code will do
			// also, we're collapsing spaces
			const bool ignoreCase = mPrimitiveType == DDL_PrimitiveType::UChar;

			for (auto ai = a, bi = b;; ++ai, ++bi)
			{
				if (*ai == 0)
				{
					if (*bi != 0)
						result = -1;
					break;
				}

				if (*bi == 0)
				{
					result = 1;
					break;
				}

				char ca = *ai;
				char cb = *bi;

				if (ignoreCase)
				{
					// tolower requires a value representable as unsigned char,
					// passing a negative char would be undefined behaviour
					ca = static_cast<char>(tolower(static_cast<unsigned char>(ca)));
					cb = static_cast<char>(tolower(static_cast<unsigned char>(cb)));
				}

				result = ca - cb;
				if (result != 0)
					break;

				// both sides are at a space: skip the rest of the run so that
				// any number of consecutive spaces compares as one
				if (ca == ' ')
				{
					while (ai[1] == ' ')
						++ai;
					while (bi[1] == ' ')
						++bi;
				}
			}
			break;
		}
	}

	return result;
}
// --------------------------------------------------------------------
//void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
//{
//// if (mParent != nullptr and VERBOSE)
//// cerr << "replacing parent in " << mCategory->mName << " from " << mParent->mCategory->mName << " to " << parent->mCategory->mName << endl;
//// mParent = parent;
//
// if (mType == nullptr and parent != nullptr)
// mType = parent->mType;
//
// if (parent != nullptr)
// {
// mLinked.push_back({parent, parentItem, childItem});
//
// parent->mChildren.insert(this);
////
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
//// parent->mForeignKeys.insert(this);
// }
//}
// Validate a single value for this item.
// Empty values and the placeholder values "?" and "." are always accepted.
// Any other value must match the regular expression of the item type (when a
// type is known) and, when the item has a list of enumerated values, must be
// contained in that list. Throws ValidationError otherwise.
void ValidateItem::operator()(std::string value) const
{
	if (value.empty() or value == "?" or value == ".")
		return;

	const bool typeMismatch = mType != nullptr and not regex_match(value, mType->mRx);
	if (typeMismatch)
		throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' does not match type expression for type " + mType->mName);

	if (not mEnums.empty() and mEnums.count(value) == 0)
		throw ValidationError(mCategory->mName, mTag, "Value '" + value + "' is not in the list of allowed values");
}
// --------------------------------------------------------------------
// Register a validator for an item in this category.
// The validator is linked back to this category and, when it is flagged as
// mandatory, its tag is recorded in mMandatoryFields. When a validator for the
// same item was already registered the new one is dropped; this is reported
// on stdout when VERBOSE >= 4.
void ValidateCategory::addItemValidator(ValidateItem&& v)
{
	// Take a copy of the tag first: v is handed over to insert() below and
	// must not be inspected afterwards.
	const auto tag = v.mTag;

	if (v.mMandatory)
		mMandatoryFields.insert(tag);

	v.mCategory = this;

	auto r = mItemValidators.insert(std::move(v));
	if (not r.second and VERBOSE >= 4)
		std::cout << "Could not add validator for item " << tag << " to category " << mName << std::endl;
}
// Look up the validator registered for the item with the given tag.
// Returns nullptr when there is none (reported on stdout when VERBOSE > 4).
const ValidateItem* ValidateCategory::getValidatorForItem(std::string tag) const
{
	const auto pos = mItemValidators.find(ValidateItem{tag});
	if (pos != mItemValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
// --------------------------------------------------------------------
// Nothing special to set up or tear down, the members take care of themselves.
Validator::Validator() = default;

Validator::~Validator() = default;
// Register a type validator. When a validator with the same key was already
// registered the new one is dropped; this is reported on stdout when
// VERBOSE > 4.
void Validator::addTypeValidator(ValidateType&& v)
{
	// Take a copy of the name first: v is handed over to insert() below and
	// must not be inspected afterwards.
	const auto name = v.mName;

	auto r = mTypeValidators.insert(std::move(v));
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for type " << name << std::endl;
}
// Look up the type validator registered under the given type code.
// Returns nullptr when there is none (reported on stdout when VERBOSE > 4).
const ValidateType* Validator::getValidatorForType(std::string typeCode) const
{
	// A probe object to search with; presumably only the name takes part in
	// the ordering, the remaining fields are merely fillers.
	const ValidateType probe{ typeCode, DDL_PrimitiveType::Char, boost::regex() };

	const auto pos = mTypeValidators.find(probe);
	if (pos != mTypeValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
// Register a category validator. When a validator with the same key was
// already registered the new one is dropped; this is reported on stdout when
// VERBOSE > 4.
void Validator::addCategoryValidator(ValidateCategory&& v)
{
	// Take a copy of the name first: v is handed over to insert() below and
	// must not be inspected afterwards.
	const auto name = v.mName;

	auto r = mCategoryValidators.insert(std::move(v));
	if (not r.second and VERBOSE > 4)
		std::cout << "Could not add validator for category " << name << std::endl;
}
// Look up the validator registered for the category with the given name.
// Returns nullptr when there is none (reported on stdout when VERBOSE > 4).
const ValidateCategory* Validator::getValidatorForCategory(std::string category) const
{
	const auto pos = mCategoryValidators.find(ValidateCategory{category});
	if (pos != mCategoryValidators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
// Look up the item validator for a full tag name. The tag is split into a
// category and an item name using splitTagName, the category validator is
// located and asked for the item validator.
// Returns nullptr when either lookup fails (reported on stdout when VERBOSE > 4).
ValidateItem* Validator::getValidatorForItem(std::string tag) const
{
	std::string categoryName, itemName;
	std::tie(categoryName, itemName) = splitTagName(tag);

	ValidateItem* found = nullptr;

	if (auto* categoryValidator = getValidatorForCategory(categoryName))
		found = const_cast<ValidateItem*>(categoryValidator->getValidatorForItem(itemName));

	if (found == nullptr and VERBOSE > 4)
		std::cout << "No validator for item " << tag << std::endl;

	return found;
}
// Register a parent/child link between two categories.
// The link must have an equal number of parent and child keys, both
// categories must be known and every key must have an item validator in its
// category; std::runtime_error is thrown otherwise. A child item that has no
// type yet takes over the type of the parent item it is linked to.
void Validator::addLinkValidator(ValidateLink&& v)
{
	const auto keyCount = v.mParentKeys.size();

	assert(keyCount == v.mChildKeys.size());
	if (keyCount != v.mChildKeys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	const auto parentCategory = getValidatorForCategory(v.mParentCategory);
	const auto childCategory = getValidatorForCategory(v.mChildCategory);

	if (parentCategory == nullptr)
		throw std::runtime_error("unknown parent category " + v.mParentCategory);

	if (childCategory == nullptr)
		throw std::runtime_error("unknown child category " + v.mChildCategory);

	for (size_t ix = 0; ix < keyCount; ++ix)
	{
		const auto parentItem = parentCategory->getValidatorForItem(v.mParentKeys[ix]);
		if (parentItem == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.mParentCategory + '.' + v.mParentKeys[ix]);

		const auto childItem = childCategory->getValidatorForItem(v.mChildKeys[ix]);
		if (childItem == nullptr)
			throw std::runtime_error("unknown child tag _" + v.mChildCategory + '.' + v.mChildKeys[ix]);

		// let an untyped child inherit the type of its parent
		const bool inheritType = childItem->mType == nullptr and parentItem->mType != nullptr;
		if (inheritType)
			const_cast<ValidateItem*>(childItem)->mType = parentItem->mType;
	}

	mLinkValidators.emplace_back(std::move(v));
}
std::vector<const ValidateLink*> Validator::getLinksForParent(const std::string& category) const
{
std::vector<const ValidateLink*> result;
for (auto& l: mLinkValidators)
{
if (l.mParentCategory == category)
result.push_back(&l);
}
return result;
}
std::vector<const ValidateLink*> Validator::getLinksForChild(const std::string& category) const
{
std::vector<const ValidateLink*> result;
for (auto& l: mLinkValidators)
{
if (l.mChildCategory == category)
result.push_back(&l);
}
return result;
}
// Report a validation problem. In strict mode, or when the problem is fatal,
// a ValidationError is thrown. Otherwise the message is only written to
// stderr, and only when VERBOSE is set.
void Validator::reportError(const std::string& msg, bool fatal)
{
	const bool mustThrow = mStrict or fatal;
	if (mustThrow)
		throw ValidationError(msg);

	if (VERBOSE)
		std::cerr << msg << std::endl;
}
}

View File

@@ -1,745 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <map>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <boost/algorithm/string.hpp>
#include <filesystem>
#include <fstream>
#include "cif++/Cif++.hpp"
#include "cif++/CifParser.hpp"
#include "cif++/CifUtils.hpp"
#include "cif++/Compound.hpp"
#include "cif++/Point.hpp"
namespace ba = boost::algorithm;
namespace fs = std::filesystem;
namespace mmcif
{
// --------------------------------------------------------------------
// Return the four (or two) letter name for a bond type.
// Throws std::invalid_argument when the value is not one of the enumerators
// handled below.
std::string to_string(BondType bondType)
{
	const char *name = nullptr;

	switch (bondType)
	{
		case BondType::sing: name = "sing"; break;
		case BondType::doub: name = "doub"; break;
		case BondType::trip: name = "trip"; break;
		case BondType::quad: name = "quad"; break;
		case BondType::arom: name = "arom"; break;
		case BondType::poly: name = "poly"; break;
		case BondType::delo: name = "delo"; break;
		case BondType::pi: name = "pi"; break;
	}

	if (name == nullptr)
		throw std::invalid_argument("Invalid bondType");

	return name;
}
// Convert the name of a bond type (matched using cif::iequals) into the
// corresponding BondType. Throws std::invalid_argument, mentioning the
// offending string, when the name is not recognised.
BondType from_string(const std::string &bondType)
{
	static const std::pair<const char *, BondType> kBondTypeNames[] = {
		{ "sing", BondType::sing },
		{ "doub", BondType::doub },
		{ "trip", BondType::trip },
		{ "quad", BondType::quad },
		{ "arom", BondType::arom },
		{ "poly", BondType::poly },
		{ "delo", BondType::delo },
		{ "pi", BondType::pi }
	};

	for (const auto &[name, type] : kBondTypeNames)
	{
		if (cif::iequals(bondType, name))
			return type;
	}

	throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// Compound helper classes
// Ordering for CompoundAtom: first by atom ID, then by type symbol.
struct CompoundAtomLess
{
	bool operator()(const CompoundAtom &a, const CompoundAtom &b) const
	{
		const int byID = a.id.compare(b.id);
		if (byID != 0)
			return byID < 0;

		const int byType = a.typeSymbol - b.typeSymbol;
		return byType < 0;
	}
};
// Ordering for CompoundBond: first by the ID of the first atom, then by the
// ID of the second atom and finally by the numerical value of the bond type.
struct CompoundBondLess
{
	bool operator()(const CompoundBond &a, const CompoundBond &b) const
	{
		const int byFirst = a.atomID[0].compare(b.atomID[0]);
		if (byFirst != 0)
			return byFirst < 0;

		const int bySecond = a.atomID[1].compare(b.atomID[1]);
		if (bySecond != 0)
			return bySecond < 0;

		const int byType = static_cast<int>(a.type) - static_cast<int>(b.type);
		return byType < 0;
	}
};
// --------------------------------------------------------------------
// Compound
// Construct a compound from a datablock that uses the chem_comp,
// chem_comp_atom and chem_comp_bond categories with the pdbx_* items read
// below. The chem_comp category must contain exactly one row, otherwise a
// std::runtime_error is thrown.
Compound::Compound(cif::Datablock &db)
{
	// general info, taken from the one and only chem_comp row
	auto &compounds = db["chem_comp"];
	if (compounds.size() != 1)
		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

	cif::tie(mID, mName, mType, mFormula, mFormulaWeight, mFormalCharge) =
		compounds.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

	// the atoms
	auto &atomRecords = db["chem_comp_atom"];
	for (auto atomRow : atomRecords)
	{
		CompoundAtom newAtom;
		std::string symbol;

		cif::tie(newAtom.id, symbol, newAtom.charge, newAtom.aromatic, newAtom.leavingAtom, newAtom.stereoConfig, newAtom.x, newAtom.y, newAtom.z) =
			atomRow.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

		// the symbol is stored as an atom type value
		newAtom.typeSymbol = AtomTypeTraits(symbol).type();

		mAtoms.push_back(std::move(newAtom));
	}

	// and the bonds
	auto &bondRecords = db["chem_comp_bond"];
	for (auto bondRow : bondRecords)
	{
		CompoundBond newBond;
		std::string order;

		cif::tie(newBond.atomID[0], newBond.atomID[1], order, newBond.aromatic, newBond.stereoConfig) = bondRow.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

		// value_order contains a name understood by from_string
		newBond.type = from_string(order);

		mBonds.push_back(std::move(newBond));
	}
}
// Construct a compound from a datablock that uses the plain item names read
// below (x, y, z, type, aromatic). ID, name and type are supplied by the
// caller; formal charge and formula weight are summed over the atoms.
Compound::Compound(cif::Datablock &db, const std::string &id, const std::string &name, const std::string &type)
	: mID(id)
	, mName(name)
	, mType(type)
{
	// NOTE(review): mFormalCharge and mFormulaWeight are accumulated below but
	// are not initialised in this constructor's initialiser list; confirm the
	// class declaration provides default member initialisers.
	auto &atomRecords = db["chem_comp_atom"];
	for (auto atomRow : atomRecords)
	{
		CompoundAtom newAtom;
		std::string symbol;

		cif::tie(newAtom.id, symbol, newAtom.charge, newAtom.x, newAtom.y, newAtom.z) =
			atomRow.get("atom_id", "type_symbol", "charge", "x", "y", "z");

		newAtom.typeSymbol = AtomTypeTraits(symbol).type();

		mFormalCharge += newAtom.charge;
		mFormulaWeight += AtomTypeTraits(newAtom.typeSymbol).weight();

		mAtoms.push_back(std::move(newAtom));
	}

	// Translate the bond type name used in this kind of file. Unknown names
	// are reported on stderr (when cif::VERBOSE is set) and treated as single.
	const auto toBondType = [&id](const std::string &bondTypeName)
	{
		using cif::iequals;

		if (iequals(bondTypeName, "single"))
			return BondType::sing;

		if (iequals(bondTypeName, "double"))
			return BondType::doub;

		if (iequals(bondTypeName, "triple"))
			return BondType::trip;

		if (iequals(bondTypeName, "deloc") or iequals(bondTypeName, "aromat") or iequals(bondTypeName, "aromatic"))
			return BondType::delo;

		if (cif::VERBOSE)
			std::cerr << "Unimplemented chem_comp_bond.type " << bondTypeName << " in " << id << std::endl;

		return BondType::sing;
	};

	auto &bondRecords = db["chem_comp_bond"];
	for (auto bondRow : bondRecords)
	{
		CompoundBond newBond;
		std::string bondTypeName;

		cif::tie(newBond.atomID[0], newBond.atomID[1], bondTypeName, newBond.aromatic) = bondRow.get("atom_id_1", "atom_id_2", "type", "aromatic");

		newBond.type = toBondType(bondTypeName);

		mBonds.push_back(std::move(newBond));
	}
}
// Return a copy of the atom with the given ID.
// Throws std::out_of_range when this compound has no such atom. This also
// holds for an empty atomID, which must not be mistaken for a match with a
// default constructed atom.
CompoundAtom Compound::getAtomByID(const std::string &atomID) const
{
	for (const auto &atom : mAtoms)
	{
		if (atom.id == atomID)
			return atom;
	}

	throw std::out_of_range("No atom " + atomID + " in Compound " + mID);
}
bool Compound::atomsBonded(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(mBonds.begin(), mBonds.end(),
[&](const CompoundBond &b) {
return (b.atomID[0] == atomId_1 and b.atomID[1] == atomId_2) or (b.atomID[0] == atomId_2 and b.atomID[1] == atomId_1);
});
return i != mBonds.end();
}
// --------------------------------------------------------------------
// a factory class to generate compounds
// Table mapping three letter residue names to a one letter code.
// The keys of this table are used to seed the set of known peptides in
// CompoundFactoryImpl and as a fallback in CompoundFactory::isKnownPeptide.
CIFPP_EXPORT const std::map<std::string, char> kAAMap{
{"ALA", 'A'},
{"ARG", 'R'},
{"ASN", 'N'},
{"ASP", 'D'},
{"CYS", 'C'},
{"GLN", 'Q'},
{"GLU", 'E'},
{"GLY", 'G'},
{"HIS", 'H'},
{"ILE", 'I'},
{"LEU", 'L'},
{"LYS", 'K'},
{"MET", 'M'},
{"PHE", 'F'},
{"PRO", 'P'},
{"SER", 'S'},
{"THR", 'T'},
{"TRP", 'W'},
{"TYR", 'Y'},
{"VAL", 'V'},
{"GLX", 'Z'},
{"ASX", 'B'}};
// Table mapping base names to a one letter code; the D-prefixed names map to
// the same letter as their unprefixed counterpart.
// The keys of this table are used to seed the set of known bases in
// CompoundFactoryImpl and as a fallback in CompoundFactory::isKnownBase.
CIFPP_EXPORT const std::map<std::string, char> kBaseMap{
{"A", 'A'},
{"C", 'C'},
{"G", 'G'},
{"T", 'T'},
{"U", 'U'},
{"DA", 'A'},
{"DC", 'C'},
{"DG", 'G'},
{"DT", 'T'}};
// --------------------------------------------------------------------
// One link in a chain of compound sources. Each link owns the compounds it
// has loaded and refers to the next link via mNext. Lookups start at the
// link they are called on and continue down the chain.
class CompoundFactoryImpl : public std::enable_shared_from_this<CompoundFactoryImpl>
{
public:
// Create a link that only knows the peptides and bases from kAAMap and kBaseMap.
CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next);
// Create a link preloaded with all compounds found in the given file.
CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next);
// The compounds are owned by this object and deleted along with it.
virtual ~CompoundFactoryImpl()
{
for (auto c: mCompounds)
delete c;
}
// Return the compound with the given ID (upper-cased before use), or
// nullptr when no link in the chain has it or is able to create it.
// IDs that could not be resolved are remembered in mMissing so that the
// creation attempt is not repeated.
//
// NOTE(review): only a shared lock is taken here, yet mMissing is modified
// below and create() implementations add to mCompounds. A shared lock does
// not exclude other readers, so concurrent calls are not protected from
// each other. Additionally CCDCompoundFactoryImpl::create locks mMutex
// again while this function may still hold it. Changing the lock type
// requires changing that function at the same time.
Compound *get(std::string id)
{
std::shared_lock lock(mMutex);
ba::to_upper(id);
Compound *result = nullptr;
// walk the list, see if any of us has the compound already
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
{
for (auto cmp : impl->mCompounds)
{
if (cmp->id() == id)
{
result = cmp;
break;
}
}
if (result)
break;
}
// not loaded yet and not known to be missing: ask each link to create it
if (result == nullptr and mMissing.count(id) == 0)
{
for (auto impl = shared_from_this(); impl; impl = impl->mNext)
{
result = impl->create(id);
if (result != nullptr)
break;
}
if (result == nullptr)
mMissing.insert(id);
}
return result;
}
// The next link in the chain, may be empty.
std::shared_ptr<CompoundFactoryImpl> next() const
{
return mNext;
}
// True when this link, or any link after it, lists resName as a peptide.
bool isKnownPeptide(const std::string &resName)
{
return mKnownPeptides.count(resName) or
(mNext and mNext->isKnownPeptide(resName));
}
// True when this link, or any link after it, lists resName as a base.
bool isKnownBase(const std::string &resName)
{
return mKnownBases.count(resName) or
(mNext and mNext->isKnownBase(resName));
}
protected:
// Try to load the compound with the given ID on demand, returns nullptr
// when that is not possible. Overridden by the CCD and CCP4 versions.
virtual Compound *create(const std::string &id)
{
// For the base class we assume every compound is preloaded
return nullptr;
}
std::shared_timed_mutex mMutex; // locked (shared) in get()
std::vector<Compound *> mCompounds; // owned, deleted in the destructor
std::set<std::string> mKnownPeptides;
std::set<std::string> mKnownBases;
std::set<std::string> mMissing; // IDs for which get() found nothing
std::shared_ptr<CompoundFactoryImpl> mNext;
};
// --------------------------------------------------------------------
// Create a link without any preloaded compounds. The sets of known peptides
// and bases are filled with the names listed in kAAMap and kBaseMap.
CompoundFactoryImpl::CompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	for (const auto &aa : kAAMap)
		mKnownPeptides.insert(aa.first);

	for (const auto &base : kBaseMap)
		mKnownBases.insert(base.first);
}
// Create a link preloaded with all compounds found in the given file.
// Two kinds of file are supported: a CCP4 restraints file, recognised by the
// presence of a comp_list datablock, and a CCD components file. The latter is
// validated against the mmcif_pdbx_v50 dictionary first; std::runtime_error
// is thrown when it does not validate.
CompoundFactoryImpl::CompoundFactoryImpl(const std::filesystem::path &file, std::shared_ptr<CompoundFactoryImpl> next)
	: mNext(next)
{
	cif::File cifFile(file);

	auto compList = cifFile.get("comp_list");
	if (not compList)
	{
		// A CCD components file, validate it first
		cifFile.loadDictionary("mmcif_pdbx_v50");

		if (not cifFile.isValid())
			throw std::runtime_error("Invalid compound file");

		for (auto &db : cifFile)
			mCompounds.push_back(new Compound(db));

		return;
	}

	// So this is a CCP4 restraints file, special handling

	// Derive the compound type from the ID and the CCP4 group name.
	// known groups are (counted from ccp4 monomer dictionary)
	//	D-pyranose
	//	DNA
	//	L-PEPTIDE LINKING
	//	L-SACCHARIDE
	//	L-peptide
	//	L-pyranose
	//	M-peptide
	//	NON-POLYMER
	//	P-peptide
	//	RNA
	//	furanose
	//	non-polymer
	//	non_polymer
	//	peptide
	//	pyranose
	//	saccharide
	const auto typeForGroup = [](const std::string &compID, const std::string &groupName) -> std::string
	{
		if (cif::iequals(compID, "gly"))
			return "peptide linking";

		if (cif::iequals(groupName, "l-peptide") or cif::iequals(groupName, "L-peptide linking") or cif::iequals(groupName, "peptide"))
			return "L-peptide linking";

		if (cif::iequals(groupName, "DNA"))
			return "DNA linking";

		if (cif::iequals(groupName, "RNA"))
			return "RNA linking";

		return "non-polymer";
	};

	auto &chemComp = (*compList)["chem_comp"];

	for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
	{
		const std::string type = typeForGroup(id, group);

		// the data for each compound is in its own datablock
		auto &db = cifFile["comp_" + id];

		mCompounds.push_back(new Compound(db, id, name, type));
	}
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the cached components.cif file from CCD
// A source of compounds that loads them on demand, one at a time, from a
// CCD formatted components file.
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
{
public:
// Use the given file as the source of the compound data.
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next, const fs::path& file)
: CompoundFactoryImpl(next)
, mCompoundsFile(file)
{
}
// Without a file, create() loads the resource named components.cif instead.
CCDCompoundFactoryImpl(std::shared_ptr<CompoundFactoryImpl> next)
: CompoundFactoryImpl(next)
{
}
Compound *create(const std::string &id) override;
cif::DatablockIndex mIndex; // datablock index, built by the first call to create()
fs::path mCompoundsFile; // file to read from; empty means use the components.cif resource
};
// Load the compound with the given ID from the CCD data.
// The data is read from mCompoundsFile or, when that path is empty, from the
// resource components.cif. The first call builds an index of the datablocks,
// subsequent calls use that index to parse just the requested datablock.
// Returns nullptr when the compound was not found. Throws std::runtime_error
// when the components.cif resource cannot be located.
Compound *CCDCompoundFactoryImpl::create(const std::string &id)
{
	// Open the data, starting at the beginning
	const auto openCCD = [this]()
	{
		std::unique_ptr<std::istream> stream;

		if (mCompoundsFile.empty())
		{
			stream = cif::loadResource("components.cif");
			if (not stream)
				throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
		}
		else
			stream.reset(new std::ifstream(mCompoundsFile));

		return stream;
	};

	std::unique_ptr<std::istream> ccd = openCCD();

	cif::File file;

	if (mIndex.empty())
	{
		if (cif::VERBOSE > 1)
		{
			std::cout << "Creating component index "
					  << "...";
			std::cout.flush();
		}

		cif::Parser indexer(*ccd, file, false);
		mIndex = indexer.indexDatablocks();

		if (cif::VERBOSE > 1)
			std::cout << " done" << std::endl;

		// reload the resource, perhaps this should be improved...
		ccd = openCCD();
	}

	if (cif::VERBOSE > 1)
	{
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::Parser parser(*ccd, file, false);
	parser.parseSingleDatablock(id, mIndex);

	if (cif::VERBOSE > 1)
		std::cout << " done" << std::endl;

	Compound *compound = nullptr;

	if (not file.empty())
	{
		auto &db = file.firstDatablock();
		if (db.getName() == id)
		{
			compound = new Compound(db);

			// NOTE(review): a shared lock is taken although mCompounds is
			// modified, and CompoundFactoryImpl::get() may already hold a
			// lock on this same mutex when it calls this function.
			std::shared_lock lock(mMutex);
			mCompounds.push_back(compound);
		}
	}

	if (compound == nullptr and cif::VERBOSE)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	return compound;
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the data found in CCP4's monomers lib
// A source of compounds that loads them on demand from a CCP4 monomer
// library directory.
class CCP4CompoundFactoryImpl : public CompoundFactoryImpl
{
public:
// clibd_mon is the root directory of the monomer library, it must contain
// the file list/mon_lib_list.cif which is loaded by the constructor.
CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next = nullptr);
Compound *create(const std::string &id) override;
private:
cif::File mFile; // the contents of list/mon_lib_list.cif
fs::path mCLIBD_MON; // root directory of the monomer library
};
// Load list/mon_lib_list.cif from the monomer library and use its chem_comp
// list to extend the sets of known peptides and bases: compounds in a group
// named peptide (optionally prefixed with l-, m- or p-, case insensitive) are
// peptides, compounds in the groups DNA and RNA are bases.
CCP4CompoundFactoryImpl::CCP4CompoundFactoryImpl(const fs::path &clibd_mon, std::shared_ptr<CompoundFactoryImpl> next)
	: CompoundFactoryImpl(next)
	, mFile((clibd_mon / "list" / "mon_lib_list.cif").string())
	, mCLIBD_MON(clibd_mon)
{
	const std::regex peptideRx("(?:[lmp]-)?peptide", std::regex::icase);

	auto &compList = mFile["comp_list"]["chem_comp"];

	for (const auto &[group, code] : compList.rows<std::string, std::string>("group", "three_letter_code"))
	{
		if (std::regex_match(group, peptideRx))
		{
			mKnownPeptides.insert(code);
			continue;
		}

		const bool isNucleicAcid = ba::iequals(group, "DNA") or ba::iequals(group, "RNA");
		if (isNucleicAcid)
			mKnownBases.insert(code);
	}
}
// Load the compound with the given ID from the CCP4 monomer library.
// The compound must be listed exactly once (by three_letter_code) in the
// chem_comp list of mon_lib_list.cif, and its restraints file must exist at
// <root>/<first letter, lower case>/<id>.cif. For the IDs COM, CON and PRN
// the alternative file name <id>_<id>.cif is tried when the regular one does
// not exist. Returns nullptr when the compound cannot be found; the created
// compound is added to, and owned by, mCompounds.
Compound *CCP4CompoundFactoryImpl::create(const std::string &id)
{
	Compound *result = nullptr;

	auto &cat = mFile["comp_list"]["chem_comp"];

	auto rs = cat.find(cif::Key("three_letter_code") == id);

	if (rs.size() == 1)
	{
		auto row = rs.front();

		std::string name, group;
		uint32_t numberAtomsAll, numberAtomsNh;
		cif::tie(name, group, numberAtomsAll, numberAtomsNh) =
			row.get("name", "group", "number_atoms_all", "number_atoms_nh");

		fs::path resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + ".cif");

		// The alternative name applies to these three IDs only. (The test for
		// PRN used to be a bare string literal, which is always true, so the
		// alternative name was tried for every ID.)
		if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN")) // seriously...
			resFile = mCLIBD_MON / ba::to_lower_copy(id.substr(0, 1)) / (id + '_' + id + ".cif");

		if (fs::exists(resFile))
		{
			cif::File cf(resFile.string());

			// locate the datablock
			auto &db = cf["comp_" + id];

			std::string type;

			// known groups are (counted from ccp4 monomer dictionary)
			//	D-pyranose
			//	DNA
			//	L-PEPTIDE LINKING
			//	L-SACCHARIDE
			//	L-peptide
			//	L-pyranose
			//	M-peptide
			//	NON-POLYMER
			//	P-peptide
			//	RNA
			//	furanose
			//	non-polymer
			//	non_polymer
			//	peptide
			//	pyranose
			//	saccharide

			if (cif::iequals(id, "gly"))
				type = "peptide linking";
			else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide"))
				type = "L-peptide linking";
			else if (cif::iequals(group, "DNA"))
				type = "DNA linking";
			else if (cif::iequals(group, "RNA"))
				type = "RNA linking";
			else
				type = "non-polymer";

			mCompounds.push_back(new Compound(db, id, name, type));
			result = mCompounds.back();
		}
	}

	return result;
}
// --------------------------------------------------------------------
// The factory instance shared by all threads, created on demand by instance().
std::unique_ptr<CompoundFactory> CompoundFactory::sInstance;
// The factory instance private to the current thread, created on demand by
// instance() when sUseThreadLocalInstance is set.
thread_local std::unique_ptr<CompoundFactory> CompoundFactory::tlInstance;
// Selects which of the two instances above is used by instance() and clear().
bool CompoundFactory::sUseThreadLocalInstance;
// Select whether instance() should return an instance private to the calling
// thread (true) or the instance shared by all threads (false).
void CompoundFactory::init(bool useThreadLocalInstanceOnly)
{
sUseThreadLocalInstance = useThreadLocalInstanceOnly;
}
// Set up the default chain of compound sources: the CCP4 monomer library,
// when the environment variable CLIBD_MON names a directory, with on top of
// that the CCD components.cif resource, when it can be loaded. A source that
// is not available is reported on stderr when cif::VERBOSE is set.
CompoundFactory::CompoundFactory()
	: mImpl(nullptr)
{
	const char *monomerDir = getenv("CLIBD_MON");
	const bool haveMonomerLib = monomerDir != nullptr and fs::is_directory(monomerDir);

	if (haveMonomerLib)
		mImpl.reset(new CCP4CompoundFactoryImpl(monomerDir));
	else if (cif::VERBOSE)
		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;

	// the resource is loaded here only to find out whether it exists
	const auto ccdData = cif::loadResource("components.cif");
	const bool haveCCD = static_cast<bool>(ccdData);

	if (haveCCD)
		mImpl.reset(new CCDCompoundFactoryImpl(mImpl));
	else if (cif::VERBOSE)
		std::cerr << "CCD components.cif file was not found" << std::endl;
}
// Nothing to clean up explicitly, the chain of sources is released by mImpl.
CompoundFactory::~CompoundFactory() = default;
// Return the factory to use, creating it on first use. Depending on the
// setting made with init() this is the instance private to the calling
// thread or the instance shared by all threads.
CompoundFactory &CompoundFactory::instance()
{
	std::unique_ptr<CompoundFactory> &slot = sUseThreadLocalInstance ? tlInstance : sInstance;

	if (not slot)
		slot.reset(new CompoundFactory());

	return *slot;
}
void CompoundFactory::clear()
{
if (sUseThreadLocalInstance)
tlInstance.reset(nullptr);
else
sInstance.reset();
}
// Put a CCD source that reads from the given file on top of the chain.
// Throws std::runtime_error when the file does not exist. Any exception
// raised while setting up the source is reported on stderr and rethrown.
void CompoundFactory::setDefaultDictionary(const std::filesystem::path &inDictFile)
{
	const bool found = fs::exists(inDictFile);
	if (not found)
		throw std::runtime_error("file not found: " + inDictFile.string());

	try
	{
		auto *source = new CCDCompoundFactoryImpl(mImpl, inDictFile);
		mImpl.reset(source);
	}
	catch (const std::exception &)
	{
		std::cerr << "Error loading dictionary " << inDictFile << std::endl;
		throw;
	}
}
// Put a source preloaded with the compounds in the given file on top of the
// chain. Throws std::runtime_error when the file does not exist. Any
// exception raised while loading the file is reported on stderr and rethrown.
void CompoundFactory::pushDictionary(const std::filesystem::path &inDictFile)
{
	const bool found = fs::exists(inDictFile);
	if (not found)
		throw std::runtime_error("file not found: " + inDictFile.string());

	//	ifstream file(inDictFile);
	//	if (not file.is_open())
	//		throw std::runtime_error("Could not open peptide list " + inDictFile);

	try
	{
		auto *source = new CompoundFactoryImpl(inDictFile, mImpl);
		mImpl.reset(source);
	}
	catch (const std::exception &)
	{
		std::cerr << "Error loading dictionary " << inDictFile << std::endl;
		throw;
	}
}
// Remove the topmost source from the chain, if there is one.
void CompoundFactory::popDictionary()
{
	if (not mImpl)
		return;

	mImpl = mImpl->next();
}
// Return the compound with the given ID, or nullptr when it is unknown or
// when no source of compound information is available at all.
const Compound *CompoundFactory::create(std::string id)
{
	//	static bool warned = false;
	//	if (mImpl and warned == false)
	//	{
	//		std::cerr << "Warning: no compound information library was found, resulting data may be incorrect or incomplete" << std::endl;
	//		warned = true;
	//	}

	if (not mImpl)
		return nullptr;

	return mImpl->get(id);
}
// True when resName is listed as a peptide by one of the sources in the
// chain or, when there are no sources at all, when it is a key in kAAMap.
bool CompoundFactory::isKnownPeptide(const std::string &resName) const
{
	if (mImpl)
		return mImpl->isKnownPeptide(resName);

	return kAAMap.count(resName) > 0;
}
// True when resName is listed as a base by one of the sources in the chain
// or, when there are no sources at all, when it is a key in kBaseMap.
bool CompoundFactory::isKnownBase(const std::string &resName) const
{
	if (mImpl)
		return mImpl->isKnownBase(resName);

	return kBaseMap.count(resName) > 0;
}
} // namespace mmcif

View File

@@ -1,11 +0,0 @@
/* Define to the name of this package. */
#cmakedefine PACKAGE_NAME "@PACKAGE_NAME@"
/* Define to the version of this package. */
#cmakedefine PACKAGE_VERSION "@PACKAGE_VERSION@"
/* Define the complete package string */
#cmakedefine PACKAGE_STRING "@PACKAGE_STRING@"
/* Using resources? */
#cmakedefine USE_RSRC @USE_RSRC@

View File

@@ -1,113 +0,0 @@
/* src/Config.hpp.in. Generated from configure.ac by autoheader. */
/* define if the Boost library is available */
#undef HAVE_BOOST
/* define if the Boost::Date_Time library is available */
#undef HAVE_BOOST_DATE_TIME
/* define if the Boost::IOStreams library is available */
#undef HAVE_BOOST_IOSTREAMS
/* define if the Boost::Regex library is available */
#undef HAVE_BOOST_REGEX
/* define if the compiler supports basic C++17 syntax */
#undef HAVE_CXX17
/* Define to 1 if you have the <dlfcn.h> header file. */
#undef HAVE_DLFCN_H
/* Define to 1 if you have the `floor' function. */
#undef HAVE_FLOOR
/* Define to 1 if you have the <inttypes.h> header file. */
#undef HAVE_INTTYPES_H
/* Define to 1 if you have the <memory.h> header file. */
#undef HAVE_MEMORY_H
/* Define to 1 if you have the `pow' function. */
#undef HAVE_POW
/* Define if you have POSIX threads libraries and header files. */
#undef HAVE_PTHREAD
/* Have PTHREAD_PRIO_INHERIT. */
#undef HAVE_PTHREAD_PRIO_INHERIT
/* Define to 1 if the system has the type `ptrdiff_t'. */
#undef HAVE_PTRDIFF_T
/* Define to 1 if you have the `rint' function. */
#undef HAVE_RINT
/* Define to 1 if you have the `sqrt' function. */
#undef HAVE_SQRT
/* Define to 1 if you have the <stdint.h> header file. */
#undef HAVE_STDINT_H
/* Define to 1 if you have the <stdlib.h> header file. */
#undef HAVE_STDLIB_H
/* Define to 1 if you have the `strchr' function. */
#undef HAVE_STRCHR
/* Define to 1 if you have the `strerror' function. */
#undef HAVE_STRERROR
/* Define to 1 if you have the <strings.h> header file. */
#undef HAVE_STRINGS_H
/* Define to 1 if you have the <string.h> header file. */
#undef HAVE_STRING_H
/* Define to 1 if you have the <sys/ioctl.h> header file. */
#undef HAVE_SYS_IOCTL_H
/* Define to 1 if you have the <sys/stat.h> header file. */
#undef HAVE_SYS_STAT_H
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
/* Define to 1 if you have the <termios.h> header file. */
#undef HAVE_TERMIOS_H
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
/* Define to 1 if the system has the type `_Bool'. */
#undef HAVE__BOOL
/* Define to the sub-directory where libtool stores uninstalled libraries. */
#undef LT_OBJDIR
/* Define to the address where bug reports for this package should be sent. */
#undef PACKAGE_BUGREPORT
/* Define to the full name of this package. */
#undef PACKAGE_NAME
/* Define to the full name and version of this package. */
#undef PACKAGE_STRING
/* Define to the one symbol short name of this package. */
#undef PACKAGE_TARNAME
/* Define to the home page for this package. */
#undef PACKAGE_URL
/* Define to the version of this package. */
#undef PACKAGE_VERSION
/* Define to necessary symbol if this constant uses a non-standard name on
your system. */
#undef PTHREAD_CREATE_JOINABLE
/* Define to 1 if you have the ANSI C header files. */
#undef STDC_HEADERS
/* Use mrc to store resources */
#undef USE_RSRC

File diff suppressed because it is too large Load Diff

View File

@@ -1,306 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <random>
#include <valarray>
#include "cif++/Point.hpp"
#include "cif++/Matrix.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Scale a quaternion to unit length. A quaternion whose length is not
// larger than 0.001 is replaced by the quaternion (1, 0, 0, 0).
Quaternion Normalize(Quaternion q)
{
	std::valarray<double> squares(4);

	squares[0] = q.R_component_1();
	squares[1] = q.R_component_2();
	squares[2] = q.R_component_3();
	squares[3] = q.R_component_4();

	squares *= squares;

	const double length = std::sqrt(squares.sum());

	if (not (length > 0.001))
		return Quaternion(1, 0, 0, 0);

	q /= static_cast<Quaternion::value_type>(length);
	return q;
}
// --------------------------------------------------------------------
// Convert a quaternion into a rotation angle, in degrees, and a rotation
// axis. The quaternion is normalised first when its first component is
// larger than one.
std::tuple<double,Point> QuaternionToAngleAxis(Quaternion q)
{
	if (q.R_component_1() > 1)
		q = Normalize(q);

	const auto w = q.R_component_1();

	// angle:
	const double radians = 2 * acos(w);
	const double degrees = radians * 180 / kPI;

	// axis:
	float scale = std::sqrt(1 - w * w);

	// avoid dividing by (almost) zero
	if (scale < 0.001)
		scale = 1;

	Point axis(q.R_component_2() / scale, q.R_component_3() / scale, q.R_component_4() / scale);

	return std::make_tuple(degrees, axis);
}
// Translate the points, in place, so that their average position becomes the
// origin. Returns the average position the points had before the move.
Point CenterPoints(std::vector<Point>& Points)
{
	Point center;

	// sum all coordinates...
	for (const Point& p : Points)
	{
		center.mX += p.mX;
		center.mY += p.mY;
		center.mZ += p.mZ;
	}

	// ... and divide by the number of points to get the average
	const auto count = Points.size();

	center.mX /= count;
	center.mY /= count;
	center.mZ /= count;

	// move all points
	for (Point& p : Points)
	{
		p.mX -= center.mX;
		p.mY -= center.mY;
		p.mZ -= center.mZ;
	}

	return center;
}
// Return the average position of the points. The points are not modified.
Point Centroid(std::vector<Point>& Points)
{
	Point sum;

	for (auto i = Points.begin(); i != Points.end(); ++i)
		sum += *i;

	const auto count = static_cast<float>(Points.size());
	sum /= count;

	return sum;
}
double RMSd(const std::vector<Point>& a, const std::vector<Point>& b)
{
double sum = 0;
for (uint32_t i = 0; i < a.size(); ++i)
{
std::valarray<double> d(3);
d[0] = b[i].mX - a[i].mX;
d[1] = b[i].mY - a[i].mY;
d[2] = b[i].mZ - a[i].mZ;
d *= d;
sum += d.sum();
}
return std::sqrt(sum / a.size());
}
// Return the largest solution of a depressed quartic, i.e. an equation
// of the form:
//
//   x^4 + ax^2 + bx + c = 0
//
// solved with Ferrari's algorithm.
//
// All intermediate values are complex to avoid NaN results from taking
// the square root of a negative number. Only the real part of each
// candidate solution is compared and returned.
//
// NOTE: when W (see below) is exactly zero the term 2b / W is not finite
// and neither is the result.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	std::complex<double> P = - (a * a) / 12 - c;
	std::complex<double> Q = - (a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	std::complex<double> R = - Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	std::complex<double> U = std::pow(R, 1 / 3.0);

	std::complex<double> y;
	if (U == 0.0)
		y = -5.0 * a / 6.0 + U - std::pow(Q, 1.0 / 3.0);
	else
		y = -5.0 * a / 6.0 + U - P / (3.0 * U);

	std::complex<double> W = std::sqrt(a + 2.0 * y);

	// The four solutions are:
	//
	//   x = (±s W ±t sqrt(-(3a + 2y ±s 2b / W))) / 2
	//
	// where both ±s carry the SAME sign. Combining +W with -2b/W (or
	// -W with +2b/W) does not give a solution of the equation, so those
	// combinations must not take part in the comparison.
	//
	// The principal square root has a non-negative real part, so for each
	// choice of s the largest candidate is the one that adds the root.
	const std::complex<double> bOverW = 2.0 * b / W;

	const double withPlusW = (( W + std::sqrt(-(3.0 * a + 2.0 * y + bOverW))) / 2.0).real();
	const double withMinusW = ((-W + std::sqrt(-(3.0 * a + 2.0 * y - bOverW))) / 2.0).real();

	return withPlusW > withMinusW ? withPlusW : withMinusW;
}
// Calculate a unit quaternion from two point sets that are paired by index.
//
// The steps are:
//   1. build M, the 3x3 matrix of summed products of the coordinates
//   2. build the symmetric 4x4 matrix N from M
//   3. find the largest eigenvalue of N by solving its characteristic
//      equation, a depressed quartic
//   4. take one row of the 3x3 minors of (N - λI) as the quaternion
//      components and normalise that
//
// NOTE(review): this looks like Horn's closed-form absolute orientation
// method, which would require both point sets to be centred on their
// centroid before calling this function -- confirm with the callers.
//
// pa, pb  The point sets. pb[i] is read for every i < pa.size(), so pb
//         must contain at least pa.size() points.
// Returns the result of Normalize() applied to the quaternion found.
Quaternion AlignPoints(const std::vector<Point>& pa, const std::vector<Point>& pb)
{
// First calculate M, a 3x3 Matrix containing the sums of products of the coordinates of A and B
Matrix<double> M(3, 3, 0);
for (uint32_t i = 0; i < pa.size(); ++i)
{
const Point& a = pa[i];
const Point& b = pb[i];
M(0, 0) += a.mX * b.mX; M(0, 1) += a.mX * b.mY; M(0, 2) += a.mX * b.mZ;
M(1, 0) += a.mY * b.mX; M(1, 1) += a.mY * b.mY; M(1, 2) += a.mY * b.mZ;
M(2, 0) += a.mZ * b.mX; M(2, 1) += a.mZ * b.mY; M(2, 2) += a.mZ * b.mZ;
}
// Now calculate N, a symmetric 4x4 Matrix
// Only elements with row <= column are assigned here while N(2, 1) is read
// further down, presumably SymmetricMatrix maps (r, c) and (c, r) onto the
// same element -- confirm in Matrix.hpp.
SymmetricMatrix<double> N(4);
N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
N(0, 1) = M(1, 2) - M(2, 1);
N(0, 2) = M(2, 0) - M(0, 2);
N(0, 3) = M(0, 1) - M(1, 0);
N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
N(1, 2) = M(0, 1) + M(1, 0);
N(1, 3) = M(0, 2) + M(2, 0);
N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
N(2, 3) = M(1, 2) + M(2, 1);
N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);
// det(N - λI) = 0
// find the largest λ (λm)
//
// A λ^4 + B λ^3 + C λ^2 + D λ + E = 0
// A = 1
// B = 0
// and so this is a so-called depressed quartic
// solve it using Ferrari's algorithm
// C is -2 times the sum of the squares of all elements of M
double C = -2 * (
M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));
// D is -8 times the determinant of M, written out term by term
double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
M(1, 1) * M(2, 0) * M(0, 2) +
M(2, 2) * M(0, 1) * M(1, 0)) -
8 * (M(0, 0) * M(1, 1) * M(2, 2) +
M(1, 2) * M(2, 0) * M(0, 1) +
M(2, 1) * M(1, 0) * M(0, 2));
// E is the constant term of the quartic, expressed in the elements of N
// (presumably det(N) -- TODO confirm)
double E =
(N(0,0) * N(1,1) - N(0,1) * N(0,1)) * (N(2,2) * N(3,3) - N(2,3) * N(2,3)) +
(N(0,1) * N(0,2) - N(0,0) * N(2,1)) * (N(2,1) * N(3,3) - N(2,3) * N(1,3)) +
(N(0,0) * N(1,3) - N(0,1) * N(0,3)) * (N(2,1) * N(2,3) - N(2,2) * N(1,3)) +
(N(0,1) * N(2,1) - N(1,1) * N(0,2)) * (N(0,2) * N(3,3) - N(2,3) * N(0,3)) +
(N(1,1) * N(0,3) - N(0,1) * N(1,3)) * (N(0,2) * N(2,3) - N(2,2) * N(0,3)) +
(N(0,2) * N(1,3) - N(2,1) * N(0,3)) * (N(0,2) * N(1,3) - N(2,1) * N(0,3));
// solve quartic
double lm = LargestDepressedQuarticSolution(C, D, E);
// calculate t = (N - λI)
Matrix<double> li = IdentityMatrix<double>(4) * lm;
Matrix<double> t = N - li;
// calculate a Matrix of cofactors for t
// Every element of cf is assigned in the loops below.
Matrix<double> cf(4, 4);
// ixs[k] holds the three indices that remain after leaving out index k,
// it is used to select the rows and columns of the 3x3 sub matrix
const uint32_t ixs[4][3] =
{
{ 1, 2, 3 },
{ 0, 2, 3 },
{ 0, 1, 3 },
{ 0, 1, 2 }
};
// maxR is the row of cf having the largest value in column 0
uint32_t maxR = 0;
for (uint32_t r = 0; r < 4; ++r)
{
const uint32_t* ir = ixs[r];
for (uint32_t c = 0; c < 4; ++c)
{
const uint32_t* ic = ixs[c];
// Determinant of the 3x3 sub matrix of t that is left after removing
// row r and column c.
// NOTE(review): no (-1)^(r + c) sign is applied, so strictly speaking
// these are minors, not signed cofactors -- confirm this is intended.
cf(r, c) =
t(ir[0], ic[0]) * t(ir[1], ic[1]) * t(ir[2], ic[2]) +
t(ir[0], ic[1]) * t(ir[1], ic[2]) * t(ir[2], ic[0]) +
t(ir[0], ic[2]) * t(ir[1], ic[0]) * t(ir[2], ic[1]) -
t(ir[0], ic[2]) * t(ir[1], ic[1]) * t(ir[2], ic[0]) -
t(ir[0], ic[1]) * t(ir[1], ic[0]) * t(ir[2], ic[2]) -
t(ir[0], ic[0]) * t(ir[1], ic[2]) * t(ir[2], ic[1]);
}
// The comparison uses the signed values, not their magnitudes.
// The test r > maxR only fails for r == 0.
if (r > maxR and cf(r, 0) > cf(maxR, 0))
maxR = r;
}
// NOTE the negation of the y here, why? Maybe I swapped r/c above?
Quaternion q(cf(maxR, 0), cf(maxR, 1), -cf(maxR, 2), cf(maxR, 3));
q = Normalize(q);
return q;
}
// --------------------------------------------------------------------
// Return p displaced in a random direction.
// The direction is obtained by rotating the vector (0, 0, 1) with a
// quaternion built from three random angles, the displacement length is
// drawn from a normal distribution with mean 0 and standard deviation
// offset.
// The random engine is a function local static that is seeded once from
// std::random_device.
Point Nudge(Point p, float offset)
{
	static std::random_device seedSource;
	static std::mt19937_64 engine(seedSource());

	std::uniform_real_distribution<> angleDistribution(0, 2 * kPI);
	std::normal_distribution<> lengthDistribution(0, offset);

	// three draws, in this order: theta, phi1, phi2
	const float theta = static_cast<float>(angleDistribution(engine));
	const float phi1 = static_cast<float>(angleDistribution(engine) - kPI);
	const float phi2 = static_cast<float>(angleDistribution(engine) - kPI);

	Quaternion rotation = boost::math::spherical(1.0f, theta, phi1, phi2);

	Point displacement{ 0, 0, 1 };
	displacement.rotate(rotation);

	const float length = static_cast<float>(lengthDistribution(engine));
	displacement *= length;

	return p + displacement;
}
}

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,93 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <atomic>
#include <mutex>
#include "cif++/Symmetry.hpp"
#include "cif++/CifUtils.hpp"
#include "SymOpTable_data.hpp"
namespace mmcif
{
// --------------------------------------------------------------------
// Unfortunately, clipper has a different numbering scheme than PDB
// for rotation numbers. So we created a table to map those.
// Perhaps a bit over the top, but hey....
// --------------------------------------------------------------------
// Look up the number of the spacegroup with the given name.
//
// The name is first searched for in the name field of kSpaceGroups using
// a binary search. If that does not result in a number, the table is
// scanned from the start for an entry whose xHM field equals the name.
// The name "P 21 21 2 A" is treated as "P 21 21 2 (a)".
//
// Throws std::runtime_error when the name is empty or when no number
// could be found.
int GetSpacegroupNumber(std::string spacegroup)
{
	if (spacegroup.empty())
		throw std::runtime_error("No spacegroup, cannot continue");

	if (spacegroup == "P 21 21 2 A")
		spacegroup = "P 21 21 2 (a)";

	int nr = 0;

	// binary search on name, using a closed interval [lo, hi]
	int32_t lo = 0;
	int32_t hi = static_cast<int32_t>(static_cast<size_t>(kNrOfSpaceGroups) - 1);

	while (lo <= hi)
	{
		const int32_t mid = (lo + hi) / 2;
		const int cmp = spacegroup.compare(kSpaceGroups[mid].name);

		if (cmp == 0)
		{
			nr = kSpaceGroups[mid].nr;
			break;
		}

		if (cmp > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}

	// not found, see if we can find a match based on xHM name
	if (nr == 0)
	{
		size_t ix = 0;
		while (ix < kNrOfSpaceGroups)
		{
			auto& entry = kSpaceGroups[ix++];
			if (entry.xHM == spacegroup)
			{
				nr = entry.nr;
				break;
			}
		}
	}

	if (nr == 0)
		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

	return nr;
}
}

File diff suppressed because it is too large Load Diff

881
src/atom_type.cpp Normal file
View File

@@ -0,0 +1,881 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// NOLINTBEGIN(modernize-use-std-numbers)
#include "cif++.hpp"
#include <cassert>
#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>
namespace cif
{
namespace data
{
// Table with one atom_type_info initialiser per known atom type.
//
// The rows are listed in order of atomic number, starting with the
// placeholder Nn (unknown) at index 0, so the element with atomic number Z
// is found at index Z for 0 <= Z <= 118. Deuterium (D) is appended as an
// extra entry after element 118.
//
// The comment at the end of each row repeats atomic number, symbol and name.
//
// NOTE(review): each row appears to hold the type, the name, the symbol,
// the weight, a flag telling whether the element is a metal and seven
// radii, with kNA marking a value that is not available -- confirm against
// the definition of atom_type_info.
const atom_type_info kKnownAtoms[] = // NOLINT(bugprone-throwing-static-initialization,cert-err58-cpp)
{
{ Nn, "Unknown", "Nn", 0, false, { kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // 0 Nn Unknown
{ H, "Hydrogen", "H", 1.008f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 H Hydrogen
{ He, "Helium", "He", 4.0026f, false, { 31, kNA, 32, 46, kNA, kNA, 140 } }, // 2 He Helium
{ Li, "Lithium", "Li", 6.94f, true, { 167, 145, 134, 133, 124, kNA, 182 } }, // 3 Li Lithium
{ Be, "Beryllium", "Be", 9.0122f, true, { 112, 105, 90, 102, 90, 85, kNA } }, // 4 Be Beryllium
{ B, "Boron", "B", 10.81f, true, { 87, 85, 82, 85, 78, 73, kNA } }, // 5 B Boron
{ C, "Carbon", "C", 12.011f, false, { 67, 70, 77, 75, 67, 60, 170 } }, // 6 C Carbon
{ N, "Nitrogen", "N", 14.007f, false, { 56, 65, 75, 71, 60, 54, 155 } }, // 7 N Nitrogen
{ O, "Oxygen", "O", 15.999f, false, { 48, 60, 73, 63, 57, 53, 152 } }, // 8 O Oxygen
{ F, "Fluorine", "F", 18.998f, false, { 42, 50, 71, 64, 59, 53, 147 } }, // 9 F Fluorine
{ Ne, "Neon", "Ne", 20.180f, false, { 38, kNA, 69, 67, 96, kNA, 154 } }, // 10 Ne Neon
{ Na, "Sodium", "Na", 22.990f, true, { 190, 180, 154, 155, 160, kNA, 227 } }, // 11 Na Sodium
{ Mg, "Magnesium", "Mg", 24.305f, true, { 145, 150, 130, 139, 132, 127, 173 } }, // 12 Mg Magnesium
{ Al, "Aluminium", "Al", 26.982f, true, { 118, 125, 118, 126, 113, 111, kNA } }, // 13 Al Aluminium
{ Si, "Silicon", "Si", 28.085f, true, { 111, 110, 111, 116, 107, 102, 210 } }, // 14 Si Silicon
{ P, "Phosphorus", "P", 30.974f, false, { 98, 100, 106, 111, 102, 94, 180 } }, // 15 P Phosphorus
{ S, "Sulfur", "S", 32.06f, false, { 88, 100, 102, 103, 94, 95, 180 } }, // 16 S Sulfur
{ Cl, "Chlorine", "Cl", 35.45f, false, { 79, 100, 99, 99, 95, 93, 175 } }, // 17 Cl Chlorine
{ Ar, "Argon", "Ar", 39.948f, false, { 71, kNA, 97, 96, 107, 96, 188 } }, // 18 Ar Argon
{ K, "Potassium", "K", 39.098f, true, { 243, 220, 196, 196, 193, kNA, 275 } }, // 19 K Potassium
{ Ca, "Calcium", "Ca", 40.078f, true, { 194, 180, 174, 171, 147, 133, kNA } }, // 20 Ca Calcium
{ Sc, "Scandium", "Sc", 44.956f, true, { 184, 160, 144, 148, 116, 114, kNA } }, // 21 Sc Scandium
{ Ti, "Titanium", "Ti", 47.867f, true, { 176, 140, 136, 136, 117, 108, kNA } }, // 22 Ti Titanium
{ V, "Vanadium", "V", 50.942f, true, { 171, 135, 125, 134, 112, 106, kNA } }, // 23 V Vanadium
{ Cr, "Chromium", "Cr", 51.996f, true, { 166, 140, 127, 122, 111, 103, kNA } }, // 24 Cr Chromium
{ Mn, "Manganese", "Mn", 54.938f, true, { 161, 140, 139, 119, 105, 103, kNA } }, // 25 Mn Manganese
{ Fe, "Iron", "Fe", 55.845f, true, { 156, 140, 125, 116, 109, 102, kNA } }, // 26 Fe Iron
{ Co, "Cobalt", "Co", 58.933f, true, { 152, 135, 126, 111, 103, 96, kNA } }, // 27 Co Cobalt
{ Ni, "Nickel", "Ni", 58.693f, true, { 149, 135, 121, 110, 101, 101, 163 } }, // 28 Ni Nickel
{ Cu, "Copper", "Cu", 63.546f, true, { 145, 135, 138, 112, 115, 120, 140 } }, // 29 Cu Copper
{ Zn, "Zinc", "Zn", 65.38f, true, { 142, 135, 131, 118, 120, kNA, 139 } }, // 30 Zn Zinc
{ Ga, "Gallium", "Ga", 69.723f, true, { 136, 130, 126, 124, 117, 121, 187 } }, // 31 Ga Gallium
{ Ge, "Germanium", "Ge", 72.630f, true, { 125, 125, 122, 121, 111, 114, kNA } }, // 32 Ge Germanium
{ As, "Arsenic", "As", 74.922f, true, { 114, 115, 119, 121, 114, 106, 185 } }, // 33 As Arsenic
{ Se, "Selenium", "Se", 78.971f, false, { 103, 115, 116, 116, 107, 107, 190 } }, // 34 Se Selenium
{ Br, "Bromine", "Br", 79.904f, false, { 94, 115, 114, 114, 109, 110, 185 } }, // 35 Br Bromine
{ Kr, "Krypton", "Kr", 83.798f, false, { 88, kNA, 110, 117, 121, 108, 202 } }, // 36 Kr Krypton
{ Rb, "Rubidium", "Rb", 85.468f, true, { 265, 235, 211, 210, 202, kNA, kNA } }, // 37 Rb Rubidium
{ Sr, "Strontium", "Sr", 87.62f, true, { 219, 200, 192, 185, 157, 139, kNA } }, // 38 Sr Strontium
{ Y, "Yttrium", "Y", 88.906f, true, { 212, 180, 162, 163, 130, 124, kNA } }, // 39 Y Yttrium
{ Zr, "Zirconium", "Zr", 91.224f, true, { 206, 155, 148, 154, 127, 121, kNA } }, // 40 Zr Zirconium
{ Nb, "Niobium", "Nb", 92.906f, true, { 198, 145, 137, 147, 125, 116, kNA } }, // 41 Nb Niobium
{ Mo, "Molybdenum", "Mo", 95.95f, true, { 190, 145, 145, 138, 121, 113, kNA } }, // 42 Mo Molybdenum
{ Tc, "Technetium", "Tc", 98, true, { 183, 135, 156, 128, 120, 110, kNA } }, // 43 Tc Technetium
{ Ru, "Ruthenium", "Ru", 101.07f, true, { 178, 130, 126, 125, 114, 103, kNA } }, // 44 Ru Ruthenium
{ Rh, "Rhodium", "Rh", 102.91f, true, { 173, 135, 135, 125, 110, 106, kNA } }, // 45 Rh Rhodium
{ Pd, "Palladium", "Pd", 106.42f, true, { 169, 140, 131, 120, 117, 112, 163 } }, // 46 Pd Palladium
{ Ag, "Silver", "Ag", 107.87f, true, { 165, 160, 153, 128, 139, 137, 172 } }, // 47 Ag Silver
{ Cd, "Cadmium", "Cd", 112.41f, true, { 161, 155, 148, 136, 144, kNA, 158 } }, // 48 Cd Cadmium
{ In, "Indium", "In", 114.82f, true, { 156, 155, 144, 142, 136, 146, 193 } }, // 49 In Indium
{ Sn, "Tin", "Sn", 118.71f, true, { 145, 145, 141, 140, 130, 132, 217 } }, // 50 Sn Tin
{ Sb, "Antimony", "Sb", 121.76f, false, { 133, 145, 138, 140, 133, 127, kNA } }, // 51 Sb Antimony
{ Te, "Tellurium", "Te", 127.60f, false, { 123, 140, 135, 136, 128, 121, 206 } }, // 52 Te Tellurium
{ I, "Iodine", "I", 126.90f, false, { 115, 140, 133, 133, 129, 125, 198 } }, // 53 I Iodine
{ Xe, "Xenon", "Xe", 131.29f, false, { 108, kNA, 130, 131, 135, 122, 216 } }, // 54 Xe Xenon
{ Cs, "Caesium", "Cs", 132.91f, true, { 298, 260, 225, 232, 209, kNA, kNA } }, // 55 Cs Caesium
{ Ba, "Barium", "Ba", 137.33f, true, { 253, 215, 198, 196, 161, 149, kNA } }, // 56 Ba Barium
{ La, "Lanthanum", "La", 138.91f, true, { kNA, 195, 169, 180, 139, 139, kNA } }, // 57 La Lanthanum
{ Ce, "Cerium", "Ce", 140.12f, true, { kNA, 185, kNA, 163, 137, 131, kNA } }, // 58 Ce Cerium
{ Pr, "Praseodymium", "Pr", 140.91f, true, { 247, 185, kNA, 176, 138, 128, kNA } }, // 59 Pr Praseodymium
{ Nd, "Neodymium", "Nd", 144.24f, true, { 206, 185, kNA, 174, 137, kNA, kNA } }, // 60 Nd Neodymium
{ Pm, "Promethium", "Pm", 145, true, { 205, 185, kNA, 173, 135, kNA, kNA } }, // 61 Pm Promethium
{ Sm, "Samarium", "Sm", 150.36f, true, { 238, 185, kNA, 172, 134, kNA, kNA } }, // 62 Sm Samarium
{ Eu, "Europium", "Eu", 151.96f, true, { 231, 185, kNA, 168, 134, kNA, kNA } }, // 63 Eu Europium
{ Gd, "Gadolinium", "Gd", 157.25f, true, { 233, 180, kNA, 169, 135, 132, kNA } }, // 64 Gd Gadolinium
{ Tb, "Terbium", "Tb", 158.93f, true, { 225, 175, kNA, 168, 135, kNA, kNA } }, // 65 Tb Terbium
{ Dy, "Dysprosium", "Dy", 162.50f, true, { 228, 175, kNA, 167, 133, kNA, kNA } }, // 66 Dy Dysprosium
{ Ho, "Holmium", "Ho", 164.93f, true, { 226, 175, kNA, 166, 133, kNA, kNA } }, // 67 Ho Holmium
{ Er, "Erbium", "Er", 167.26f, true, { 226, 175, kNA, 165, 133, kNA, kNA } }, // 68 Er Erbium
{ Tm, "Thulium", "Tm", 168.93f, true, { 222, 175, kNA, 164, 131, kNA, kNA } }, // 69 Tm Thulium
{ Yb, "Ytterbium", "Yb", 173.05f, true, { 222, 175, kNA, 170, 129, kNA, kNA } }, // 70 Yb Ytterbium
{ Lu, "Lutetium", "Lu", 174.97f, true, { 217, 175, 160, 162, 131, 131, kNA } }, // 71 Lu Lutetium
{ Hf, "Hafnium", "Hf", 178.49f, true, { 208, 155, 150, 152, 128, 122, kNA } }, // 72 Hf Hafnium
{ Ta, "Tantalum", "Ta", 180.95f, true, { 200, 145, 138, 146, 126, 119, kNA } }, // 73 Ta Tantalum
{ W, "Tungsten", "W", 183.84f, true, { 193, 135, 146, 137, 120, 115, kNA } }, // 74 W Tungsten
{ Re, "Rhenium", "Re", 186.21f, true, { 188, 135, 159, 131, 119, 110, kNA } }, // 75 Re Rhenium
{ Os, "Osmium", "Os", 190.23f, true, { 185, 130, 128, 129, 116, 109, kNA } }, // 76 Os Osmium
{ Ir, "Iridium", "Ir", 192.22f, true, { 180, 135, 137, 122, 115, 107, kNA } }, // 77 Ir Iridium
{ Pt, "Platinum", "Pt", 195.08f, true, { 177, 135, 128, 123, 112, 110, 175 } }, // 78 Pt Platinum
{ Au, "Gold", "Au", 196.97f, true, { 174, 135, 144, 124, 121, 123, 166 } }, // 79 Au Gold
{ Hg, "Mercury", "Hg", 200.59f, true, { 171, 150, 149, 133, 142, kNA, 155 } }, // 80 Hg Mercury
{ Tl, "Thallium", "Tl", 204.38f, true, { 156, 190, 148, 144, 142, 150, 196 } }, // 81 Tl Thallium
{ Pb, "Lead", "Pb", 207.2f, true, { 154, 180, 147, 144, 135, 137, 202 } }, // 82 Pb Lead
{ Bi, "Bismuth", "Bi", 208.98f, true, { 143, 160, 146, 151, 141, 135, kNA } }, // 83 Bi Bismuth
{ Po, "Polonium", "Po", 209, true, { 135, 190, kNA, 145, 135, 129, kNA } }, // 84 Po Polonium
{ At, "Astatine", "At", 210, false, { 127, kNA, kNA, 147, 138, 138, kNA } }, // 85 At Astatine
{ Rn, "Radon", "Rn", 222, false, { 120, kNA, 145, 142, 145, 133, kNA } }, // 86 Rn Radon
{ Fr, "Francium", "Fr", 223, true, { kNA, kNA, kNA, 223, 218, kNA, kNA } }, // 87 Fr Francium
{ Ra, "Radium", "Ra", 226, true, { kNA, 215, kNA, 201, 173, 159, kNA } }, // 88 Ra Radium
{ Ac, "Actinium", "Ac", 227, true, { kNA, 195, kNA, 186, 153, 140, kNA } }, // 89 Ac Actinium
{ Th, "Thorium", "Th", 232.04f, true, { kNA, 180, kNA, 175, 143, 136, kNA } }, // 90 Th Thorium
{ Pa, "Protactinium", "Pa", 231.04f, true, { kNA, 180, kNA, 169, 138, 129, kNA } }, // 91 Pa Protactinium
{ U, "Uranium", "U", 238.03f, true, { kNA, 175, kNA, 170, 134, 118, 186 } }, // 92 U Uranium
{ Np, "Neptunium", "Np", 237, true, { kNA, 175, kNA, 171, 136, 116, kNA } }, // 93 Np Neptunium
{ Pu, "Plutonium", "Pu", 244, true, { kNA, 175, kNA, 172, 135, kNA, kNA } }, // 94 Pu Plutonium
{ Am, "Americium", "Am", 243, true, { kNA, 175, kNA, 166, 135, kNA, kNA } }, // 95 Am Americium
{ Cm, "Curium", "Cm", 247, true, { kNA, kNA, kNA, 166, 136, kNA, kNA } }, // 96 Cm Curium
{ Bk, "Berkelium", "Bk", 247, true, { kNA, kNA, kNA, 168, 139, kNA, kNA } }, // 97 Bk Berkelium
{ Cf, "Californium", "Cf", 251, true, { kNA, kNA, kNA, 168, 140, kNA, kNA } }, // 98 Cf Californium
{ Es, "Einsteinium", "Es", 252, true, { kNA, kNA, kNA, 165, 140, kNA, kNA } }, // 99 Es Einsteinium
{ Fm, "Fermium", "Fm", 257, true, { kNA, kNA, kNA, 167, kNA, kNA, kNA } }, // 100 Fm Fermium
{ Md, "Mendelevium", "Md", 258, true, { kNA, kNA, kNA, 173, 139, kNA, kNA } }, // 101 Md Mendelevium
{ No, "Nobelium", "No", 259, true, { kNA, kNA, kNA, 176, kNA, kNA, kNA } }, // 102 No Nobelium
{ Lr, "Lawrencium", "Lr", 266, true, { kNA, kNA, kNA, 161, 141, kNA, kNA } }, // 103 Lr Lawrencium
{ Rf, "Rutherfordium", "Rf", 267, true, { kNA, kNA, kNA, 157, 140, 131, kNA } }, // 104 Rf Rutherfordium
{ Db, "Dubnium", "Db", 268, true, { kNA, kNA, kNA, 149, 136, 126, kNA } }, // 105 Db Dubnium
{ Sg, "Seaborgium", "Sg", 269, true, { kNA, kNA, kNA, 143, 128, 121, kNA } }, // 106 Sg Seaborgium
{ Bh, "Bohrium", "Bh", 270, true, { kNA, kNA, kNA, 141, 128, 119, kNA } }, // 107 Bh Bohrium
{ Hs, "Hassium", "Hs", 277, true, { kNA, kNA, kNA, 134, 125, 118, kNA } }, // 108 Hs Hassium
{ Mt, "Meitnerium", "Mt", 278, true, { kNA, kNA, kNA, 129, 125, 113, kNA } }, // 109 Mt Meitnerium
{ Ds, "Darmstadtium", "Ds", 281, true, { kNA, kNA, kNA, 128, 116, 112, kNA } }, // 110 Ds Darmstadtium
{ Rg, "Roentgenium", "Rg", 282, true, { kNA, kNA, kNA, 121, 116, 118, kNA } }, // 111 Rg Roentgenium
{ Cn, "Copernicium", "Cn", 285, true, { kNA, kNA, kNA, 122, 137, 130, kNA } }, // 112 Cn Copernicium
{ Nh, "Nihonium", "Nh", 286, true, { kNA, kNA, kNA, 136, kNA, kNA, kNA } }, // 113 Nh Nihonium
{ Fl, "Flerovium", "Fl", 289, true, { kNA, kNA, kNA, 143, kNA, kNA, kNA } }, // 114 Fl Flerovium
{ Mc, "Moscovium", "Mc", 290, true, { kNA, kNA, kNA, 162, kNA, kNA, kNA } }, // 115 Mc Moscovium
{ Lv, "Livermorium", "Lv", 293, true, { kNA, kNA, kNA, 175, kNA, kNA, kNA } }, // 116 Lv Livermorium
{ Ts, "Tennessine", "Ts", 294, true, { kNA, kNA, kNA, 165, kNA, kNA, kNA } }, // 117 Ts Tennessine
{ Og, "Oganesson", "Og", 294, true, { kNA, kNA, kNA, 157, kNA, kNA, kNA } }, // 118 Og Oganesson
{ D, "Deuterium", "D", 2.014f, false, { 53, 25, 37, 32, kNA, kNA, 120 } }, // 1 D Deuterium
};
// The number of entries in kKnownAtoms, calculated from the size of the
// array itself so that it follows the table when rows are added.
uint32_t kKnownAtomsCount = sizeof(kKnownAtoms) / sizeof(kKnownAtoms[0]);
// --------------------------------------------------------------------
// Crystal ionic radii, as taken from Wikipedia (https://en.m.wikipedia.org/wiki/Ionic_radius)
const struct ionic_radii
{
atom_type type;
float radii[11];
} kCrystalIonicRadii[] = {
{ H, { kNA, kNA, 208, -4, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 59, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 41, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 30, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 132, kNA, kNA, kNA, kNA, 30, kNA, 27, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 126, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 119, kNA, kNA, kNA, kNA, kNA, kNA, 22, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 116, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 67.5f, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 54, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 52, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 170, kNA, kNA, kNA, kNA, 51, kNA, 43, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 26, kNA, 41, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 114, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 88.5f, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 100, 81, 74.5f, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 93, 78, 72, 68, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 87, 75.5f, 69, 63, 58, kNA, kNA } }, // Chromium ls
// { Cr,{ kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 81, 72, 67, 47, 39.5f, 60, kNA } }, // Manganese ls
// { Mn,{ kNA, kNA, kNA, kNA, 97, 78.5f, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 75, 69, 72.5f, kNA, 39, kNA, kNA } }, // Iron ls
// { Fe,{ kNA, kNA, kNA, kNA, 92, 78.5f, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 79, 68.5f, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
// { Co,{ kNA, kNA, kNA, kNA, 88.5f, 75, 67, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 83, 70, 62, kNA, kNA, kNA, kNA } }, // Nickel ls
// { Ni,{ kNA, kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 91, 87, 68, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 88, kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 87, kNA, 67, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 72, kNA, 60, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 184, kNA, kNA, kNA, kNA, 64, kNA, 56, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 182, kNA, kNA, 73, kNA, 45, kNA, 53, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 166, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 132, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 104, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 86, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 83, 79, 75, 73, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 78.5f, 74, kNA, 70, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 82, 76, 70.5f, kNA, 52, 150 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 80.5f, 74, 69, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 73, 100, 90, 75.5f, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 129, 108, 89, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 109, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, kNA, kNA, 83, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 90, kNA, 74, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 207, kNA, kNA, kNA, kNA, 111, kNA, 70, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 206, kNA, kNA, kNA, kNA, 109, kNA, 67, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 149, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 117.2f, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 115, 101, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 113, 99, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 143, 112.3f, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 111, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 136, 109.8f, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 131, 108.7f, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 107.8f, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 106.3f, 90, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 121, 105.2f, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 104.1f, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 103, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 117, 102, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 116, 100.8f, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 100.1f, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 85, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 86, 82, 78, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 80, 76, 74, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 72, 69, 67, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 77, 71.5f, 68.5f, 66.5f, 53 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 82, 76.5f, 71, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 94, kNA, 76.5f, 71, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 151, kNA, 99, kNA, 71, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 133, 116, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 164, kNA, 102.5f, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 133, kNA, 91.5f, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 117, kNA, 90, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, 81, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 76, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 194, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 162, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 126, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 108, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 116, 104, 92, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 116.5f, 103, 90, 87, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 124, 115, 101, 89, 86, 85, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 114, 100, 88, 85, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 140, 111.5f, 99, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 111, 99, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 110, 97, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 109, 96.1f, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 92.8f, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
},
kEffectiveIonicRadii[] = {
{ H, { kNA, kNA, 139.9f, -18, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Hydrogen
{ Li, { kNA, kNA, kNA, 76, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Lithium
{ Be, { kNA, kNA, kNA, kNA, 45, kNA, kNA, kNA, kNA, kNA, kNA } }, // Beryllium
{ B, { kNA, kNA, kNA, kNA, kNA, 27, kNA, kNA, kNA, kNA, kNA } }, // Boron
{ C, { kNA, kNA, kNA, kNA, kNA, kNA, 16, kNA, kNA, kNA, kNA } }, // Carbon
{ N, { 146, kNA, kNA, kNA, kNA, 16, kNA, 13, kNA, kNA, kNA } }, // Nitrogen
{ O, { kNA, 140, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Oxygen
{ F, { kNA, kNA, 133, kNA, kNA, kNA, kNA, kNA, kNA, 8, kNA } }, // Fluorine
{ Na, { kNA, kNA, kNA, 102, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Sodium
{ Mg, { kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA, kNA, kNA } }, // Magnesium
{ Al, { kNA, kNA, kNA, kNA, kNA, 53.5f, kNA, kNA, kNA, kNA, kNA } }, // Aluminium
{ Si, { kNA, kNA, kNA, kNA, kNA, kNA, 40, kNA, kNA, kNA, kNA } }, // Silicon
{ P, { 212, kNA, kNA, kNA, kNA, 44, kNA, 38, kNA, kNA, kNA } }, // Phosphorus
{ S, { kNA, 184, kNA, kNA, kNA, kNA, 37, kNA, 29, kNA, kNA } }, // Sulfur
{ Cl, { kNA, kNA, 181, kNA, kNA, kNA, kNA, 12, kNA, 27, kNA } }, // Chlorine
{ K, { kNA, kNA, kNA, 138, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Potassium
{ Ca, { kNA, kNA, kNA, kNA, 100, kNA, kNA, kNA, kNA, kNA, kNA } }, // Calcium
{ Sc, { kNA, kNA, kNA, kNA, kNA, 74.5f, kNA, kNA, kNA, kNA, kNA } }, // Scandium
{ Ti, { kNA, kNA, kNA, kNA, 86, 67, 60.5f, kNA, kNA, kNA, kNA } }, // Titanium
{ V, { kNA, kNA, kNA, kNA, 79, 64, 58, 54, kNA, kNA, kNA } }, // Vanadium
{ Cr, { kNA, kNA, kNA, kNA, 73, 61.5f, 55, 49, 44, kNA, kNA } }, // Chromium ls
{ Cr, { kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA, kNA } }, // Chromium hs
{ Mn, { kNA, kNA, kNA, kNA, 67, 58, 53, 33, 25.5f, 46, kNA } }, // Manganese ls
{ Mn, { kNA, kNA, kNA, kNA, 83, 64.5f, kNA, kNA, kNA, kNA, kNA } }, // Manganese hs
{ Fe, { kNA, kNA, kNA, kNA, 61, 55, 58.5f, kNA, 25, kNA, kNA } }, // Iron ls
{ Fe, { kNA, kNA, kNA, kNA, 78, 64.5f, kNA, kNA, kNA, kNA, kNA } }, // Iron hs
{ Co, { kNA, kNA, kNA, kNA, 65, 54.5f, kNA, kNA, kNA, kNA, kNA } }, // Cobalt ls
{ Co, { kNA, kNA, kNA, kNA, 74.5f, 61, 53, kNA, kNA, kNA, kNA } }, // Cobalt hs
{ Ni, { kNA, kNA, kNA, kNA, 69, 56, 48, kNA, kNA, kNA, kNA } }, // Nickel ls
{ Ni, { kNA, kNA, kNA, kNA, kNA, 60, kNA, kNA, kNA, kNA, kNA } }, // Nickel hs
{ Cu, { kNA, kNA, kNA, 77, 73, 54, kNA, kNA, kNA, kNA, kNA } }, // Copper
{ Zn, { kNA, kNA, kNA, kNA, 74, kNA, kNA, kNA, kNA, kNA, kNA } }, // Zinc
{ Ga, { kNA, kNA, kNA, kNA, kNA, 62, kNA, kNA, kNA, kNA, kNA } }, // Gallium
{ Ge, { kNA, kNA, kNA, kNA, 73, kNA, 53, kNA, kNA, kNA, kNA } }, // Germanium
{ As, { kNA, kNA, kNA, kNA, kNA, 58, kNA, 46, kNA, kNA, kNA } }, // Arsenic
{ Se, { kNA, 198, kNA, kNA, kNA, kNA, 50, kNA, 42, kNA, kNA } }, // Selenium
{ Br, { kNA, kNA, 196, kNA, kNA, 59, kNA, 31, kNA, 39, kNA } }, // Bromine
{ Rb, { kNA, kNA, kNA, 152, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Rubidium
{ Sr, { kNA, kNA, kNA, kNA, 118, kNA, kNA, kNA, kNA, kNA, kNA } }, // Strontium
{ Y, { kNA, kNA, kNA, kNA, kNA, 90, kNA, kNA, kNA, kNA, kNA } }, // Yttrium
{ Zr, { kNA, kNA, kNA, kNA, kNA, kNA, 72, kNA, kNA, kNA, kNA } }, // Zirconium
{ Nb, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Niobium
{ Mo, { kNA, kNA, kNA, kNA, kNA, 69, 65, 61, 59, kNA, kNA } }, // Molybdenum
{ Tc, { kNA, kNA, kNA, kNA, kNA, kNA, 64.5f, 60, kNA, 56, kNA } }, // Technetium
{ Ru, { kNA, kNA, kNA, kNA, kNA, 68, 62, 56.5f, kNA, 38, 36 } }, // Ruthenium
{ Rh, { kNA, kNA, kNA, kNA, kNA, 66.5f, 60, 55, kNA, kNA, kNA } }, // Rhodium
{ Pd, { kNA, kNA, kNA, 59, 86, 76, 61.5f, kNA, kNA, kNA, kNA } }, // Palladium
{ Ag, { kNA, kNA, kNA, 115, 94, 75, kNA, kNA, kNA, kNA, kNA } }, // Silver
{ Cd, { kNA, kNA, kNA, kNA, 95, kNA, kNA, kNA, kNA, kNA, kNA } }, // Cadmium
{ In, { kNA, kNA, kNA, kNA, kNA, 80, kNA, kNA, kNA, kNA, kNA } }, // Indium
{ Sn, { kNA, kNA, kNA, kNA, 118, kNA, 69, kNA, kNA, kNA, kNA } }, // Tin
{ Sb, { kNA, kNA, kNA, kNA, kNA, 76, kNA, 60, kNA, kNA, kNA } }, // Antimony
{ Te, { kNA, 221, kNA, kNA, kNA, kNA, 97, kNA, 56, kNA, kNA } }, // Tellurium
{ I, { kNA, kNA, 220, kNA, kNA, kNA, kNA, 95, kNA, 53, kNA } }, // Iodine
{ Xe, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 48 } }, // Xenon
{ Cs, { kNA, kNA, kNA, 167, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Caesium
{ Ba, { kNA, kNA, kNA, kNA, 135, kNA, kNA, kNA, kNA, kNA, kNA } }, // Barium
{ La, { kNA, kNA, kNA, kNA, kNA, 103.2f, kNA, kNA, kNA, kNA, kNA } }, // Lanthanum
{ Ce, { kNA, kNA, kNA, kNA, kNA, 101, 87, kNA, kNA, kNA, kNA } }, // Cerium
{ Pr, { kNA, kNA, kNA, kNA, kNA, 99, 85, kNA, kNA, kNA, kNA } }, // Praseodymium
{ Nd, { kNA, kNA, kNA, kNA, 129, 98.3f, kNA, kNA, kNA, kNA, kNA } }, // Neodymium
{ Pm, { kNA, kNA, kNA, kNA, kNA, 97, kNA, kNA, kNA, kNA, kNA } }, // Promethium
{ Sm, { kNA, kNA, kNA, kNA, 122, 95.8f, kNA, kNA, kNA, kNA, kNA } }, // Samarium
{ Eu, { kNA, kNA, kNA, kNA, 117, 94.7f, kNA, kNA, kNA, kNA, kNA } }, // Europium
{ Gd, { kNA, kNA, kNA, kNA, kNA, 93.5f, kNA, kNA, kNA, kNA, kNA } }, // Gadolinium
{ Tb, { kNA, kNA, kNA, kNA, kNA, 92.3f, 76, kNA, kNA, kNA, kNA } }, // Terbium
{ Dy, { kNA, kNA, kNA, kNA, 107, 91.2f, kNA, kNA, kNA, kNA, kNA } }, // Dysprosium
{ Ho, { kNA, kNA, kNA, kNA, kNA, 90.1f, kNA, kNA, kNA, kNA, kNA } }, // Holmium
{ Er, { kNA, kNA, kNA, kNA, kNA, 89, kNA, kNA, kNA, kNA, kNA } }, // Erbium
{ Tm, { kNA, kNA, kNA, kNA, 103, 88, kNA, kNA, kNA, kNA, kNA } }, // Thulium
{ Yb, { kNA, kNA, kNA, kNA, 102, 86.8f, kNA, kNA, kNA, kNA, kNA } }, // Ytterbium
{ Lu, { kNA, kNA, kNA, kNA, kNA, 86.1f, kNA, kNA, kNA, kNA, kNA } }, // Lutetium
{ Hf, { kNA, kNA, kNA, kNA, kNA, kNA, 71, kNA, kNA, kNA, kNA } }, // Hafnium
{ Ta, { kNA, kNA, kNA, kNA, kNA, 72, 68, 64, kNA, kNA, kNA } }, // Tantalum
{ W, { kNA, kNA, kNA, kNA, kNA, kNA, 66, 62, 60, kNA, kNA } }, // Tungsten
{ Re, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 58, 55, 53, kNA } }, // Rhenium
{ Os, { kNA, kNA, kNA, kNA, kNA, kNA, 63, 57.5f, 54.5f, 52.5f, 39 } }, // Osmium
{ Ir, { kNA, kNA, kNA, kNA, kNA, 68, 62.5f, 57, kNA, kNA, kNA } }, // Iridium
{ Pt, { kNA, kNA, kNA, kNA, 80, kNA, 62.5f, 57, kNA, kNA, kNA } }, // Platinum
{ Au, { kNA, kNA, kNA, 137, kNA, 85, kNA, 57, kNA, kNA, kNA } }, // Gold
{ Hg, { kNA, kNA, kNA, 119, 102, kNA, kNA, kNA, kNA, kNA, kNA } }, // Mercury
{ Tl, { kNA, kNA, kNA, 150, kNA, 88.5f, kNA, kNA, kNA, kNA, kNA } }, // Thallium
{ Pb, { kNA, kNA, kNA, kNA, 119, kNA, 77.5f, kNA, kNA, kNA, kNA } }, // Lead
{ Bi, { kNA, kNA, kNA, kNA, kNA, 103, kNA, 76, kNA, kNA, kNA } }, // Bismuth
{ Po, { kNA, 223, kNA, kNA, kNA, kNA, 94, kNA, 67, kNA, kNA } }, // Polonium
{ At, { kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, kNA, 62, kNA } }, // Astatine
{ Fr, { kNA, kNA, kNA, 180, kNA, kNA, kNA, kNA, kNA, kNA, kNA } }, // Francium
{ Ra, { kNA, kNA, kNA, kNA, 148, kNA, kNA, kNA, kNA, kNA, kNA } }, // Radium
{ Ac, { kNA, kNA, kNA, kNA, kNA, 106.5f, kNA, kNA, kNA, kNA, kNA } }, // Actinium
{ Th, { kNA, kNA, kNA, kNA, kNA, kNA, 94, kNA, kNA, kNA, kNA } }, // Thorium
{ Pa, { kNA, kNA, kNA, kNA, kNA, 104, 90, 78, kNA, kNA, kNA } }, // Protactinium
{ U, { kNA, kNA, kNA, kNA, kNA, 102.5f, 89, 76, 73, kNA, kNA } }, // Uranium
{ Np, { kNA, kNA, kNA, kNA, 110, 101, 87, 75, 72, 71, kNA } }, // Neptunium
{ Pu, { kNA, kNA, kNA, kNA, kNA, 100, 86, 74, 71, kNA, kNA } }, // Plutonium
{ Am, { kNA, kNA, kNA, kNA, 126, 97.5f, 85, kNA, kNA, kNA, kNA } }, // Americium
{ Cm, { kNA, kNA, kNA, kNA, kNA, 97, 85, kNA, kNA, kNA, kNA } }, // Curium
{ Bk, { kNA, kNA, kNA, kNA, kNA, 96, 83, kNA, kNA, kNA, kNA } }, // Berkelium
{ Cf, { kNA, kNA, kNA, kNA, kNA, 95, 82.1f, kNA, kNA, kNA, kNA } }, // Californium
{ Es, { kNA, kNA, kNA, kNA, kNA, 83.5f, kNA, kNA, kNA, kNA, kNA } }, // Einsteinium
};
// --------------------------------------------------------------------
// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
// One row of the kWKSFData table: the coefficient set stored for a single
// element at a single charge. The table is filled with positional brace
// initialisers, so the order and types of the members below must not change.
struct SFDataArrayElement
{
// Element this row applies to.
atom_type symbol;
// Charge of the species: 0 in the neutral-atom rows, a signed value in the
// ion rows. Some rows store atom_type_traits::kWKSFVal here instead of a
// plain charge. NOTE(review): the meaning of kWKSFVal is not visible from
// here; presumably it marks a special (valence) state -- confirm.
int8_t charge;
// The coefficients themselves; every row of kWKSFData initialises this with
// two groups of six numbers.
atom_type_traits::SFData sf;
};
SFDataArrayElement kWKSFData[] = {
{ H, 0, { { 0.413048, 0.294953, 0.187491, 0.080701, 0.023736, 0.000049 }, { 15.569946, 32.398468, 5.711404, 61.889874, 1.334118, 0.000000 } } },
{ He, 0, { { 0.732354, 0.753896, 0.283819, 0.190003, 0.039139, 0.000487 }, { 11.553918, 4.595831, 1.546299, 26.463964, 0.377523, 0.000000 } } },
{ Li, 0, { { 0.974637, 0.158472, 0.811855, 0.262416, 0.790108, 0.002542 }, { 4.334946, 0.342451, 97.102966, 201.363831, 1.409234, 0.000000 } } },
{ Be, 0, { { 1.533712, 0.638283, 0.601052, 0.106139, 1.118414, 0.002511 }, { 42.662079, 0.595420, 99.106499, 0.151340, 1.843093, 0.000000 } } },
{ B, 0, { { 2.085185, 1.064580, 1.062788, 0.140515, 0.641784, 0.003823 }, { 23.494068, 1.137894, 61.238976, 0.114886, 0.399036, 0.000000 } } },
{ C, 0, { { 2.657506, 1.078079, 1.490909, -4.241070, 0.713791, 4.297983 }, { 14.780758, 0.776775, 42.086842, -0.000294, 0.239535, 0.000000 } } },
{ N, 0, { { 11.893780, 3.277479, 1.858092, 0.858927, 0.912985, -11.804902 }, { 0.000158, 10.232723, 30.344690, 0.656065, 0.217287, 0.000000 } } },
{ O, 0, { { 2.960427, 2.508818, 0.637853, 0.722838, 1.142756, 0.027014 }, { 14.182259, 5.936858, 0.112726, 34.958481, 0.390240, 0.000000 } } },
{ F, 0, { { 3.511943, 2.772244, 0.678385, 0.915159, 1.089261, 0.032557 }, { 10.687859, 4.380466, 0.093982, 27.255203, 0.313066, 0.000000 } } },
{ Ne, 0, { { 4.183749, 2.905726, 0.520513, 1.135641, 1.228065, 0.025576 }, { 8.175457, 3.252536, 0.063295, 21.813910, 0.224952, 0.000000 } } },
{ Na, 0, { { 4.910127, 3.081783, 1.262067, 1.098938, 0.560991, 0.079712 }, { 3.281434, 9.119178, 0.102763, 132.013947, 0.405878, 0.000000 } } },
{ Mg, 0, { { 4.708971, 1.194814, 1.558157, 1.170413, 3.239403, 0.126842 }, { 4.875207, 108.506081, 0.111516, 48.292408, 1.928171, 0.000000 } } },
{ Al, 0, { { 4.730796, 2.313951, 1.541980, 1.117564, 3.154754, 0.139509 }, { 3.628931, 43.051167, 0.095960, 108.932388, 1.555918, 0.000000 } } },
{ Si, 0, { { 5.275329, 3.191038, 1.511514, 1.356849, 2.519114, 0.145073 }, { 2.631338, 33.730728, 0.081119, 86.288643, 1.170087, 0.000000 } } },
{ P, 0, { { 1.950541, 4.146930, 1.494560, 1.522042, 5.729711, 0.155233 }, { 0.908139, 27.044952, 0.071280, 67.520187, 1.981173, 0.000000 } } },
{ S, 0, { { 6.372157, 5.154568, 1.473732, 1.635073, 1.209372, 0.154722 }, { 1.514347, 22.092527, 0.061373, 55.445175, 0.646925, 0.000000 } } },
{ Cl, 0, { { 1.446071, 6.870609, 6.151801, 1.750347, 0.634168, 0.146773 }, { 0.052357, 1.193165, 18.343416, 46.398396, 0.401005, 0.000000 } } },
{ Ar, 0, { { 7.188004, 6.638454, 0.454180, 1.929593, 1.523654, 0.265954 }, { 0.956221, 15.339877, 15.339862, 39.043823, 0.062409, 0.000000 } } },
{ K, 0, { { 8.163991, 7.146945, 1.070140, 0.877316, 1.486434, 0.253614 }, { 12.816323, 0.808945, 210.327011, 39.597652, 0.052821, 0.000000 } } },
{ Ca, 0, { { 8.593655, 1.477324, 1.436254, 1.182839, 7.113258, 0.196255 }, { 10.460644, 0.041891, 81.390381, 169.847839, 0.688098, 0.000000 } } },
{ Sc, 0, { { 1.476566, 1.487278, 1.600187, 9.177463, 7.099750, 0.157765 }, { 53.131023, 0.035325, 137.319489, 9.098031, 0.602102, 0.000000 } } },
{ Ti, 0, { { 9.818524, 1.522646, 1.703101, 1.768774, 7.082555, 0.102473 }, { 8.001879, 0.029763, 39.885422, 120.157997, 0.532405, 0.000000 } } },
{ V, 0, { { 10.473575, 1.547881, 1.986381, 1.865616, 7.056250, 0.067744 }, { 7.081940, 0.026040, 31.909672, 108.022842, 0.474882, 0.000000 } } },
{ Cr, 0, { { 11.007069, 1.555477, 2.985293, 1.347855, 7.034779, 0.065510 }, { 6.366281, 0.023987, 23.244839, 105.774498, 0.429369, 0.000000 } } },
{ Mn, 0, { { 11.709542, 1.733414, 2.673141, 2.023368, 7.003180, -0.147293 }, { 5.597120, 0.017800, 21.788420, 89.517914, 0.383054, 0.000000 } } },
{ Fe, 0, { { 12.311098, 1.876623, 3.066177, 2.070451, 6.975185, -0.304931 }, { 5.009415, 0.014461, 18.743040, 82.767876, 0.346506, 0.000000 } } },
{ Co, 0, { { 12.914510, 2.481908, 3.466894, 2.106351, 6.960892, -0.936572 }, { 4.507138, 0.009126, 16.438129, 76.987320, 0.314418, 0.000000 } } },
{ Ni, 0, { { 13.521865, 6.947285, 3.866028, 2.135900, 4.284731, -2.762697 }, { 4.077277, 0.286763, 14.622634, 71.966080, 0.004437, 0.000000 } } },
{ Cu, 0, { { 14.014192, 4.784577, 5.056806, 1.457971, 6.932996, -3.254477 }, { 3.738280, 0.003744, 13.034982, 72.554794, 0.265666, 0.000000 } } },
{ Zn, 0, { { 14.741002, 6.907748, 4.642337, 2.191766, 38.424042, -36.915829 }, { 3.388232, 0.243315, 11.903689, 63.312130, 0.000397, 0.000000 } } },
{ Ga, 0, { { 15.758946, 6.841123, 4.121016, 2.714681, 2.395246, -0.847395 }, { 3.121754, 0.226057, 12.482196, 66.203621, 0.007238, 0.000000 } } },
{ Ge, 0, { { 16.540613, 1.567900, 3.727829, 3.345098, 6.785079, 0.018726 }, { 2.866618, 0.012198, 13.432163, 58.866047, 0.210974, 0.000000 } } },
{ As, 0, { { 17.025642, 4.503441, 3.715904, 3.937200, 6.790175, -2.984117 }, { 2.597739, 0.003012, 14.272119, 50.437996, 0.193015, 0.000000 } } },
{ Se, 0, { { 17.354071, 4.653248, 4.259489, 4.136455, 6.749163, -3.160982 }, { 2.349787, 0.002550, 15.579460, 45.181202, 0.177432, 0.000000 } } },
{ Br, 0, { { 17.550570, 5.411882, 3.937180, 3.880645, 6.707793, -2.492088 }, { 2.119226, 16.557184, 0.002481, 42.164009, 0.162121, 0.000000 } } },
{ Kr, 0, { { 17.655279, 6.848105, 4.171004, 3.446760, 6.685200, -2.810592 }, { 1.908231, 16.606236, 0.001598, 39.917473, 0.146896, 0.000000 } } },
{ Rb, 0, { { 8.123134, 2.138042, 6.761702, 1.156051, 17.679546, 1.139548 }, { 15.142385, 33.542667, 0.129372, 224.132507, 1.713368, 0.000000 } } },
{ Sr, 0, { { 17.730219, 9.795867, 6.099763, 2.620025, 0.600053, 1.140251 }, { 1.563060, 14.310868, 0.120574, 135.771317, 0.120574, 0.000000 } } },
{ Y, 0, { { 17.792040, 10.253252, 5.714949, 3.170516, 0.918251, 1.131787 }, { 1.429691, 13.132816, 0.112173, 108.197029, 0.112173, 0.000000 } } },
{ Zr, 0, { { 17.859772, 10.911038, 5.821115, 3.512513, 0.746965, 1.124859 }, { 1.310692, 12.319285, 0.104353, 91.777542, 0.104353, 0.000000 } } },
{ Nb, 0, { { 17.958399, 12.063054, 5.007015, 3.287667, 1.531019, 1.123452 }, { 1.211590, 12.246687, 0.098615, 75.011948, 0.098615, 0.000000 } } },
{ Mo, 0, { { 6.236218, 17.987711, 12.973127, 3.451426, 0.210899, 1.108770 }, { 0.090780, 1.108310, 11.468720, 66.684151, 0.090780, 0.000000 } } },
{ Tc, 0, { { 17.840963, 3.428236, 1.373012, 12.947364, 6.335469, 1.074784 }, { 1.005729, 41.901382, 119.320541, 9.781542, 0.083391, 0.000000 } } },
{ Ru, 0, { { 6.271624, 17.906738, 14.123269, 3.746008, 0.908235, 1.043992 }, { 0.077040, 0.928222, 9.555345, 35.860680, 123.552246, 0.000000 } } },
{ Rh, 0, { { 6.216648, 17.919739, 3.854252, 0.840326, 15.173498, 0.995452 }, { 0.070789, 0.856121, 33.889484, 121.686691, 9.029517, 0.000000 } } },
{ Pd, 0, { { 6.121511, 4.784063, 16.631683, 4.318258, 13.246773, 0.883099 }, { 0.062549, 0.784031, 8.751391, 34.489983, 0.784031, 0.000000 } } },
{ Ag, 0, { { 6.073874, 17.155437, 4.173344, 0.852238, 17.988686, 0.756603 }, { 0.055333, 7.896512, 28.443739, 110.376106, 0.716809, 0.000000 } } },
{ Cd, 0, { { 6.080986, 18.019468, 4.018197, 1.303510, 17.974669, 0.603504 }, { 0.048990, 7.273646, 29.119284, 95.831207, 0.661231, 0.000000 } } },
{ In, 0, { { 6.196477, 18.816183, 4.050479, 1.638929, 17.962912, 0.333097 }, { 0.042072, 6.695665, 31.009790, 103.284348, 0.610714, 0.000000 } } },
{ Sn, 0, { { 19.325171, 6.281571, 4.498866, 1.856934, 17.917318, 0.119024 }, { 6.118104, 0.036915, 32.529045, 95.037186, 0.565651, 0.000000 } } },
{ Sb, 0, { { 5.394956, 6.549570, 19.650681, 1.827820, 17.867832, -0.290506 }, { 33.326523, 0.030974, 5.564929, 87.130966, 0.523992, 0.000000 } } },
{ Te, 0, { { 6.660302, 6.940756, 19.847015, 1.557175, 17.802427, -0.806668 }, { 33.031654, 0.025750, 5.065547, 84.101616, 0.487660, 0.000000 } } },
{ I, 0, { { 19.884502, 6.736593, 8.110516, 1.170953, 17.548716, -0.448811 }, { 4.628591, 0.027754, 31.849096, 84.406387, 0.463550, 0.000000 } } },
{ Xe, 0, { { 19.978920, 11.774945, 9.332182, 1.244749, 17.737501, -6.065902 }, { 4.143356, 0.010142, 28.796200, 75.280685, 0.413616, 0.000000 } } },
{ Cs, 0, { { 17.418674, 8.314444, 10.323193, 1.383834, 19.876251, -2.322802 }, { 0.399828, 0.016872, 25.605827, 233.339676, 3.826915, 0.000000 } } },
{ Ba, 0, { { 19.747343, 17.368477, 10.465718, 2.592602, 11.003653, -5.183497 }, { 3.481823, 0.371224, 21.226641, 173.834274, 0.010719, 0.000000 } } },
{ La, 0, { { 19.966019, 27.329655, 11.018425, 3.086696, 17.335455, -21.745489 }, { 3.197408, 0.003446, 19.955492, 141.381973, 0.341817, 0.000000 } } },
{ Ce, 0, { { 17.355122, 43.988499, 20.546650, 3.130670, 11.353665, -38.386017 }, { 0.328369, 0.002047, 3.088196, 134.907654, 18.832960, 0.000000 } } },
{ Pr, 0, { { 21.551311, 17.161730, 11.903859, 2.679103, 9.564197, -3.871068 }, { 2.995675, 0.312491, 17.716705, 152.192825, 0.010468, 0.000000 } } },
{ Nd, 0, { { 17.331244, 62.783924, 12.160097, 2.663483, 22.239950, -57.189842 }, { 0.300269, 0.001320, 17.026001, 148.748993, 2.910268, 0.000000 } } },
{ Pm, 0, { { 17.286388, 51.560162, 12.478557, 2.675515, 22.960947, -45.973682 }, { 0.286620, 0.001550, 16.223755, 143.984512, 2.796480, 0.000000 } } },
{ Sm, 0, { { 23.700363, 23.072214, 12.777782, 2.684217, 17.204367, -17.452166 }, { 2.689539, 0.003491, 15.495437, 139.862473, 0.274536, 0.000000 } } },
{ Eu, 0, { { 17.186195, 37.156837, 13.103387, 2.707246, 24.419271, -31.586687 }, { 0.261678, 0.001995, 14.787360, 134.816299, 2.581883, 0.000000 } } },
{ Gd, 0, { { 24.898117, 17.104952, 13.222581, 3.266152, 48.995213, -43.505684 }, { 2.435028, 0.246961, 13.996325, 110.863091, 0.001383, 0.000000 } } },
{ Tb, 0, { { 25.910013, 32.344139, 13.765117, 2.751404, 17.064405, -26.851971 }, { 2.373912, 0.002034, 13.481969, 125.836510, 0.236916, 0.000000 } } },
{ Dy, 0, { { 26.671785, 88.687576, 14.065445, 2.768497, 17.067781, -83.279831 }, { 2.282593, 0.000665, 12.920230, 121.937187, 0.225531, 0.000000 } } },
{ Ho, 0, { { 27.150190, 16.999819, 14.059334, 3.386979, 46.546471, -41.165253 }, { 2.169660, 0.215414, 12.213148, 100.506783, 0.001211, 0.000000 } } },
{ Er, 0, { { 28.174887, 82.493271, 14.624002, 2.802756, 17.018515, -77.135223 }, { 2.120995, 0.000640, 11.915256, 114.529938, 0.207519, 0.000000 } } },
{ Tm, 0, { { 28.925894, 76.173798, 14.904704, 2.814812, 16.998117, -70.839813 }, { 2.046203, 0.000656, 11.465375, 111.411980, 0.199376, 0.000000 } } },
{ Yb, 0, { { 29.676760, 65.624069, 15.160854, 2.830288, 16.997850, -60.313812 }, { 1.977630, 0.000720, 11.044622, 108.139153, 0.192110, 0.000000 } } },
{ Lu, 0, { { 30.122866, 15.099346, 56.314899, 3.540980, 16.943729, -51.049416 }, { 1.883090, 10.342764, 0.000780, 89.559250, 0.183849, 0.000000 } } },
{ Hf, 0, { { 30.617033, 15.145351, 54.933548, 4.096253, 16.896156, -49.719837 }, { 1.795613, 9.934469, 0.000739, 76.189705, 0.175914, 0.000000 } } },
{ Ta, 0, { { 31.066359, 15.341823, 49.278297, 4.577665, 16.828321, -44.119026 }, { 1.708732, 9.618455, 0.000760, 66.346199, 0.168002, 0.000000 } } },
{ W, 0, { { 31.507900, 15.682498, 37.960129, 4.885509, 16.792112, -32.864574 }, { 1.629485, 9.446448, 0.000898, 59.980675, 0.160798, 0.000000 } } },
{ Re, 0, { { 31.888456, 16.117104, 42.390297, 5.211669, 16.767591, -37.412682 }, { 1.549238, 9.233474, 0.000689, 54.516373, 0.152815, 0.000000 } } },
{ Os, 0, { { 32.210297, 16.678440, 48.559906, 5.455839, 16.735533, -43.677956 }, { 1.473531, 9.049695, 0.000519, 50.210201, 0.145771, 0.000000 } } },
{ Ir, 0, { { 32.004436, 1.975454, 17.070105, 15.939454, 5.990003, 4.018893 }, { 1.353767, 81.014175, 0.128093, 7.661196, 26.659403, 0.000000 } } },
{ Pt, 0, { { 31.273891, 18.445440, 17.063745, 5.555933, 1.575270, 4.050394 }, { 1.316992, 8.797154, 0.124741, 40.177994, 1.316997, 0.000000 } } },
{ Au, 0, { { 16.777390, 19.317156, 32.979683, 5.595453, 10.576854, -6.279078 }, { 0.122737, 8.621570, 1.256902, 38.008820, 0.000601, 0.000000 } } },
{ Hg, 0, { { 16.839890, 20.023823, 28.428564, 5.881564, 4.714706, 4.076478 }, { 0.115905, 8.256927, 1.195250, 39.247227, 1.195250, 0.000000 } } },
{ Tl, 0, { { 16.630795, 19.386616, 32.808571, 1.747191, 6.356862, 4.066939 }, { 0.110704, 7.181401, 1.119730, 90.660263, 26.014978, 0.000000 } } },
{ Pb, 0, { { 16.419567, 32.738590, 6.530247, 2.342742, 19.916475, 4.049824 }, { 0.105499, 1.055049, 25.025890, 80.906593, 6.664449, 0.000000 } } },
{ Bi, 0, { { 16.282274, 32.725136, 6.678302, 2.694750, 20.576559, 4.040914 }, { 0.101180, 1.002287, 25.714146, 77.057549, 6.291882, 0.000000 } } },
{ Po, 0, { { 16.289164, 32.807171, 21.095163, 2.505901, 7.254589, 4.046556 }, { 0.098121, 0.966265, 6.046622, 76.598068, 28.096128, 0.000000 } } },
{ At, 0, { { 16.011461, 32.615547, 8.113899, 2.884082, 21.377867, 3.995684 }, { 0.092639, 0.904416, 26.543257, 68.372963, 5.499512, 0.000000 } } },
{ Rn, 0, { { 16.070229, 32.641106, 21.489658, 2.299218, 9.480184, 4.020977 }, { 0.090437, 0.876409, 5.239687, 69.188477, 27.632641, 0.000000 } } },
{ Fr, 0, { { 16.007385, 32.663830, 21.594351, 1.598497, 11.121192, 4.003472 }, { 0.087031, 0.840187, 4.954467, 199.805801, 26.905106, 0.000000 } } },
{ Ra, 0, { { 32.563690, 21.396671, 11.298093, 2.834688, 15.914965, 3.981773 }, { 0.801980, 4.590666, 22.758972, 160.404388, 0.083544, 0.000000 } } },
{ Ac, 0, { { 15.914053, 32.535042, 21.553976, 11.433394, 3.612409, 3.939212 }, { 0.080511, 0.770669, 4.352206, 21.381622, 130.500748, 0.000000 } } },
{ Th, 0, { { 15.784024, 32.454899, 21.849222, 4.239077, 11.736191, 3.922533 }, { 0.077067, 0.735137, 4.097976, 109.464111, 20.512138, 0.000000 } } },
{ Pa, 0, { { 32.740208, 21.973675, 12.957398, 3.683832, 15.744058, 3.886066 }, { 0.709545, 4.050881, 19.231543, 117.255005, 0.074040, 0.000000 } } },
{ U, 0, { { 15.679275, 32.824306, 13.660459, 3.687261, 22.279434, 3.854444 }, { 0.071206, 0.681177, 18.236156, 112.500038, 3.930325, 0.000000 } } },
{ Np, 0, { { 32.999901, 22.638077, 14.219973, 3.672950, 15.683245, 3.769391 }, { 0.657086, 3.854918, 17.435474, 109.464485, 0.068033, 0.000000 } } },
{ Pu, 0, { { 33.281178, 23.148544, 15.153755, 3.031492, 15.704215, 3.664200 }, { 0.634999, 3.856168, 16.849735, 121.292038, 0.064857, 0.000000 } } },
{ Am, 0, { { 33.435162, 23.657259, 15.576339, 3.027023, 15.746100, 3.541160 }, { 0.612785, 3.792942, 16.195778, 117.757004, 0.061755, 0.000000 } } },
{ Cm, 0, { { 15.804837, 33.480801, 24.150198, 3.655563, 15.499866, 3.390840 }, { 0.058619, 0.590160, 3.674720, 100.736191, 15.408296, 0.000000 } } },
{ Bk, 0, { { 15.889072, 33.625286, 24.710381, 3.707139, 15.839268, 3.213169 }, { 0.055503, 0.569571, 3.615472, 97.694786, 14.754303, 0.000000 } } },
{ Cf, 0, { { 33.794075, 25.467693, 16.048487, 3.657525, 16.008982, 3.005326 }, { 0.550447, 3.581973, 14.357388, 96.064972, 0.052450, 0.000000 } } },
{ H, -1, { { 0.702260, 0.763666, 0.248678, 0.261323, 0.023017, 0.000425 }, { 23.945604, 74.897919, 6.773289, 233.583450, 1.337531, 0.000000 } } },
{ Li, +1, { { 0.432724, 0.549257, 0.376575, -0.336481, 0.976060, 0.001764 }, { 0.260367, 1.042836, 7.885294, 0.260368, 3.042539, 0.000000 } } },
{ Be, +2, { { 3.055430, -2.372617, 1.044914, 0.544233, 0.381737, -0.653773 }, { 0.001226, 0.001227, 1.542106, 0.456279, 4.047479, 0.000000 } } },
{ C, atom_type_traits::kWKSFVal,
{ { 1.258489, 0.728215, 1.119856, 2.168133, 0.705239, 0.019722 },
{ 10.683769, 0.208177, 0.836097, 24.603704, 58.954273, 0.000000 } } },
{ O, -1, { { 3.106934, 3.235142, 1.148886, 0.783981, 0.676953, 0.046136 }, { 19.868080, 6.960252, 0.170043, 65.693512, 0.630757, 0.000000 } } },
{ O, -2, { { 3.990247, 2.300563, 0.607200, 1.907882, 1.167080, 0.025429 }, { 16.639956, 5.636819, 0.108493, 47.299709, 0.379984, 0.000000 } } },
{ F, -1, { { 0.457649, 3.841561, 1.432771, 0.801876, 3.395041, 0.069525 }, { 0.917243, 5.507803, 0.164955, 51.076206, 15.821679, 0.000000 } } },
{ Na, +1, { { 3.148690, 4.073989, 0.767888, 0.995612, 0.968249, 0.045300 }, { 2.594987, 6.046925, 0.070139, 14.122657, 0.217037, 0.000000 } } },
{ Mg, +2, { { 3.062918, 4.135106, 0.853742, 1.036792, 0.852520, 0.058851 }, { 2.015803, 4.417941, 0.065307, 9.669710, 0.187818, 0.000000 } } },
{ Al, +3, { { 4.132015, 0.912049, 1.102425, 0.614876, 3.219136, 0.019397 }, { 3.528641, 7.378344, 0.133708, 0.039065, 1.644728, 0.000000 } } },
{ Si, atom_type_traits::kWKSFVal,
{ { 2.879033, 3.072960, 1.515981, 1.390030, 4.995051, 0.146030 },
{ 1.239713, 38.706276, 0.081481, 93.616333, 2.770293, 0.000000 } } },
{ Si, +4, { { 3.676722, 3.828496, 1.258033, 0.419024, 0.720421, 0.097266 }, { 1.446851, 3.013144, 0.064397, 0.206254, 5.970222, 0.000000 } } },
{ Cl, -1, { { 1.061802, 7.139886, 6.524271, 2.355626, 35.829403, -34.916603 }, { 0.144727, 1.171795, 19.467655, 60.320301, 0.000436, 0.000000 } } },
{ K, +1, { { -17.609339, 1.494873, 7.150305, 10.899569, 15.808228, 0.257164 }, { 18.840979, 0.053453, 0.812940, 22.264105, 14.351593, 0.000000 } } },
{ Ca, +2, { { 8.501441, 12.880483, 9.765095, 7.156669, 0.711160, -21.013187 }, { 10.525848, -0.004033, 0.010692, 0.684443, 27.231771, 0.000000 } } },
{ Sc, +3, { { 7.104348, 1.511488, -53.669773, 38.404816, 24.532240, 0.118642 }, { 0.601957, 0.033386, 12.572138, 10.859736, 14.125230, 0.000000 } } },
{ Ti, +2, { { 7.040119, 1.496285, 9.657304, 0.006534, 1.649561, 0.150362 }, { 0.537072, 0.031914, 8.009958, 201.800293, 24.039482, 0.000000 } } },
{ Ti, +3, { { 36.587933, 7.230255, -9.086077, 2.084594, 17.294008, -35.111282 }, { 0.000681, 0.522262, 5.262317, 15.881716, 6.149805, 0.000000 } } },
{ Ti, +4, { { 45.355537, 7.092900, 7.483858, -43.498817, 1.678915, -0.110628 }, { 9.252186, 0.523046, 13.082852, 10.193876, 0.023064, 0.000000 } } },
{ V, +2, { { 7.754356, 2.064100, 2.576998, 2.011404, 7.126177, -0.533379 }, { 7.066315, 0.014993, 7.066308, 22.055786, 0.467568, 0.000000 } } },
{ V, +3, { { 9.958480, 1.596350, 1.483442, -10.846044, 17.332867, 0.474921 }, { 6.763041, 0.056895, 17.750029, 0.328826, 0.388013, 0.000000 } } },
{ V, +5, { { 15.575018, 8.448095, 1.612040, -9.721855, 1.534029, 0.552676 }, { 0.682708, 5.566640, 10.527077, 0.907961, 0.066667, 0.000000 } } },
{ Cr, +2, { { 10.598877, 1.565858, 2.728280, 0.098064, 6.959321, 0.049870 }, { 6.151846, 0.023519, 17.432816, 54.002388, 0.426301, 0.000000 } } },
{ Cr, +3, { { 7.989310, 1.765079, 2.627125, 1.829380, 6.980908, -0.192123 }, { 6.068867, 0.018342, 6.068887, 16.309284, 0.420864, 0.000000 } } },
{ Mn, +2, { { 11.287712, 26.042414, 3.058096, 0.090258, 7.088306, -24.566132 }, { 5.506225, 0.000774, 16.158575, 54.766354, 0.375580, 0.000000 } } },
{ Mn, +3, { { 6.926972, 2.081342, 11.128379, 2.375107, -0.419287, -0.093713 }, { 0.378315, 0.015054, 5.379957, 14.429586, 0.004939, 0.000000 } } },
{ Mn, +4, { { 12.409131, 7.466993, 1.809947, -12.138477, 10.780248, 0.672146 }, { 0.300400, 0.112814, 12.520756, 0.168653, 5.173237, 0.000000 } } },
{ Fe, +2, { { 11.776765, 11.165097, 3.533495, 0.165345, 7.036932, -9.676919 }, { 4.912232, 0.001748, 14.166556, 42.381958, 0.341324, 0.000000 } } },
{ Fe, +3, { { 9.721638, 63.403847, 2.141347, 2.629274, 7.033846, -61.930725 }, { 4.869297, 0.000293, 4.867602, 13.539076, 0.338520, 0.000000 } } },
{ Co, +2, { { 6.993840, 26.285812, 12.254289, 0.246114, 4.017407, -24.796852 }, { 0.310779, 0.000684, 4.400528, 35.741447, 12.536393, 0.000000 } } },
{ Co, +3, { { 6.861739, 2.678570, 12.281889, 3.501741, -0.179384, -1.147345 }, { 0.309794, 0.008142, 4.331703, 11.914167, 11.914167, 0.000000 } } },
{ Ni, +2, { { 12.519017, 37.832058, 4.387257, 0.661552, 6.949072, -36.344471 }, { 3.933053, 0.000442, 10.449184, 23.860998, 0.283723, 0.000000 } } },
{ Ni, +3, { { 13.579366, 1.902844, 12.859268, 3.811005, -6.838595, -0.317618 }, { 0.313140, 0.012621, 3.906407, 10.894311, 0.344379, 0.000000 } } },
{ Cu, +1, { { 12.960763, 16.342150, 1.110102, 5.520682, 6.915452, -14.849320 }, { 3.576010, 0.000975, 29.523218, 10.114283, 0.261326, 0.000000 } } },
{ Cu, +2, { { 11.895569, 16.344978, 5.799817, 1.048804, 6.789088, -14.878383 }, { 3.378519, 0.000924, 8.133653, 20.526524, 0.254741, 0.000000 } } },
{ Zn, +2, { { 13.340772, 10.428857, 5.544489, 0.762295, 6.869172, -8.945248 }, { 3.215913, 0.001413, 8.542680, 21.891756, 0.239215, 0.000000 } } },
{ Ga, +3, { { 13.123875, 35.288189, 6.126979, 0.611551, 6.724807, -33.875122 }, { 2.809960, 0.000323, 6.831534, 16.784311, 0.212002, 0.000000 } } },
{ Ge, +4, { { 6.876636, 6.779091, 9.969591, 3.135857, 0.152389, 1.086542 }, { 2.025174, 0.176650, 3.573822, 7.685848, 16.677574, 0.000000 } } },
{ Br, -1, { { 17.714310, 6.466926, 6.947385, 4.402674, -0.697279, 1.152674 }, { 2.122554, 19.050768, 0.152708, 58.690361, 58.690372, 0.000000 } } },
{ Rb, +1, { { 17.684320, 7.761588, 6.680874, 2.668883, 0.070974, 1.133263 }, { 1.710209, 14.919863, 0.128542, 31.654478, 0.128543, 0.000000 } } },
{ Sr, +2, { { 17.694973, 1.275762, 6.154252, 9.234786, 0.515995, 1.125309 }, { 1.550888, 30.133041, 0.118774, 13.821799, 0.118774, 0.000000 } } },
{ Y, +3, { { 46.660366, 10.369686, 4.623042, -62.170834, 17.471146, 19.023842 }, { -0.019971, 13.180257, 0.176398, -0.016727, 1.467348, 0.000000 } } },
{ Zr, +4, { { 6.802956, 17.699253, 10.650647, -0.248108, 0.250338, 0.827902 }, { 0.096228, 1.296127, 11.240715, -0.219259, -0.219021, 0.000000 } } },
{ Nb, +3, { { 17.714323, 1.675213, 7.483963, 8.322464, 11.143573, -8.339573 }, { 1.172419, 30.102791, 0.080255, -0.002983, 10.456687, 0.000000 } } },
{ Nb, +5, { { 17.580206, 7.633277, 10.793497, 0.180884, 67.837921, -68.024780 }, { 1.165852, 0.078558, 9.507652, 31.621656, -0.000438, 0.000000 } } },
{ Mo, +3, { { 7.447050, 17.778122, 11.886068, 1.997905, 1.789626, -1.898764 }, { 0.072000, 1.073145, 9.834720, 28.221746, -0.011674, 0.000000 } } },
{ Mo, +5, { { 7.929879, 17.667669, 11.515987, 0.500402, 77.444084, -78.056595 }, { 0.068856, 1.068064, 9.046229, 26.558945, -0.000473, 0.000000 } } },
{ Mo, +6, { { 34.757683, 9.653037, 6.584769, -18.628115, 2.490594, 1.141916 }, { 1.301770, 7.123843, 0.094097, 1.617443, 12.335434, 0.000000 } } },
{ Ru, +3, { { 17.894758, 13.579529, 10.729251, 2.474095, 48.227997, -51.905243 }, { 0.902827, 8.740579, 0.045125, 24.764954, -0.001699, 0.000000 } } },
{ Ru, +4, { { 17.845776, 13.455084, 10.229087, 1.653524, 14.059795, -17.241762 }, { 0.901070, 8.482392, 0.045972, 23.015272, -0.004889, 0.000000 } } },
{ Rh, +3, { { 17.758621, 14.569813, 5.298320, 2.533579, 0.879753, 0.960843 }, { 0.841779, 8.319533, 0.069050, 23.709131, 0.069050, 0.000000 } } },
{ Rh, +4, { { 17.716188, 14.446654, 5.185801, 1.703448, 0.989992, 0.959941 }, { 0.840572, 8.100647, 0.068995, 22.357307, 0.068995, 0.000000 } } },
{ Pd, +2, { { 6.122282, 15.651012, 3.513508, 9.060790, 8.771199, 0.879336 }, { 0.062424, 8.018296, 24.784275, 0.776457, 0.776457, 0.000000 } } },
{ Pd, +4, { { 6.152421, -96.069023, 31.622141, 81.578255, 17.801403, 0.915874 }, { 0.063951, 11.090354, 13.466152, 9.758302, 0.783014, 0.000000 } } },
{ Ag, +1, { { 6.091192, 4.019526, 16.948174, 4.258638, 13.889437, 0.785127 }, { 0.056305, 0.719340, 7.758938, 27.368349, 0.719340, 0.000000 } } },
{ Ag, +2, { { 6.401808, 48.699802, 4.799859, -32.332523, 16.356710, 1.068247 }, { 0.068167, 0.942270, 20.639496, 1.100365, 6.883131, 0.000000 } } },
{ Cd, +2, { { 6.093711, 43.909691, 17.041306, -39.675117, 17.958918, 0.664795 }, { 0.050624, 8.654143, 15.621396, 11.082067, 0.667591, 0.000000 } } },
{ In, +3, { { 6.206277, 18.497746, 3.078131, 10.524613, 7.401234, 0.293677 }, { 0.041357, 6.605563, 18.792250, 0.608082, 0.608082, 0.000000 } } },
{ Sn, +2, { { 6.353672, 4.770377, 14.672025, 4.235959, 18.002131, -0.042519 }, { 0.034720, 6.167891, 6.167879, 29.006456, 0.561774, 0.000000 } } },
{ Sn, +4, { { 15.445732, 6.420892, 4.562980, 1.713385, 18.033537, -0.172219 }, { 6.280898, 0.033144, 6.280899, 17.983601, 0.557980, 0.000000 } } },
{ Sb, +3, { { 10.189171, 57.461918, 19.356573, 4.862206, -45.394096, 1.516108 }, { 0.089485, 0.375256, 5.357987, 22.153736, 0.297768, 0.000000 } } },
{ Sb, +5, { { 17.920622, 6.647932, 12.724075, 1.555545, 7.600591, -0.445371 }, { 0.522315, 0.029487, 5.718210, 16.433775, 5.718204, 0.000000 } } },
{ I, -1, { { 20.010330, 17.835524, 8.104130, 2.231118, 9.158548, -3.341004 }, { 4.565931, 0.444266, 32.430672, 95.149040, 0.014906, 0.000000 } } },
{ Cs, +1, { { 19.939056, 24.967621, 10.375884, 0.454243, 17.660248, -19.394306 }, { 3.770511, 0.004040, 25.311275, 76.537766, 0.384730, 0.000000 } } },
{ Ba, +2, { { 19.750200, 17.513683, 10.884892, 0.321585, 65.149834, -59.618172 }, { 3.430748, 0.361590, 21.358307, 70.309402, 0.001418, 0.000000 } } },
{ La, +3, { { 19.688887, 17.345703, 11.356296, 0.099418, 82.358124, -76.846909 }, { 3.146211, 0.339586, 18.753832, 90.345459, 0.001072, 0.000000 } } },
{ Ce, +3, { { 26.593231, 85.866432, -6.677695, 12.111847, 17.401903, -80.313423 }, { 3.280381, 0.001012, 4.313575, 17.868504, 0.326962, 0.000000 } } },
{ Ce, +4, { { 17.457533, 25.659941, 11.691037, 19.695251, -16.994749, -3.515096 }, { 0.311812, -0.003793, 16.568687, 2.886395, -0.008931, 0.000000 } } },
{ Pr, +3, { { 20.879841, 36.035797, 12.135341, 0.283103, 17.167803, -30.500784 }, { 2.870897, 0.002364, 16.615236, 53.909359, 0.306993, 0.000000 } } },
{ Pr, +4, { { 17.496082, 21.538509, 20.403114, 12.062211, -7.492043, -9.016722 }, { 0.294457, -0.002742, 2.772886, 15.804613, -0.013556, 0.000000 } } },
{ Nd, +3, { { 17.120077, 56.038139, 21.468307, 10.000671, 2.905866, -50.541992 }, { 0.291295, 0.001421, 2.743681, 14.581367, 22.485098, 0.000000 } } },
{ Pm, +3, { { 22.221066, 17.068142, 12.805423, 0.435687, 52.238770, -46.767181 }, { 2.635767, 0.277039, 14.927315, 45.768017, 0.001455, 0.000000 } } },
{ Sm, +3, { { 15.618565, 19.538092, 13.398946, -4.358811, 24.490461, -9.714854 }, { 0.006001, 0.306379, 14.979594, 0.748825, 2.454492, 0.000000 } } },
{ Eu, +2, { { 23.899035, 31.657497, 12.955752, 1.700576, 16.992199, -26.204315 }, { 2.467332, 0.002230, 13.625002, 35.089481, 0.253136, 0.000000 } } },
{ Eu, +3, { { 17.758327, 33.498665, 24.067188, 13.436883, -9.019134, -19.768026 }, { 0.244474, -0.003901, 2.487526, 14.568011, -0.015628, 0.000000 } } },
{ Gd, +3, { { 24.344999, 16.945311, 13.866931, 0.481674, 93.506378, -88.147179 }, { 2.333971, 0.239215, 12.982995, 43.876347, 0.000673, 0.000000 } } },
{ Tb, +3, { { 24.878252, 16.856016, 13.663937, 1.279671, 39.271294, -33.950317 }, { 2.223301, 0.227290, 11.812528, 29.910065, 0.001527, 0.000000 } } },
{ Dy, +3, { { 16.864344, 90.383461, 13.675473, 1.687078, 25.540651, -85.150650 }, { 0.216275, 0.000593, 11.121207, 26.250975, 2.135930, 0.000000 } } },
{ Ho, +3, { { 16.837524, 63.221336, 13.703766, 2.061602, 26.202621, -58.026505 }, { 0.206873, 0.000796, 10.500283, 24.031883, 2.055060, 0.000000 } } },
{ Er, +3, { { 16.810127, 22.681061, 13.864114, 2.294506, 26.864477, -17.513460 }, { 0.198293, 0.002126, 9.973341, 22.836388, 1.979442, 0.000000 } } },
{ Tm, +3, { { 16.787500, 15.350905, 14.182357, 2.299111, 27.573771, -10.192087 }, { 0.190852, 0.003036, 9.602934, 22.526880, 1.912862, 0.000000 } } },
{ Yb, +2, { { 28.443794, 16.849527, 14.165081, 3.445311, 28.308853, -23.214935 }, { 1.863896, 0.183811, 9.225469, 23.691355, 0.001463, 0.000000 } } },
{ Yb, +3, { { 28.191629, 16.828087, 14.167848, 2.744962, 23.171774, -18.103676 }, { 1.842889, 0.182788, 9.045957, 20.799847, 0.001759, 0.000000 } } },
{ Lu, +3, { { 28.828693, 16.823227, 14.247617, 3.079559, 25.647667, -20.626528 }, { 1.776641, 0.175560, 8.575531, 19.693701, 0.001453, 0.000000 } } },
{ Hf, +4, { { 29.267378, 16.792543, 14.785310, 2.184128, 23.791996, -18.820383 }, { 1.697911, 0.168313, 8.190025, 18.277578, 0.001431, 0.000000 } } },
{ Ta, +5, { { 29.539469, 16.741854, 15.182070, 1.642916, 16.437447, -11.542459 }, { 1.612934, 0.160460, 7.654408, 17.070732, 0.001858, 0.000000 } } },
{ W, +6, { { 29.729357, 17.247808, 15.184488, 1.154652, 0.739335, 3.945157 }, { 1.501648, 0.140803, 6.880573, 14.299601, 14.299618, 0.000000 } } },
{ Os, +4, { { 17.113485, 15.792370, 23.342392, 4.090271, 7.671292, 3.988390 }, { 0.131850, 7.288542, 1.389307, 19.629425, 1.389307, 0.000000 } } },
{ Ir, +3, { { 31.537575, 16.363338, 15.597141, 5.051404, 1.436935, 4.009459 }, { 1.334144, 7.451918, 0.127514, 21.705648, 0.127515, 0.000000 } } },
{ Ir, +4, { { 30.391249, 16.146996, 17.019068, 4.458904, 0.975372, 4.006865 }, { 1.328519, 7.181766, 0.127337, 19.060146, 1.328519, 0.000000 } } },
{ Pt, +2, { { 31.986849, 17.249048, 15.269374, 5.760234, 1.694079, 4.032512 }, { 1.281143, 7.625512, 0.123571, 24.190826, 0.123571, 0.000000 } } },
{ Pt, +4, { { 41.932713, 16.339224, 17.653894, 6.012420, -12.036877, 4.094551 }, { 1.111409, 6.466086, 0.128917, 16.954155, 0.778721, 0.000000 } } },
{ Au, +1, { { 32.124306, 16.716476, 16.814100, 7.311565, 0.993064, 4.040792 }, { 1.216073, 7.165378, 0.118715, 20.442486, 53.095985, 0.000000 } } },
{ Au, +3, { { 31.704271, 17.545767, 16.819551, 5.522640, 0.361725, 4.042679 }, { 1.215561, 7.220506, 0.118812, 20.050970, 1.215562, 0.000000 } } },
{ Hg, +1, { { 28.866837, 19.277540, 16.776051, 6.281459, 3.710289, 4.068430 }, { 1.173967, 7.583842, 0.115351, 29.055994, 1.173968, 0.000000 } } },
{ Hg, +2, { { 32.411079, 18.690371, 16.711773, 9.974835, -3.847611, 4.052869 }, { 1.162980, 7.329806, 0.114518, 22.009489, 22.009493, 0.000000 } } },
{ Tl, +1, { { 32.295044, 16.570049, 17.991013, 1.535355, 7.554591, 4.054030 }, { 1.101544, 0.110020, 6.528559, 52.495068, 20.338634, 0.000000 } } },
{ Tl, +3, { { 32.525639, 19.139185, 17.100321, 5.891115, 12.599463, -9.256075 }, { 1.094966, 6.900992, 0.103667, 18.489614, -0.001401, 0.000000 } } },
{ Pb, +2, { { 27.392647, 16.496822, 19.984501, 6.813923, 5.233910, 4.065623 }, { 1.058874, 0.106305, 6.708123, 24.395554, 1.058874, 0.000000 } } },
{ Pb, +4, { { 32.505657, 20.014240, 14.645661, 5.029499, 1.760138, 4.044678 }, { 1.047035, 6.670321, 0.105279, 16.525040, 0.105279, 0.000000 } } },
{ Bi, +3, { { 32.461437, 19.438683, 16.302486, 7.322662, 0.431704, 4.043703 }, { 0.997930, 6.038867, 0.101338, 18.371586, 46.361046, 0.000000 } } },
{ Bi, +5, { { 16.734028, 20.580494, 9.452623, 61.155834, -34.041023, 4.113663 }, { 0.105076, 4.773282, 11.762162, 1.211775, 1.619408, 0.000000 } } },
{ Ra, +2, { { 4.986228, 32.474945, 21.947443, 11.800013, 10.807292, 3.956572 }, { 0.082597, 0.791468, 4.608034, 24.792431, 0.082597, 0.000000 } } },
{ Ac, +3, { { 15.584983, 32.022125, 21.456327, 0.757593, 12.341252, 3.838984 }, { 0.077438, 0.739963, 4.040735, 47.525002, 19.406845, 0.000000 } } },
{ Th, +4, { { 15.515445, 32.090691, 13.996399, 12.918157, 7.635514, 3.831122 }, { 0.074499, 0.711663, 3.871044, 18.596891, 3.871044, 0.000000 } } },
{ U, +3, { { 15.360309, 32.395657, 21.961290, 1.325894, 14.251453, 3.706622 }, { 0.067815, 0.654643, 3.643409, 39.604965, 16.330570, 0.000000 } } },
{ U, +4, { { 15.355091, 32.235306, 0.557745, 14.396367, 21.751173, 3.705863 }, { 0.067789, 0.652613, 42.354237, 15.908239, 3.553231, 0.000000 } } },
{ U, +6, { { 15.333844, 31.770849, 21.274414, 13.872636, 0.048519, 3.700591 }, { 0.067644, 0.646384, 3.317894, 14.650250, 75.339699, 0.000000 } } },
{ Np, +3, { { 15.378152, 32.572132, 22.206125, 1.413295, 14.828381, 3.603370 }, { 0.064613, 0.631420, 3.561936, 37.875511, 15.546129, 0.000000 } } },
{ Np, +4, { { 15.373926, 32.423019, 21.969994, 0.662078, 14.969350, 3.603039 }, { 0.064597, 0.629658, 3.476389, 39.438942, 15.135764, 0.000000 } } },
{ Np, +6, { { 15.359986, 31.992825, 21.412458, 0.066574, 14.568174, 3.600942 }, { 0.064528, 0.624505, 3.253441, 67.658318, 13.980832, 0.000000 } } },
{ Pu, +3, { { 15.356004, 32.769127, 22.680210, 1.351055, 15.416232, 3.428895 }, { 0.060590, 0.604663, 3.491509, 37.260635, 14.981921, 0.000000 } } },
{ Pu, +4, { { 15.416219, 32.610569, 22.256662, 0.719495, 15.518152, 3.480408 }, { 0.061456, 0.607938, 3.411848, 37.628792, 14.464360, 0.000000 } } },
{ Pu, +6, { { 15.436506, 32.289719, 14.726737, 15.012391, 7.024677, 3.502325 }, { 0.061815, 0.606541, 3.245363, 13.616438, 3.245364, 0.000000 } } }
};
// Scattering-factor table scanned by atom_type_traits::elsf().
// Each entry is { element, charge, { { five coefficients }, { five coefficients } } }.
// Every entry in this table has charge 0; elsf() matches on the element only
// and returns the first entry whose element equals the requested type.
// NOTE(review): presumably these are electron scattering factor parameters
// (amplitude / exponent pairs of a five-term Gaussian sum) -- confirm against
// the original source of the table.
SFDataArrayElement kELSFData[] = {
{ H, 0, { { 0.034900, 0.120100, 0.197000, 0.057300, 0.119500 }, { 0.534700, 3.586700, 12.347100, 18.952499, 38.626900 } } },
{ He, 0, { { 0.031700, 0.083800, 0.152600, 0.133400, 0.016400 }, { 0.250700, 1.475100, 4.493800, 12.664600, 31.165300 } } },
{ Li, 0, { { 0.075000, 0.224900, 0.554800, 1.495400, 0.935400 }, { 0.386400, 2.938300, 15.382900, 53.554501, 138.733704 } } },
{ Be, 0, { { 0.078000, 0.221000, 0.674000, 1.386700, 0.692500 }, { 0.313100, 2.238100, 10.151700, 30.906099, 78.327301 } } },
{ B, 0, { { 0.090900, 0.255100, 0.773800, 1.213600, 0.460600 }, { 0.299500, 2.115500, 8.381600, 24.129200, 63.131401 } } },
{ C, 0, { { 0.089300, 0.256300, 0.757000, 1.048700, 0.357500 }, { 0.246500, 1.710000, 6.409400, 18.611300, 50.252300 } } },
{ N, 0, { { 0.102200, 0.321900, 0.798200, 0.819700, 0.171500 }, { 0.245100, 1.748100, 6.192500, 17.389400, 48.143101 } } },
{ O, 0, { { 0.097400, 0.292100, 0.691000, 0.699000, 0.203900 }, { 0.206700, 1.381500, 4.694300, 12.710500, 32.472599 } } },
{ F, 0, { { 0.108300, 0.317500, 0.648700, 0.584600, 0.142100 }, { 0.205700, 1.343900, 4.278800, 11.393200, 28.788099 } } },
{ Ne, 0, { { 0.126900, 0.353500, 0.558200, 0.467400, 0.146000 }, { 0.220000, 1.377900, 4.020300, 9.493400, 23.127800 } } },
{ Na, 0, { { 0.214200, 0.685300, 0.769200, 1.658900, 1.448200 }, { 0.333400, 2.344600, 10.083000, 48.303699, 138.270004 } } },
{ Mg, 0, { { 0.231400, 0.686600, 0.967700, 2.188200, 1.133900 }, { 0.327800, 2.272000, 10.924100, 39.289799, 101.974800 } } },
{ Al, 0, { { 0.239000, 0.657300, 1.201100, 2.558600, 1.231200 }, { 0.313800, 2.106300, 10.416300, 34.455200, 98.534401 } } },
{ Si, 0, { { 0.251900, 0.637200, 1.379500, 2.508200, 1.050000 }, { 0.307500, 2.017400, 9.674600, 29.374399, 80.473198 } } },
{ P, 0, { { 0.254800, 0.610600, 1.454100, 2.320400, 0.847700 }, { 0.290800, 1.874000, 8.517600, 24.343399, 63.299599 } } },
{ S, 0, { { 0.249700, 0.562800, 1.389900, 2.186500, 0.771500 }, { 0.268100, 1.671100, 7.026700, 19.537701, 50.388802 } } },
{ Cl, 0, { { 0.244300, 0.539700, 1.391900, 2.019700, 0.662100 }, { 0.246800, 1.524200, 6.153700, 16.668699, 42.308601 } } },
{ Ar, 0, { { 0.238500, 0.501700, 1.342800, 1.889900, 0.607900 }, { 0.228900, 1.369400, 5.256100, 14.092800, 35.536098 } } },
{ K, 0, { { 0.411500, 1.403100, 2.278400, 2.674200, 2.216200 }, { 0.370300, 3.387400, 13.102900, 68.959198, 194.432907 } } },
{ Ca, 0, { { 0.405400, 1.388000, 2.160200, 3.753200, 2.206300 }, { 0.349900, 3.099100, 11.960800, 53.935299, 142.389206 } } },
{ Sc, 0, { { 0.378700, 1.218100, 2.059400, 3.261800, 2.387000 }, { 0.313300, 2.585600, 9.581300, 41.768799, 116.728203 } } },
{ Ti, 0, { { 0.382500, 1.259800, 2.000800, 3.061700, 2.069400 }, { 0.304000, 2.486300, 9.278300, 39.075100, 109.458298 } } },
{ V, 0, { { 0.387600, 1.275000, 1.910900, 2.831400, 1.897900 }, { 0.296700, 2.378000, 8.798100, 35.952801, 101.720100 } } },
{ Cr, 0, { { 0.404600, 1.369600, 1.894100, 2.080000, 1.219600 }, { 0.298600, 2.395800, 9.140600, 37.470100, 113.712097 } } },
{ Mn, 0, { { 0.379600, 1.209400, 1.781500, 2.542000, 1.593700 }, { 0.269900, 2.045500, 7.472600, 31.060400, 91.562202 } } },
{ Fe, 0, { { 0.394600, 1.272500, 1.703100, 2.314000, 1.479500 }, { 0.271700, 2.044300, 7.600700, 29.971399, 86.226501 } } },
{ Co, 0, { { 0.411800, 1.316100, 1.649300, 2.193000, 1.283000 }, { 0.274200, 2.037200, 7.720500, 29.968000, 84.938301 } } },
{ Ni, 0, { { 0.386000, 1.176500, 1.545100, 2.073000, 1.381400 }, { 0.247800, 1.766000, 6.310700, 25.220400, 74.314598 } } },
{ Cu, 0, { { 0.431400, 1.320800, 1.523600, 1.467100, 0.856200 }, { 0.269400, 1.922300, 7.347400, 28.989201, 90.624603 } } },
{ Zn, 0, { { 0.428800, 1.264600, 1.447200, 1.829400, 1.093400 }, { 0.259300, 1.799800, 6.750000, 25.586000, 73.528397 } } },
{ Ga, 0, { { 0.481800, 1.403200, 1.656100, 2.460500, 1.105400 }, { 0.282500, 1.978500, 8.754600, 32.523800, 98.552299 } } },
{ Ge, 0, { { 0.465500, 1.301400, 1.608800, 2.699800, 1.300300 }, { 0.264700, 1.792600, 7.607100, 26.554100, 77.523804 } } },
{ As, 0, { { 0.451700, 1.222900, 1.585200, 2.795800, 1.263800 }, { 0.249300, 1.643600, 6.815400, 22.368099, 62.039001 } } },
{ Se, 0, { { 0.447700, 1.167800, 1.584300, 2.808700, 1.195600 }, { 0.240500, 1.544200, 6.323100, 19.461000, 52.023300 } } },
{ Br, 0, { { 0.479800, 1.194800, 1.869500, 2.695300, 0.820300 }, { 0.250400, 1.596300, 6.965300, 19.849199, 50.323299 } } },
{ Kr, 0, { { 0.454600, 1.099300, 1.769600, 2.706800, 0.867200 }, { 0.230900, 1.427900, 5.944900, 16.675200, 42.224300 } } },
{ Rb, 0, { { 1.016000, 2.852800, 3.546600, -7.780400, 12.114800 }, { 0.485300, 5.092500, 25.785101, 130.451508, 138.677505 } } },
{ Sr, 0, { { 0.670300, 1.492600, 3.336800, 4.460000, 3.150100 }, { 0.319000, 2.228700, 10.350400, 52.329102, 151.221603 } } },
{ Y, 0, { { 0.689400, 1.547400, 3.245000, 4.212600, 2.976400 }, { 0.318900, 2.290400, 10.006200, 44.077099, 125.012001 } } },
{ Zr, 0, { { 0.671900, 1.468400, 3.166800, 3.955700, 2.892000 }, { 0.303600, 2.124900, 8.923600, 36.845798, 108.204903 } } },
{ Nb, 0, { { 0.612300, 1.267700, 3.034800, 3.384100, 2.368300 }, { 0.270900, 1.768300, 7.248900, 27.946501, 98.562401 } } },
{ Mo, 0, { { 0.677300, 1.479800, 3.178800, 3.082400, 1.838400 }, { 0.292000, 2.060600, 8.112900, 30.533600, 100.065804 } } },
{ Tc, 0, { { 0.708200, 1.639200, 3.199300, 3.432700, 1.871100 }, { 0.297600, 2.210600, 8.524600, 33.145599, 96.637703 } } },
{ Ru, 0, { { 0.673500, 1.493400, 3.096600, 2.725400, 1.559700 }, { 0.277300, 1.971600, 7.324900, 26.689100, 90.558098 } } },
{ Rh, 0, { { 0.641300, 1.369000, 2.985400, 2.695200, 1.543300 }, { 0.258000, 1.772100, 6.385400, 23.254900, 85.151703 } } },
{ Pd, 0, { { 0.590400, 1.177500, 2.651900, 2.287500, 0.868900 }, { 0.232400, 1.501900, 5.159100, 15.542800, 46.821301 } } },
{ Ag, 0, { { 0.637700, 1.379000, 2.829400, 2.363100, 1.455300 }, { 0.246600, 1.697400, 5.765600, 20.094299, 76.737198 } } },
{ Cd, 0, { { 0.636400, 1.424700, 2.780200, 2.597300, 1.788600 }, { 0.240700, 1.682300, 5.658800, 20.721901, 69.110901 } } },
{ In, 0, { { 0.676800, 1.658900, 2.774000, 3.183500, 2.132600 }, { 0.252200, 1.854500, 6.293600, 25.145700, 84.544800 } } },
{ Sn, 0, { { 0.722400, 1.961000, 2.716100, 3.560300, 1.897200 }, { 0.265100, 2.060400, 7.301100, 27.549299, 81.334900 } } },
{ Sb, 0, { { 0.710600, 1.924700, 2.614900, 3.832200, 1.889900 }, { 0.256200, 1.964600, 6.885200, 24.764799, 68.916801 } } },
{ Te, 0, { { 0.694700, 1.869000, 2.535600, 4.001300, 1.895500 }, { 0.245900, 1.854200, 6.441100, 22.173000, 59.220600 } } },
{ I, 0, { { 0.704700, 1.948400, 2.594000, 4.152600, 1.505700 }, { 0.245500, 1.863800, 6.763900, 21.800699, 56.439499 } } },
{ Xe, 0, { { 0.673700, 1.790800, 2.412900, 4.210000, 1.705800 }, { 0.230500, 1.689000, 5.821800, 18.392799, 47.249599 } } },
{ Cs, 0, { { 1.270400, 3.801800, 5.661800, 0.920500, 4.810500 }, { 0.435600, 4.205800, 23.434200, 136.778305, 171.756104 } } },
{ Ba, 0, { { 0.904900, 2.607600, 4.849800, 5.160300, 4.738800 }, { 0.306600, 2.436300, 12.182100, 54.613499, 161.997803 } } },
{ La, 0, { { 0.840500, 2.386300, 4.613900, 5.151400, 4.794900 }, { 0.279100, 2.141000, 10.340000, 41.914799, 132.020401 } } },
{ Ce, 0, { { 0.855100, 2.391500, 4.577200, 5.027800, 4.511800 }, { 0.280500, 2.120000, 10.180800, 42.063301, 130.989304 } } },
{ Pr, 0, { { 0.909600, 2.531300, 4.526600, 4.637600, 4.369000 }, { 0.293900, 2.247100, 10.826600, 48.884201, 147.602005 } } },
{ Nd, 0, { { 0.880700, 2.418300, 4.444800, 4.685800, 4.172500 }, { 0.280200, 2.083600, 10.035700, 47.450600, 146.997604 } } },
{ Pm, 0, { { 0.947100, 2.546300, 4.352300, 4.478900, 3.908000 }, { 0.297700, 2.227600, 10.576200, 49.361900, 145.358002 } } },
{ Sm, 0, { { 0.969900, 2.583700, 4.277800, 4.457500, 3.598500 }, { 0.300300, 2.244700, 10.648700, 50.799400, 146.417892 } } },
{ Eu, 0, { { 0.869400, 2.241300, 3.919600, 3.969400, 4.549800 }, { 0.265300, 1.859000, 8.399800, 36.739700, 125.708900 } } },
{ Gd, 0, { { 0.967300, 2.470200, 4.114800, 4.497200, 3.209900 }, { 0.290900, 2.101400, 9.706700, 43.426998, 125.947403 } } },
{ Tb, 0, { { 0.932500, 2.367300, 3.879100, 3.967400, 3.799600 }, { 0.276100, 1.951100, 8.929600, 41.593700, 131.012207 } } },
{ Dy, 0, { { 0.950500, 2.370500, 3.821800, 4.047100, 3.445100 }, { 0.277300, 1.946900, 8.886200, 43.093800, 133.139603 } } },
{ Ho, 0, { { 0.924800, 2.242800, 3.618200, 3.791000, 3.791200 }, { 0.266000, 1.818300, 7.965500, 33.112900, 101.813904 } } },
{ Er, 0, { { 1.037300, 2.482400, 3.655800, 3.892500, 3.005600 }, { 0.294400, 2.079700, 9.415600, 45.805599, 132.772003 } } },
{ Tm, 0, { { 1.007500, 2.378700, 3.544000, 3.693200, 3.175900 }, { 0.281600, 1.948600, 8.716200, 41.841999, 125.031998 } } },
{ Yb, 0, { { 1.034700, 2.391100, 3.461900, 3.655600, 3.005200 }, { 0.285500, 1.967900, 8.761900, 42.330399, 125.649902 } } },
{ Lu, 0, { { 0.992700, 2.243600, 3.355400, 3.781300, 3.099400 }, { 0.270100, 1.807300, 7.811200, 34.484901, 103.352600 } } },
{ Hf, 0, { { 1.029500, 2.291100, 3.411000, 3.949700, 2.492500 }, { 0.276100, 1.862500, 8.096100, 34.271198, 98.529503 } } },
{ Ta, 0, { { 1.019000, 2.229100, 3.409700, 3.925200, 2.267900 }, { 0.269400, 1.796200, 7.694400, 31.094200, 91.108902 } } },
{ W, 0, { { 0.985300, 2.116700, 3.357000, 3.798100, 2.279800 }, { 0.256900, 1.674500, 7.009800, 26.923401, 81.390999 } } },
{ Re, 0, { { 0.991400, 2.085800, 3.453100, 3.881200, 1.852600 }, { 0.254800, 1.651800, 6.884500, 26.723400, 81.721497 } } },
{ Os, 0, { { 0.981300, 2.032200, 3.366500, 3.623500, 1.974100 }, { 0.248700, 1.597300, 6.473700, 23.281700, 70.925400 } } },
{ Ir, 0, { { 1.019400, 2.064500, 3.442500, 3.491400, 1.697600 }, { 0.255400, 1.647500, 6.596600, 23.226900, 70.027199 } } },
{ Pt, 0, { { 0.914800, 1.809600, 3.213400, 3.295300, 1.575400 }, { 0.226300, 1.381300, 5.324300, 17.598700, 60.017101 } } },
{ Au, 0, { { 0.967400, 1.891600, 3.399300, 3.052400, 1.260700 }, { 0.235800, 1.471200, 5.675800, 18.711901, 61.528599 } } },
{ Hg, 0, { { 1.003300, 1.946900, 3.439600, 3.154800, 1.418000 }, { 0.241300, 1.529800, 5.800900, 19.452000, 60.575298 } } },
{ Tl, 0, { { 1.068900, 2.103800, 3.603900, 3.492700, 1.828300 }, { 0.254000, 1.671500, 6.350900, 23.153099, 78.709900 } } },
{ Pb, 0, { { 1.089100, 2.186700, 3.616000, 3.803100, 1.899400 }, { 0.255200, 1.717400, 6.513100, 23.917000, 74.703903 } } },
{ Bi, 0, { { 1.100700, 2.230600, 3.568900, 4.154900, 2.038200 }, { 0.254600, 1.735100, 6.494800, 23.646400, 70.377998 } } },
{ Po, 0, { { 1.156800, 2.435300, 3.645900, 4.406400, 1.717900 }, { 0.264800, 1.878600, 7.174900, 25.176600, 69.282097 } } },
{ At, 0, { { 1.090900, 2.197600, 3.383100, 4.670000, 2.127700 }, { 0.246600, 1.670700, 6.019700, 20.765699, 57.266300 } } },
{ Rn, 0, { { 1.075600, 2.163000, 3.317800, 4.885200, 2.048900 }, { 0.240200, 1.616900, 5.764400, 19.456800, 52.500900 } } },
{ Fr, 0, { { 1.428200, 3.508100, 5.676700, 4.196400, 3.894600 }, { 0.318300, 2.688900, 13.481600, 54.386600, 200.832108 } } },
{ Ra, 0, { { 1.312700, 3.124300, 5.298800, 5.389100, 5.413300 }, { 0.288700, 2.289700, 10.827600, 43.538898, 145.610901 } } },
{ Ac, 0, { { 1.312800, 3.102100, 5.338500, 5.961100, 4.756200 }, { 0.286100, 2.250900, 10.528700, 41.779598, 128.297302 } } },
{ Th, 0, { { 1.255300, 2.917800, 5.086200, 6.120600, 4.712200 }, { 0.270100, 2.063600, 9.305100, 34.597698, 107.919998 } } },
{ Pa, 0, { { 1.321800, 3.144400, 5.437100, 5.644400, 4.010700 }, { 0.282700, 2.225000, 10.245400, 41.116199, 124.444901 } } },
{ U, 0, { { 1.338200, 3.204300, 5.455800, 5.483900, 3.634200 }, { 0.283800, 2.245200, 10.251900, 41.725101, 124.902298 } } },
{ Np, 0, { { 1.519300, 4.005300, 6.532700, -0.140200, 6.748900 }, { 0.321300, 2.820600, 14.887800, 68.910301, 81.725700 } } },
{ Pu, 0, { { 1.351700, 3.293700, 5.321300, 4.646600, 3.571400 }, { 0.281300, 2.241800, 9.995200, 42.793900, 132.173904 } } },
{ Am, 0, { { 1.213500, 2.796200, 4.754500, 4.573100, 4.478600 }, { 0.248300, 1.843700, 7.542100, 29.384100, 112.457901 } } },
{ Cm, 0, { { 1.293700, 3.110000, 5.039300, 4.754600, 3.503100 }, { 0.263800, 2.034100, 8.710100, 35.299198, 109.497200 } } },
{ Bk, 0, { { 1.291500, 3.102300, 4.930900, 4.600900, 3.466100 }, { 0.261100, 2.002300, 8.437700, 34.155899, 105.891098 } } },
{ Cf, 0, { { 1.208900, 2.739100, 4.348200, 4.004700, 4.649700 }, { 0.242100, 1.748700, 6.726200, 23.215300, 80.310799 } } }
};
} // namespace data
// --------------------------------------------------------------------
// atom_type_traits
/// Look up an element by its symbol.
///
/// The symbol is compared case-insensitively against every entry of the
/// known-atoms table; the first match wins. The exact, case-sensitive symbol
/// "X" always selects the first table entry, overriding any match.
///
/// @param symbol  Element symbol to look up.
/// @throws std::invalid_argument  When no table entry could be selected.
atom_type_traits::atom_type_traits(const std::string &symbol)
    : m_info(nullptr)
{
    for (auto &candidate : data::kKnownAtoms)
    {
        if (not cif::iequals(candidate.symbol, symbol))
            continue;

        m_info = &candidate;
        break;
    }

    // "X" is pinned to the first table entry, whatever the scan found.
    if (symbol == "X")
        m_info = &data::kKnownAtoms[0];

    if (m_info != nullptr)
        return;

    throw std::invalid_argument("Not a known element: " + symbol);
}
/// Look up an element by its atom_type value, used directly as an index into
/// the known-atoms table.
///
/// @param t  Atom type; must satisfy H <= t < data::kKnownAtomsCount.
/// @throws std::invalid_argument  When @a t lies outside that range.
atom_type_traits::atom_type_traits(atom_type t)
{
    const bool in_range = t >= H and t < data::kKnownAtomsCount;
    if (not in_range)
        throw std::invalid_argument("atomType out of range");

    m_info = &data::kKnownAtoms[t];

    // The table is expected to be ordered so that index == type.
    assert(m_info->type == t);
}
/// Tell whether @a symbol matches, case-insensitively, the symbol of any
/// entry in the known-atoms table.
///
/// @param symbol  Element symbol to test.
/// @return true when a matching entry exists, false otherwise.
bool atom_type_traits::is_element(const std::string &symbol)
{
    for (auto &known : data::kKnownAtoms)
    {
        if (cif::iequals(known.symbol, symbol))
            return true;
    }

    return false;
}
/// Report the `metal` flag of the first known-atoms entry whose symbol
/// matches @a symbol case-insensitively.
///
/// @param symbol  Element symbol to look up.
/// @return The entry's metal flag, or false when the symbol is not found.
bool atom_type_traits::is_metal(const std::string &symbol)
{
    for (auto &known : data::kKnownAtoms)
    {
        if (cif::iequals(known.symbol, symbol))
            return known.metal;
    }

    // Unknown symbols are reported as non-metal rather than as an error.
    return false;
}
/// Tell whether the kWKSFData table holds an entry for this element with
/// exactly the given charge.
///
/// @param charge  Charge to look for.
/// @return true when a matching entry exists, false otherwise.
bool atom_type_traits::has_sf(int charge) const
{
    // Deuterium is looked up under hydrogen.
    auto element = m_info->type;
    if (element == D)
        element = H;

    for (auto &entry : data::kWKSFData)
    {
        if (entry.symbol == element and entry.charge == charge)
            return true;
    }

    return false;
}
/// Return the scattering-factor data stored in kWKSFData for this element at
/// the requested charge.
///
/// When no entry exists for a non-zero @a charge, the zero-charge entry of the
/// same element is used instead (a notice is written to std::cerr when VERBOSE
/// is positive).
///
/// @param charge  Charge of the atom.
/// @return Reference to the matching table entry's data.
/// @throws std::out_of_range  When neither the requested charge nor the
///         zero-charge fallback is present in the table.
auto atom_type_traits::wksf(int charge) const -> const SFData &
{
    // Deuterium is looked up under hydrogen.
    auto type = m_info->type;
    if (type == D)
        type = H;

    for (auto &sf : data::kWKSFData)
    {
        if (sf.symbol == type and sf.charge == charge)
            return sf.sf;
    }

    if (charge != 0)
    {
        // Oops, not found. Fall back to zero charge and see if we can use that
        if (VERBOSE > 0)
            std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge...\n";

        for (auto &sf : data::kWKSFData)
        {
            if (sf.symbol == type and sf.charge == 0)
                return sf.sf;
        }
    }

    // Keep the element name and the charge separated in the message; they
    // used to be glued together (e.g. "...for Carbon2").
    throw std::out_of_range("No scattering factor found for " + name() + " with charge " + std::to_string(charge));
}
/// Return the scattering-factor data stored in kELSFData for this element.
/// The lookup matches on the element only; charge is not considered.
///
/// @return Reference to the matching table entry's data.
/// @throws std::invalid_argument  When the element has no entry in the table.
auto atom_type_traits::elsf() const -> const SFData &
{
    // Deuterium is looked up under hydrogen.
    auto element = m_info->type;
    if (element == D)
        element = H;

    for (auto &entry : data::kELSFData)
    {
        if (entry.symbol != element)
            continue;

        return entry.sf;
    }

    throw std::invalid_argument("No scattering factor found for " + name());
}
// ionic radii
/// Return the crystal ionic radius of this element for the given charge.
///
/// The value is read from data::kCrystalIonicRadii and divided by 100.
/// NOTE(review): presumably the table is in picometres and the result in
/// angstrom -- confirm against the table definition.
///
/// @param charge  Ionic charge, in the range -3..-1 or +1..+8.
/// @return The radius, or kNA when the charge is zero or out of range, or
///         when the element has no row in the table.
float atom_type_traits::crystal_ionic_radius(int charge) const
{
    // The index mapping below has slots for charges -3..-1 and +1..+8 only.
    // A charge of zero used to be mapped onto the slot of charge -1, which
    // silently returned the radius of the anion; a neutral atom has no ionic
    // radius, so report it as not available instead.
    if (charge == 0 or charge < -3 or charge > 8)
        return kNA;

    const int slot = charge < 0 ? charge + 3 : charge + 2;

    for (auto &r : data::kCrystalIonicRadii)
    {
        if (r.type == m_info->type)
            return r.radii[slot] / 100.0f;
    }

    return kNA;
}
/// Return the effective ionic radius of this element for the given charge.
///
/// The value is read from data::kEffectiveIonicRadii and divided by 100.
/// NOTE(review): presumably the table is in picometres and the result in
/// angstrom -- confirm against the table definition.
///
/// @param charge  Ionic charge, in the range -3..-1 or +1..+8.
/// @return The radius, or kNA when the charge is zero or out of range, or
///         when the element has no row in the table.
float atom_type_traits::effective_ionic_radius(int charge) const
{
    // The index mapping below has slots for charges -3..-1 and +1..+8 only.
    // A charge of zero used to be mapped onto the slot of charge -1, which
    // silently returned the radius of the anion; a neutral atom has no ionic
    // radius, so report it as not available instead.
    if (charge == 0 or charge < -3 or charge > 8)
        return kNA;

    const int slot = charge < 0 ? charge + 3 : charge + 2;

    for (auto &r : data::kEffectiveIonicRadii)
    {
        if (r.type == m_info->type)
            return r.radii[slot] / 100.0f;
    }

    return kNA;
}
} // namespace cif
// NOLINTEND(modernize-use-std-numbers)

Some files were not shown because too many files have changed in this diff Show More