Compare commits

..

845 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
d638d634ba Remove gxrio and replace it with stripped down version 2022-11-09 14:23:51 +01:00
Maarten L. Hekkelman
35196789e0 updated makefile, use system regex if good enough 2022-11-09 10:33:44 +01:00
Maarten L. Hekkelman
e907ce6c29 exclude regex from tar 2022-11-09 09:19:39 +01:00
Maarten L. Hekkelman
b80bc20d17 Use system installed boost headers for regex, when available 2022-11-09 08:54:56 +01:00
Maarten L. Hekkelman
3a87eaa435 include utility
exclude from all for gxrio
2022-11-08 15:46:03 +01:00
Maarten L. Hekkelman
143eb57f04 Removed remaining mrc_add_resources call 2022-11-08 15:38:06 +01:00
Maarten L. Hekkelman
6cc550bf18 oops 2022-11-08 15:20:29 +01:00
Maarten L. Hekkelman
7f5336661b Error reporting 2022-11-08 14:53:15 +01:00
Maarten L. Hekkelman
e44539ef2c Checks before building indices. Better error reporting 2022-11-08 13:45:06 +01:00
Maarten L. Hekkelman
a2f5850173 avoid crash on empty branches 2022-11-08 09:29:15 +01:00
Maarten L. Hekkelman
283f4883f7 Fix makefile 2022-11-08 08:43:49 +01:00
Maarten L. Hekkelman
ce9842f671 Fix in PDB export
Better add_git_submodule
2022-11-08 08:28:02 +01:00
Maarten L. Hekkelman
b784433fd7 less verbose parser 2022-11-07 17:00:49 +01:00
Maarten L. Hekkelman
8c064e7c0a version bump 2022-11-07 12:37:01 +01:00
Maarten L. Hekkelman
c15a8bd127 export source tarballs 2022-11-07 12:32:14 +01:00
Maarten L. Hekkelman
64e40e7b31 Fix writing PDB CISPEP records
Better checking for open files
More verbose parser
2022-11-07 11:06:06 +01:00
Maarten L. Hekkelman
06d254e0de Revert "Use system version of boost regex, when available"
This reverts commit eaa342ca32.
2022-11-03 16:52:09 +01:00
Maarten L. Hekkelman
eaa342ca32 Use system version of boost regex, when available 2022-11-03 16:36:14 +01:00
Maarten L. Hekkelman
782f7c467b Support for cifv1.0 (empty category names) 2022-11-03 15:48:58 +01:00
Maarten L. Hekkelman
c45d02cb70 sigh 2022-11-03 13:09:04 +01:00
Maarten L. Hekkelman
5b4c131eea More verbose 2022-11-03 13:00:11 +01:00
Maarten L. Hekkelman
bbe71af99e Accept invalid CCD component files, for now 2022-11-03 12:18:26 +01:00
Maarten L. Hekkelman
49912d019f Better error reporting 2022-11-03 11:43:57 +01:00
Maarten L. Hekkelman
d4758e09d7 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-03 09:37:38 +01:00
Maarten L. Hekkelman
f0a913cc07 Fixes for deuterium...
Fixes for sugar branches
2022-11-03 09:37:31 +01:00
Maarten L. Hekkelman
8b0b8e3688 removed submodule 2022-11-01 16:52:42 +01:00
Maarten L. Hekkelman
de4fc8a015 Added necessary include 2022-11-01 16:50:54 +01:00
Maarten L. Hekkelman
bf1e56ec53 Another attempt 2022-11-01 16:29:59 +01:00
Maarten L. Hekkelman
040b4e4ff9 clean up 2022-11-01 14:54:26 +01:00
Maarten L. Hekkelman
4666ee3145 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 14:54:10 +01:00
Maarten L. Hekkelman
2958c56a92 change parser to use streambuf directly 2022-11-01 14:53:17 +01:00
Maarten L. Hekkelman
9cff8768ab Merge branch 'potential-performance-gain' into trunk 2022-11-01 13:41:06 +01:00
Maarten L. Hekkelman
cc671b8006 fixes in numeric conversions 2022-11-01 13:41:01 +01:00
Maarten L. Hekkelman
728abe6d0e less verbose pdb2cif 2022-11-01 12:11:04 +01:00
Maarten L. Hekkelman
7b8f3f2538 optimise retract buffer 2022-11-01 11:56:18 +01:00
Maarten L. Hekkelman
98db98f916 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-11-01 09:54:20 +01:00
Maarten L. Hekkelman
96a67b23ca Fix loading dictionaries 2022-11-01 09:53:36 +01:00
Maarten L. Hekkelman
2c3d7542e5 no submodule for gxrio 2022-11-01 08:45:52 +01:00
Maarten L. Hekkelman
f84d83b723 Add gxrio dependency again 2022-10-31 10:50:53 +01:00
Maarten L. Hekkelman
b1837ba029 for freebsd 2022-10-31 10:35:28 +01:00
Maarten L. Hekkelman
260438fa44 fix for meta project 2022-10-30 19:51:29 +01:00
Maarten L. Hekkelman
23d82beb04 Fix version string 2022-10-30 13:02:53 +01:00
Maarten L. Hekkelman
19db5d736b Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:14:16 +01:00
Maarten L. Hekkelman
6946c40657 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-10-30 11:12:30 +01:00
Maarten L. Hekkelman
bd3723ee20 Do not crash on empty rows (find result) 2022-10-30 11:12:19 +01:00
Maarten L. Hekkelman
1f078d4827 update for meta project 2022-10-30 10:28:01 +01:00
Maarten L. Hekkelman
3c62a38667 write out PDB files 2022-10-28 16:13:33 +02:00
Maarten L. Hekkelman
7ffda74e3d pdb2cif, avoid duplicate key violation on REMARK 350 2022-10-26 16:43:21 +02:00
Maarten L. Hekkelman
560f6debc6 updating dictionaries and default to downloading components.cif 2022-10-26 15:46:50 +02:00
Maarten L. Hekkelman
ea1ac33de8 Update README.md
No more boost requirement
2022-10-26 15:30:56 +02:00
Maarten L. Hekkelman
7ea30237ae Merge branch 'trunk' into new-develop 2022-10-26 15:23:35 +02:00
Maarten L. Hekkelman
bc668487e2 Fix installation of dictionary files 2022-10-26 14:29:40 +02:00
Maarten L. Hekkelman
1769f9864b Fixed TLS parser, and more 2022-10-26 13:23:23 +02:00
Maarten L. Hekkelman
75ffd97802 use gxrio internally for reading pdb files 2022-10-25 12:41:14 +02:00
Maarten L. Hekkelman
cfd5b7da0f Reintroduce more atom members 2022-10-25 11:51:35 +02:00
Maarten L. Hekkelman
26b7d1df26 Write chem_comp for polymer residues in pdb2cif 2022-10-21 16:01:12 +02:00
Maarten L. Hekkelman
0747929cd6 Allow missing CRYST1 record in PDB files... finally 2022-10-21 15:42:19 +02:00
Maarten L. Hekkelman
5bcfb102f4 change_residue fix 2022-10-20 19:48:26 +02:00
Maarten L. Hekkelman
908fb1ccea gxrio dependency 2022-10-20 13:36:18 +02:00
Maarten L. Hekkelman
af8389baa4 fix dependencies 2022-10-11 15:36:53 +02:00
Maarten L. Hekkelman
24ca1017cd Fix mm::polymer to have its own auth_asym_id 2022-10-11 12:17:42 +02:00
Maarten L. Hekkelman
85c21aeb01 row as vector 2022-10-11 09:05:03 +02:00
Maarten L. Hekkelman
2f249048d9 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-10-10 09:03:34 +02:00
Maarten L. Hekkelman
974cb40ab3 for gcc 9.4 2022-10-10 09:01:40 +02:00
Maarten L. Hekkelman
c01c16ea60 more accessors in cif::mm::atom 2022-10-05 17:04:39 +02:00
Maarten L. Hekkelman
bd157c249c Reintroduced PDB conversion code 2022-10-05 14:18:08 +02:00
Maarten L. Hekkelman
b0ac33c1b1 optimisation in query
fix memory problem in cif::item, making it safer
2022-10-05 13:21:57 +02:00
Maarten L. Hekkelman
82e73a9525 All tests pass 2022-10-05 10:13:53 +02:00
Maarten L. Hekkelman
adc316d671 Use index in lookup, when available
start branch code in model::structure (not done yet)
2022-10-04 13:57:38 +02:00
Maarten L. Hekkelman
6a0b6b99ac using index in category 2022-10-03 16:22:32 +02:00
Maarten L. Hekkelman
08dd9dd5b4 missing methods 2022-10-03 10:01:42 +02:00
Maarten L. Hekkelman
557a1c2d00 Reactivated several structure methods 2022-10-03 09:32:20 +02:00
Maarten L. Hekkelman
f77bbfedda updates 2022-10-03 08:41:50 +02:00
Maarten L. Hekkelman
3aa3fe19e2 Better validate diagnostic output 2022-09-30 10:25:55 +02:00
Maarten L. Hekkelman
35fcc0493e Moving cif::Structure back in as model 2022-09-28 17:11:04 +02:00
Maarten L. Hekkelman
9485bec2fa Moved symmetry back into libcifpp 2022-09-27 16:09:24 +02:00
Maarten L. Hekkelman
4b759e731c Moved code back into libcifpp, 3d stuff mainly 2022-09-27 14:34:31 +02:00
Maarten L. Hekkelman
7dd6a8a1aa fixes and updated submodules 2022-08-24 16:20:16 +02:00
Maarten L. Hekkelman
96725ae8b9 Clean up makefile 2022-08-19 13:55:35 +02:00
Maarten L. Hekkelman
b3a0ded9a8 add git submodule 2022-08-19 13:38:59 +02:00
Maarten L. Hekkelman
184c491803 changed find1 a bit more
reverted to returning empty results in case nothing is found
2022-08-18 17:20:41 +02:00
Maarten L. Hekkelman
f944b3ce00 changed find1 logic for only row_handles, now returns empty row_handle instead of throwing when not found
various condition fixes
2022-08-18 11:16:28 +02:00
Maarten L. Hekkelman
2557f41863 some documentation and cleanup of cif::item 2022-08-17 20:40:31 +02:00
Maarten L. Hekkelman
2b92cee3f7 some documentation and cleanup of cif::item 2022-08-17 20:14:48 +02:00
Maarten L. Hekkelman
8071768579 better row_handle::get 2022-08-17 19:39:14 +02:00
Maarten L. Hekkelman
71c8541b68 validate links
fix get_parents_condition
2022-08-17 17:08:38 +02:00
Maarten L. Hekkelman
3d66c77188 writing order 2022-08-17 16:02:57 +02:00
Maarten L. Hekkelman
8701512961 Remove cif++/Cif++.hpp
Implemented reorder by index
2022-08-17 15:29:14 +02:00
Maarten L. Hekkelman
b317c780ba Fixes for pre c++20
file constructor from raw data
2022-08-17 15:02:56 +02:00
Maarten L. Hekkelman
681aa3bf8b clean up 2022-08-17 11:24:29 +02:00
Maarten L. Hekkelman
a68e053471 Remove tests 2022-08-17 11:17:09 +02:00
Maarten L. Hekkelman
25a90e3b32 split out pdbx code
fix dangling memory reference
2022-08-17 11:14:06 +02:00
Maarten L. Hekkelman
2f62759dfe Before split-out of libpdbxpp 2022-08-17 08:50:56 +02:00
Maarten L. Hekkelman
cf9ec46ab8 Removed DSSP code, moved to dssp project 2022-08-16 16:57:25 +02:00
Maarten L. Hekkelman
ecbef51b10 - fix category::clear
- fix dssp TCO value
2022-08-16 16:34:15 +02:00
Maarten L. Hekkelman
dfff8c9587 condition work (children, parents) 2022-08-16 16:18:11 +02:00
Maarten L. Hekkelman
cc5d52bbf9 boost::regex usage 2022-08-16 16:17:56 +02:00
Maarten L. Hekkelman
a9e9f86c93 Renaming internal use of mmcif_pdbx dictionary. It was wrong to use the name mmcif_pdbx_v50 2022-08-16 16:17:30 +02:00
Maarten L. Hekkelman
a2c52713b2 Refactored dssp to be standalone 2022-08-16 11:50:43 +02:00
Maarten L. Hekkelman
545aca88d8 More info 2022-08-15 17:17:56 +02:00
Maarten L. Hekkelman
ac27248784 Started moving DSSP code 2022-08-15 15:37:07 +02:00
Maarten L. Hekkelman
5758bfbaea Required changes for FreeBSD 2022-08-15 10:11:11 +02:00
Maarten L. Hekkelman
8d3a079774 Fix regex for item_validator 2022-08-15 10:09:18 +02:00
Maarten L. Hekkelman
718c138510 Fix constructor for item 2022-08-15 09:49:23 +02:00
Maarten L. Hekkelman
29aac70e67 Changed boost::regex dependency to git submodule, in case it is needed 2022-08-15 08:59:47 +02:00
Maarten L. Hekkelman
700575adfe Merged 2022-08-15 07:42:51 +02:00
Maarten L. Hekkelman
9fe6e5df85 remove dependency on boost::program_options 2022-08-14 20:08:36 +02:00
Maarten L. Hekkelman
ce7434a463 Make boost::regex optional 2022-08-14 19:31:31 +02:00
Maarten L. Hekkelman
ad7d876d07 new dependency 2022-08-13 21:55:19 +02:00
Maarten L. Hekkelman
0dc19e86fa fixed example 2022-08-11 20:53:54 +02:00
Maarten L. Hekkelman
a12acaa5c7 moving from namespace cif::v2 to cif 2022-08-11 20:39:34 +02:00
Maarten L. Hekkelman
ff62efe720 More tests 2022-08-11 16:49:59 +02:00
Maarten L. Hekkelman
2407877184 Reordering all files 2022-08-11 15:44:59 +02:00
Maarten L. Hekkelman
5fde050738 debugging 2022-08-10 17:09:53 +02:00
Maarten L. Hekkelman
a855f88073 Getting rid of boost/algorithm/string 2022-08-10 16:46:48 +02:00
Maarten L. Hekkelman
cfa2acd61d backup 2022-08-08 08:10:22 +02:00
Maarten L. Hekkelman
d9db2fe2e7 insert 2022-08-07 20:48:59 +02:00
Maarten L. Hekkelman
15d62cd3b6 Fix mapping between pdbx_poly_seq_scheme and atom_site while reading an mmcif::Structure 2022-08-07 12:51:16 +02:00
Maarten L. Hekkelman
19a89aeb7e - start row_initializer 2022-08-07 11:49:09 +02:00
Maarten L. Hekkelman
677c61c32f moving insert_impl, index work 2022-08-06 16:08:34 +02:00
Maarten L. Hekkelman
4dd4f66397 backup 2022-08-04 16:29:55 +02:00
Maarten L. Hekkelman
04b7828abc validator work 2022-08-04 13:57:12 +02:00
Maarten L. Hekkelman
9c621ecab8 more condition work 2022-08-03 16:40:48 +02:00
Maarten L. Hekkelman
ab9c4d9416 compiling again 2022-08-03 16:09:27 +02:00
Maarten L. Hekkelman
e5eb62255a started with validator, running into the ground 2022-08-03 12:44:58 +02:00
Maarten L. Hekkelman
98ff79432b backup 2022-08-02 16:56:55 +02:00
Maarten L. Hekkelman
24fa80ba2a parser just started working again, a bit 2022-08-02 16:42:52 +02:00
Maarten L. Hekkelman
3999d792ef const iterator construction from non-const iterator 2022-08-02 11:59:29 +02:00
Maarten L. Hekkelman
4db3732749 move construction and operators = 2022-08-02 11:42:11 +02:00
Maarten L. Hekkelman
07131e8b40 copy constructor for category 2022-08-02 11:24:14 +02:00
Maarten L. Hekkelman
39b91e74c9 - new item storage
- formatting of numbers using to_chars
2022-08-02 10:35:14 +02:00
Maarten L. Hekkelman
d4bb7ec3bc Fixed includes based on feedback from Patricia Herrera. 2022-08-01 19:00:42 +02:00
Maarten L. Hekkelman
6175b7e359 backup 2022-08-01 16:58:52 +02:00
Maarten L. Hekkelman
10442d506a structured binding, start 2022-08-01 15:08:42 +02:00
Maarten L. Hekkelman
573a695c3d small steps 2022-08-01 12:57:00 +02:00
Maarten L. Hekkelman
a76bef0d01 backup 2022-08-01 08:33:49 +02:00
Maarten L. Hekkelman
e20111b566 small optimization 2022-07-31 16:19:24 +02:00
Maarten L. Hekkelman
4a1d9c8f75 New storage layout for item_value 2022-07-31 15:53:18 +02:00
Maarten L. Hekkelman
26c86282e3 before refactoring item_value based on statistics 2022-07-27 16:46:33 +02:00
Maarten L. Hekkelman
0eaeb1650d split out item 2022-07-27 12:41:25 +02:00
Maarten L. Hekkelman
f4a6533f6b eerste fröbelwerk 2022-07-26 17:00:14 +02:00
Maarten L. Hekkelman
df1b6a13e1 updated git ignore 2022-07-25 16:21:55 +02:00
Maarten L. Hekkelman
e8f24f617c - Fixed copy/paste error in Category interface
- message in progress finished
2022-07-25 10:40:03 +02:00
Maarten L. Hekkelman
9454fdc217 Fix for parsing empty quoted strings. 2022-07-21 13:03:23 +02:00
Maarten L. Hekkelman
22543d8fe5 Update changelog 2022-07-12 16:55:55 +02:00
Maarten L. Hekkelman
60d1dc82e6 Improved remark 3 parser for large molecules 2022-07-12 16:54:38 +02:00
Maarten L. Hekkelman
87486f87ef revert loading compressed dictionaries 2022-07-12 14:20:01 +02:00
Maarten L. Hekkelman
80e7da0f13 Locating dictionaries updated 2022-07-12 14:13:59 +02:00
Maarten L. Hekkelman
3745beae66 Fix search order for resources 2022-07-12 13:55:19 +02:00
Maarten L. Hekkelman
3965840bfa New way of locating resources 2022-07-12 13:49:02 +02:00
Maarten L. Hekkelman
a88c6f3d32 Fix for older clang (on MacOS?). 2022-07-05 11:07:12 +02:00
Maarten L. Hekkelman
ed6c6f0026 Move assignment of Structure is not possible due to reference to datablock 2022-07-05 09:46:59 +02:00
Maarten L. Hekkelman
bdda9d72b5 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2022-07-05 09:36:36 +02:00
Maarten L. Hekkelman
fd080e778e Changes for MacOS/MSVC 2022-07-05 09:36:26 +02:00
Maarten L. Hekkelman
9f72df2ecd Update LICENSE
Format changed, now recognized
2022-07-01 08:21:22 +02:00
ojcharles
617db012f0 Update Cif++.cpp (#15)
add mutex library for std::unique_lock, not included in std lib
2022-07-01 08:07:24 +02:00
Maarten L. Hekkelman
9d15541237 include cstring for gnu c++ 12 2022-06-22 08:53:05 +02:00
Maarten L. Hekkelman
35c99564c6 Fix importing sugars from PDB files 2022-06-01 15:17:54 +02:00
Maarten L. Hekkelman
1d8fe334d6 Fix writing sugar branches 2022-06-01 13:22:34 +02:00
Maarten L. Hekkelman
d86bb314ac Better handling of missing residues/mismatch seqres 2022-06-01 11:27:41 +02:00
Maarten L. Hekkelman
0ef8eb59f8 Fix scattering factors error 2022-05-18 13:04:42 +02:00
Maarten L. Hekkelman
b5fe4a9a87 locating resources that might be protected 2022-05-18 11:53:13 +02:00
Maarten L. Hekkelman
11fea31b98 more loading resources 2022-05-18 11:37:26 +02:00
Maarten L. Hekkelman
f629275ed5 locating resources that might be protected 2022-05-18 11:25:47 +02:00
Maarten L. Hekkelman
a5f6166469 locating resources that might be protected 2022-05-18 11:14:14 +02:00
Maarten L. Hekkelman
501050e591 Add move constructor to mmcif::Structure 2022-05-10 17:11:04 +02:00
Maarten L. Hekkelman
e1b240b2b2 sugar work 2022-05-04 16:48:28 +02:00
Maarten L. Hekkelman
3d79278ed7 Merge branch 'trunk' into develop 2022-05-04 09:51:15 +02:00
Maarten L. Hekkelman
5e0b197a43 mmcif::Atom::compound() revision 2022-05-04 09:50:24 +02:00
Maarten L. Hekkelman
9c4170d9e2 - Added more const members
- change PDB writing interface
2022-05-03 11:52:15 +02:00
Maarten L. Hekkelman
af721eb196 Make having no compound less fatal 2022-05-02 14:40:22 +02:00
Maarten L. Hekkelman
788e315f5e Fix entity_branch_link entry 2022-05-02 12:24:35 +02:00
Maarten L. Hekkelman
4a82a8d5a8 Fixed all tests 2022-05-02 11:09:36 +02:00
Maarten L. Hekkelman
11019a26f8 Merge branch 'sugar-tests' into develop 2022-05-02 10:03:44 +02:00
Maarten L. Hekkelman
6f8909dce9 Fixed tests 2022-05-02 10:01:10 +02:00
Maarten L. Hekkelman
5525103aaf backup 2022-05-02 09:26:59 +02:00
Maarten L. Hekkelman
291ef737b1 - Fix removing atoms
- Optimize isUnquotedString
2022-05-01 14:09:06 +02:00
Maarten L. Hekkelman
af125bdd57 backup 2022-04-26 16:04:13 +02:00
Maarten L. Hekkelman
79089bbb8c removed incorrect assert 2022-04-20 16:32:47 +02:00
Maarten L. Hekkelman
1f08498d00 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-04-20 11:18:50 +02:00
Maarten L. Hekkelman
49ba714a03 - structure id stuff
- added cif::null
- more tests
2022-04-20 11:17:11 +02:00
Maarten L. Hekkelman
85fd9296b2 Add test for loading 2022-04-19 17:03:09 +02:00
Maarten L. Hekkelman
1cda14867f More interface changes in mmcif::Structure 2022-04-19 13:40:38 +02:00
Maarten L. Hekkelman
2d2b26f7dc Fix regression in bondmap calculation 2022-04-19 09:10:54 +02:00
Maarten L. Hekkelman
93b33af44a oops, wrong field name 2022-04-13 10:57:50 +02:00
Maarten L. Hekkelman
eb80490bcd getPolymerByAsymID 2022-04-13 09:47:18 +02:00
Maarten L. Hekkelman
ba2b06f5af reduce complexity 2022-04-13 09:39:43 +02:00
Maarten L. Hekkelman
fecc762db1 - better link validation
- better output (quote reserved strings)
2022-04-12 17:00:47 +02:00
Maarten L. Hekkelman
1e406253ab loading unknown atoms 2022-04-12 12:41:25 +02:00
Maarten L. Hekkelman
6e3b85f43d getResidue, again 2022-04-11 16:36:40 +02:00
Maarten L. Hekkelman
58f1b626e2 change getResidue 2022-04-06 12:49:03 +02:00
Maarten L. Hekkelman
c104a08e16 fixed Atom::charge to pick more sensible default 2022-03-30 11:14:11 +02:00
Maarten L. Hekkelman
dd0f6ca1e6 accept more invalid characters, sigh 2022-03-29 11:45:33 +02:00
Maarten L. Hekkelman
f02ea91b51 label and auth seq id, some improvements 2022-03-28 09:50:37 +02:00
Maarten L. Hekkelman
6768a501a3 access to atoms 2022-03-21 09:58:16 +01:00
Maarten L. Hekkelman
879e15c759 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-03-14 16:28:55 +01:00
Maarten L. Hekkelman
89285b4abc construct quaternion from angle/axis 2022-03-14 16:28:41 +01:00
Maarten L. Hekkelman
c584714f91 ion radii 2022-03-09 15:40:32 +01:00
Maarten L. Hekkelman
f5016403b7 refactored mmcif::File 2022-03-02 15:26:29 +01:00
Maarten L. Hekkelman
c8f66ae6bb start remove residue 2022-02-23 08:24:26 +01:00
Maarten L. Hekkelman
858c967e71 Locate mmcif dictionary in CCP4 space 2022-02-15 08:08:01 +01:00
Maarten L. Hekkelman
f9ca5de5bf Add missing include for gcc 8.2 2022-02-09 16:04:24 +01:00
Maarten L. Hekkelman
252c3476a1 Slightly better handling of hetero residues 2022-02-09 14:53:05 +01:00
Maarten L. Hekkelman
19210df6db Fix parsing mmCIF files with an unquoted string ?? 2022-02-08 11:22:10 +01:00
Maarten L. Hekkelman
15c5730749 Remove redundant FindFilesystem include 2022-02-03 10:35:34 +01:00
Maarten L. Hekkelman
3764adb7ef update changelog 2022-02-02 13:44:32 +01:00
Maarten L. Hekkelman
9160adb1cf Merge branch 'develop' into trunk 2022-02-02 13:40:47 +01:00
Maarten L. Hekkelman
3ebf4338ab Do not crash on uninitialized Atoms 2022-02-02 12:41:32 +01:00
Maarten L. Hekkelman
2eb4b7b39b Fix building in Windows 2022-01-25 15:27:15 +01:00
Maarten L. Hekkelman
c241e49b48 fix makefile 2022-01-25 15:13:15 +01:00
Maarten L. Hekkelman
238c881132 Update dependencies, version string 2022-01-25 13:27:58 +01:00
Maarten L. Hekkelman
49dc733536 Create non poly from described atoms 2022-01-25 13:27:19 +01:00
Maarten L. Hekkelman
755bd78f60 Fix declaration for mmcif::Nudge 2022-01-19 13:21:29 +01:00
Maarten L. Hekkelman
77f80cd51f Fix atomic test (apparently, libatomic is only needed for std::atomic<long long>) 2022-01-19 08:25:25 +01:00
Maarten L. Hekkelman
3df6000635 cleaning up code 2022-01-18 16:06:28 +01:00
Maarten L. Hekkelman
5efee2b40d comment adjusted 2022-01-18 13:28:42 +01:00
Maarten L. Hekkelman
f3c2e59184 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2022-01-18 11:26:23 +01:00
Maarten L. Hekkelman
24ab660e6e Change logic for testing std::filesystem and libatomic 2022-01-18 11:24:31 +01:00
Maarten L. Hekkelman
6c0a418068 Revert "Check atomic"
This reverts commit 07a180991e.
2022-01-18 11:12:56 +01:00
Maarten L. Hekkelman
07a180991e Check atomic 2022-01-17 11:40:46 +01:00
Maarten L. Hekkelman
4732004b67 Merge branch 'develop' into trunk 2022-01-12 16:41:18 +01:00
Maarten L. Hekkelman
faa9cd0431 Added another rotate/translate method to mmcif::Structure 2022-01-12 14:06:32 +01:00
Maarten L. Hekkelman
e0c3c2394d Fix Structure::createNonPoly to add atoms... 2022-01-11 11:21:56 +01:00
Maarten L. Hekkelman
2dec584f54 clean up code 2022-01-05 15:54:23 +01:00
Maarten L. Hekkelman
5ab2ccae40 avoid calling cif::Category::size() too often 2022-01-05 15:45:27 +01:00
Maarten L. Hekkelman
1017d08626 skip updating links when changing atom location 2022-01-05 15:24:22 +01:00
Maarten L. Hekkelman
32b1bbd943 combine translate and rotate in a single call 2022-01-05 14:27:16 +01:00
Maarten L. Hekkelman
1abf31ffa5 no-validate option in cif::Row::assign 2022-01-05 14:04:28 +01:00
Maarten L. Hekkelman
aec60829d2 more quiet code 2022-01-05 11:29:10 +01:00
Maarten L. Hekkelman
888c3c38c2 Add a 'quiet' mode (cif::VERBOSE < 0) 2022-01-05 10:36:39 +01:00
Maarten L. Hekkelman
e2c4648037 clean up 2022-01-05 10:24:37 +01:00
Maarten L. Hekkelman
f7b98c0530 refactored AtomImpl 2022-01-05 10:23:15 +01:00
Maarten L. Hekkelman
d4bd3faa16 Merge branch 'profiling-structure' into trunk 2022-01-04 10:29:23 +01:00
Maarten L. Hekkelman
c4f3b1cd7b delay loading atoms in residues 2022-01-04 09:48:41 +01:00
Maarten L. Hekkelman
74add69a83 Finish removing bzip2 support 2022-01-03 15:51:01 +01:00
Maarten L. Hekkelman
a490b19d24 version bump 2022-01-03 15:45:48 +01:00
Maarten L. Hekkelman
44cfa2c1a2 further optimisation 2022-01-03 15:19:50 +01:00
Maarten L. Hekkelman
6dd9522b3f optimized mmcif::Atom 2022-01-03 14:32:42 +01:00
Maarten L. Hekkelman
5e352cb8e4 Removed erronous dependency in config.cmake.in 2021-12-20 13:33:06 +01:00
Maarten L. Hekkelman
2fad7315b8 make DSSP::iterator bidirectional 2021-12-15 15:12:23 +01:00
Maarten L. Hekkelman
520759dfe8 update changelog 2021-12-14 15:17:50 +01:00
Maarten L. Hekkelman
577b44ae11 Fix in processing CCP4 monomers, proline is a peptide 2021-12-14 15:16:23 +01:00
Maarten L. Hekkelman
66f742d6c0 code to facilitate DSSP 2021-12-14 15:14:45 +01:00
Maarten L. Hekkelman
7ba9f688c7 Merge branch 'develop' into trunk 2021-12-10 10:39:34 +01:00
Maarten L. Hekkelman
883f0307a2 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-12-10 10:38:43 +01:00
Maarten L. Hekkelman
c9719f873f Merge branch 'develop' into trunk 2021-12-10 10:37:04 +01:00
Maarten L. Hekkelman
123d25f853 formatting of floating points in cif files
better verbose info for differences
2021-12-10 10:35:02 +01:00
Maarten L. Hekkelman
56da42db84 formatting of floating points in cif files
better verbose info for differences
2021-12-10 10:32:21 +01:00
Maarten L. Hekkelman
7f820449ca formatting 2021-12-08 09:06:09 +01:00
Maarten L. Hekkelman
ecb2cf5f11 Fix for compiling with gcc 11.2 2021-12-08 09:03:21 +01:00
Maarten L. Hekkelman
7f27da9b3b Fixed rename-compound-test to work when not using resources 2021-11-25 16:27:45 +01:00
Maarten L. Hekkelman
01eb499c69 attempt to fix running tests in different directory 2021-11-25 16:27:42 +01:00
Maarten L. Hekkelman
1ff6f70682 changelog update 2021-11-25 16:25:53 +01:00
Maarten L. Hekkelman
ddde996e10 strip newlines from compound names read from CCD 2021-11-25 16:24:55 +01:00
Maarten L. Hekkelman
1c9212c7e0 Fixed rename-compound-test to work when not using resources 2021-11-25 16:09:15 +01:00
Maarten L. Hekkelman
a568143991 unneeded loading of resource removed from test 2021-11-24 13:52:23 +01:00
Maarten L. Hekkelman
2b6f1bd9ee attempt to fix running tests in different directory 2021-11-23 10:51:22 +01:00
Maarten L. Hekkelman
2527aa5ea6 correct version in cmakefile, fix structure-test to no longer load resource 2021-11-24 13:57:36 +01:00
Maarten L. Hekkelman
4c28091ecd Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-18 09:56:11 +01:00
Maarten L. Hekkelman
d49725423e re-added group to Compound. Seems to be important 2021-11-18 09:56:01 +01:00
Maarten L. Hekkelman
fcb4dc61b5 fixed writing PDB files (= remove to_upper for all header lines...) 2021-11-17 08:01:24 +01:00
Maarten L. Hekkelman
b7330c074f Fixed Structure::changeResidue to actually change the residue itself as well. 2021-11-16 08:38:14 +01:00
Maarten L. Hekkelman
e8f4123030 strip newlines from names in Compound 2021-11-15 12:37:45 +01:00
Maarten L. Hekkelman
975057c4c4 Fixed bug in structure::changeresidue when removing atoms 2021-11-15 11:32:50 +01:00
Maarten L. Hekkelman
a0e01668d1 take largest in value for best quaternion 2021-11-15 10:22:56 +01:00
Maarten L. Hekkelman
2c77491416 cleaner implementation of matrices 2021-11-12 10:36:38 +01:00
Maarten L. Hekkelman
be19e4a9cb Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-12 09:20:01 +01:00
Maarten L. Hekkelman
61ce91a9d7 using expression templates for matrices 2021-11-12 09:17:21 +01:00
Maarten L. Hekkelman
18f1d07e85 clean up code 2021-11-12 08:28:10 +01:00
Maarten L. Hekkelman
b596976194 correct implementation of alignpoints 2021-11-12 08:15:33 +01:00
Maarten L. Hekkelman
1f6b86d516 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-11-11 21:41:05 +01:00
Maarten L. Hekkelman
31499b977d Fix the 3d alignment code 2021-11-11 21:39:52 +01:00
Maarten L. Hekkelman
f83850e380 git and revisions 2021-11-11 09:42:22 +01:00
Maarten L. Hekkelman
1a4ccd86fe changelog update 2021-11-03 09:28:14 +01:00
Maarten L. Hekkelman
5c3c6fec09 strip newlines from compound names read from CCD 2021-10-29 10:48:09 +02:00
Maarten L. Hekkelman
f97e742daa removed a too strict test in loading structures 2021-10-21 08:42:42 +02:00
Maarten L. Hekkelman
7f39d401e2 Optimised assigning data 2021-10-20 14:55:30 +02:00
Maarten L. Hekkelman
af412c284d clean up 2021-10-20 12:27:27 +02:00
Maarten L. Hekkelman
874cd3bae5 Fix symmetry lookup 2021-10-20 11:36:31 +02:00
Maarten L. Hekkelman
ea28ebdd13 optimized caching of items in mmcif::Atom 2021-10-19 16:10:59 +02:00
Maarten L. Hekkelman
3ba468933f Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-10-18 17:17:20 +02:00
Maarten L. Hekkelman
45f33e4bea Merge branch 'trunk' into develop 2021-10-18 11:12:56 +02:00
Maarten L. Hekkelman
021487ed16 Fix reading mmCIF file where model is defined but model 1 is missing. Version bump. 2021-10-18 11:11:03 +02:00
Maarten L. Hekkelman
cb3443ffb1 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-10-18 09:42:30 +02:00
Maarten L. Hekkelman
6b2c9dc3e3 order of compound info, load CCD first 2021-10-18 09:40:33 +02:00
Maarten L. Hekkelman
7513cc1947 Merge branch 'trunk' into develop 2021-10-14 09:30:19 +02:00
Maarten L. Hekkelman
c98b8ae5c9 fix pkgconfig file
fix example for new interface
2021-10-13 14:25:19 +02:00
Maarten L. Hekkelman
ab2dd4b75f Merge branch 'trunk' into develop 2021-10-13 14:03:29 +02:00
Maarten L. Hekkelman
be77316545 Rename options to make them all start with CIFPP_
Add cmake options for CACHE_DIR and DATA_DIR
2021-10-13 14:01:42 +02:00
Maarten L. Hekkelman
cdfb0d9497 remove unused files 2021-10-13 12:30:44 +02:00
Maarten L. Hekkelman
71f7e7c741 Version bump, changelog updated 2021-10-13 11:51:12 +02:00
Maarten L. Hekkelman
cff099596e Fix installation rules for CCD data 2021-10-13 11:32:10 +02:00
Maarten L. Hekkelman
e182604455 Fix all tests to work with embedded data, no CCD, part 4 2021-10-13 11:23:17 +02:00
Maarten L. Hekkelman
45a7defb7e Fix all tests to work with embedded data, no CCD, part 3 2021-10-13 10:55:25 +02:00
Maarten L. Hekkelman
906f6ac1ea Fix all tests to work with embedded data, no CCD 2021-10-13 10:31:57 +02:00
Maarten L. Hekkelman
8d96e513bd rename update script 2021-10-13 09:55:02 +02:00
Maarten L. Hekkelman
cdefd063e2 Added small ccd subset for testing in absense of full CCD 2021-10-13 09:34:58 +02:00
Maarten L. Hekkelman
8bbcba76cf Performance increase by using std::string_view
Updated Structure::changeResidue
2021-10-12 16:04:27 +02:00
Maarten L. Hekkelman
c767e89a5d Fixed symmetry operator table generator 2021-10-06 12:57:18 +02:00
Maarten L. Hekkelman
b78a603dca run tests before installation 2021-10-05 11:49:54 +02:00
Maarten L. Hekkelman
18088457d3 last updates in develop 2021-09-29 14:21:41 +02:00
Maarten L. Hekkelman
056697d901 imported libname for stdc++fs 2021-09-29 13:24:16 +02:00
Maarten L. Hekkelman
2681cfad50 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-29 13:21:45 +02:00
Maarten L. Hekkelman
8aaa7925a3 fix lib name 2021-09-29 13:21:37 +02:00
Maarten L. Hekkelman
d4f73e471b No need to search zlib/bzip2 in Windows 2021-09-29 11:08:05 +02:00
Maarten L. Hekkelman
750be0c4a4 for 18.04 2021-09-28 15:26:15 +02:00
Maarten L. Hekkelman
0f4a2a26fc dependencies
check compiler version
2021-09-28 14:15:17 +02:00
Maarten L. Hekkelman
6adb56341d remove boost::date_time dependency 2021-09-28 12:48:15 +02:00
Maarten L. Hekkelman
4524357cd3 zlib, again (use the zlib from boost in Windows) 2021-09-28 11:20:02 +02:00
Maarten L. Hekkelman
0b05b6f6e3 run tests in build dir 2021-09-28 10:40:40 +02:00
Maarten L. Hekkelman
6382170157 zlib 2021-09-28 09:39:15 +02:00
Maarten L. Hekkelman
c2eeb69dcc optional static libs boost 2021-09-28 09:38:35 +02:00
Maarten L. Hekkelman
f32261fc59 revert zlib in boost 2021-09-28 09:37:42 +02:00
Maarten L. Hekkelman
90c967c8c6 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-28 09:30:56 +02:00
Maarten L. Hekkelman
4ac90128db zlib in boost 2021-09-28 09:30:43 +02:00
Maarten L. Hekkelman
3d71db1bb7 less stringent test for compiler version 2021-09-28 09:30:22 +02:00
Maarten L. Hekkelman
5e35ea5168 warnings and compiler detection 2021-09-28 09:17:28 +02:00
Maarten L. Hekkelman
2fc88d52eb Updating cmake file to install correct update mechanism 2021-09-28 08:50:28 +02:00
Maarten L. Hekkelman
567b0f3b57 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-28 07:57:05 +02:00
Maarten L. Hekkelman
8f29386998 fix file read test 2021-09-26 14:52:34 +02:00
Maarten L. Hekkelman
d2427d57d9 unit test for reading files 2021-09-27 11:36:24 +02:00
Maarten L. Hekkelman
14a9499962 fix cmake files, pkg-config file 2021-09-27 09:36:33 +02:00
Maarten L. Hekkelman
0fafb80d44 Remove bzip2 support 2021-09-27 08:40:05 +02:00
Maarten L. Hekkelman
b5454f0943 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-26 13:41:21 +02:00
Maarten L. Hekkelman
b698260d73 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into develop 2021-09-26 13:40:39 +02:00
Maarten L. Hekkelman
ccdd1b74a0 required 2021-09-26 13:39:49 +02:00
Maarten L. Hekkelman
298fe20a1b improve cmake files? 2021-09-25 12:05:42 +02:00
Maarten L. Hekkelman
12a7c45452 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-24 16:02:22 +02:00
Maarten L. Hekkelman
02a28c2fd6 clean up makefile 2021-09-24 16:01:48 +02:00
Maarten L. Hekkelman
68e182b0bd Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-24 15:31:31 +02:00
Maarten L. Hekkelman
b0ec88f469 link compression libs 2021-09-24 15:30:34 +02:00
Maarten L. Hekkelman
4feee6ac22 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-24 15:27:48 +02:00
Maarten L. Hekkelman
ee8a85ec2f static boost libs 2021-09-24 15:27:13 +02:00
Maarten L. Hekkelman
0245d4a881 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-24 15:24:42 +02:00
Maarten L. Hekkelman
e8f0058956 add link of stdc++fs 2021-09-24 15:24:30 +02:00
Maarten L. Hekkelman
9c75dbaae0 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-24 15:22:15 +02:00
Maarten L. Hekkelman
5bd39b598e Add filesystem check, with libstdc++fs test 2021-09-24 15:21:12 +02:00
Maarten L. Hekkelman
25a43abffd update version number, fix revision file 2021-09-24 14:06:47 +02:00
Maarten L. Hekkelman
9a7ca022e2 check for existence of run-parts to be more compatible 2021-09-21 08:56:22 +02:00
Maarten L. Hekkelman
46fe0d7caf fixed cmake file
reduced verbosity for missing atoms
update script, more cross platform
2021-09-21 08:52:48 +02:00
Maarten L. Hekkelman
0371cec415 Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop 2021-09-21 08:40:25 +02:00
Maarten L. Hekkelman
2b1020cbb9 Merge branch 'trunk' into develop 2021-09-21 08:16:57 +02:00
Andrius Merkys
8c3ce2a87d Update OpenStructure compounds.chemlib if OpenStructure is found (#8)
* Update OpenStructure compounds.chemlib if OpenStructure is found.

* Replacing the code to update OpenStructure compounds.chemlib with a call to its internal script.

* Implementing more flexible approach allowing notifying all dependent software.
2021-09-20 12:39:15 +02:00
Maarten L. Hekkelman
d633622e27 Creating new nonpoly compounds 2021-09-20 11:34:07 +02:00
Maarten L. Hekkelman
19b652f615 Merge commit '37f7dd0' into develop 2021-09-15 16:06:43 +02:00
Maarten L. Hekkelman
37f7dd0631 load compound if needed 2021-09-15 16:04:49 +02:00
Maarten L. Hekkelman
4c99710fb3 Refactored iterators (Category, Row)
const versions of find
2021-09-15 11:38:12 +02:00
Maarten L. Hekkelman
59865cdb44 Rotate and Translate of structure data
update unit-test for create_nonpoly
2021-09-14 16:09:08 +02:00
Maarten L. Hekkelman
c3434507da new find1 2021-09-14 11:25:59 +02:00
Maarten L. Hekkelman
79ecf20b85 Sometimes extracting archives does not work in cmake 2021-09-10 09:24:06 +02:00
Maarten L. Hekkelman
1de9681bb7 revert mrc test
extended nonpoly unit-test
2021-09-10 09:22:37 +02:00
Maarten L. Hekkelman
345c4778e6 start structure-test unit test
added compare functions for Datablock and Category
2021-09-08 16:58:50 +02:00
Maarten L. Hekkelman
0ccb2f88ca Added more 3d code (alignpoints) 2021-09-08 11:44:27 +02:00
Maarten L. Hekkelman
f7bef8b0e9 update dictionary
do not throw from Category::isValid()
2021-09-07 14:50:06 +02:00
Maarten L. Hekkelman
9da8608f8f Merge branch 'cmake' into trunk
install update script (optionally, unix only)
2021-09-07 10:12:46 +02:00
Maarten L. Hekkelman
496cb0b909 last minute changes for building inside CCP4 2021-09-07 10:10:38 +02:00
Maarten L. Hekkelman
583cafa91e Merge branch 'cmake' of github.com:PDB-REDO/libcifpp into cmake 2021-09-07 09:23:02 +02:00
Maarten L. Hekkelman
01da665243 test nieuwe CCP4 regels 2021-09-07 09:15:47 +02:00
Maarten L. Hekkelman
e900cd1e3d Windows 2021-09-06 14:58:47 +02:00
Maarten L. Hekkelman
a8a838b33e extract using gzip for cmake < 3.19 (?) 2021-09-02 08:43:12 +02:00
Maarten L. Hekkelman
072be25335 install update script 2021-09-01 16:53:05 +02:00
Maarten L. Hekkelman
5f50429cdb last minute changes to cmake files 2021-08-31 15:53:29 +02:00
Maarten L. Hekkelman
db3fb04172 Merged cmake changes
Updated version to 1.1.1
2021-08-31 15:50:36 +02:00
Maarten L. Hekkelman
839385c3fb update readme 2021-08-31 15:45:01 +02:00
Maarten L. Hekkelman
d508b7b2df updated changelog 2021-08-31 15:43:43 +02:00
Maarten L. Hekkelman
b1456b87c0 Updated readme 2021-08-31 14:27:51 +02:00
Maarten L. Hekkelman
58d2dcaef2 fix weak linking in Linux (when resources are not available) 2021-08-31 13:56:04 +02:00
Maarten L. Hekkelman
a9468d1cbb Fix test in cmake file 2021-08-31 12:49:52 +02:00
Maarten L. Hekkelman
a29e3e8da3 create symop table 2021-08-30 12:06:36 +02:00
Maarten L. Hekkelman
dd5df1bb2a debug code: print atom id 2021-08-25 13:08:15 +02:00
Maarten L. Hekkelman
d5d6f3a7b3 Merge branch 'newer-cmake-files' of github.com:PDB-REDO/libcifpp into newer-cmake-files 2021-08-24 12:50:26 +02:00
Maarten L. Hekkelman
9d2f2b3026 pkgconfig support 2021-08-24 12:50:20 +02:00
Maarten L. Hekkelman
74717a3047 Do not bail out in bondmap creation if compound is unknown 2021-08-24 10:52:18 +02:00
Maarten L. Hekkelman
c13ee92f1e on windows 2021-08-23 17:20:45 +02:00
Maarten L. Hekkelman
0ca04bed4f Updated cmake file, configurable data dir 2021-08-23 17:00:49 +02:00
Maarten L. Hekkelman
89850de660 export symbols in windows 2021-08-18 21:21:14 +02:00
Maarten L. Hekkelman
8bb4ebd897 update cmake rules 2021-08-18 16:31:44 +02:00
Maarten L. Hekkelman
6a67208d24 date in revision file 2021-08-18 15:19:41 +02:00
Maarten L. Hekkelman
139f32c3e8 correct dependency for tests 2021-08-18 14:29:40 +02:00
Maarten L. Hekkelman
81c0d01944 Cleaning up all warnings in MSVC 2021-08-18 14:12:56 +02:00
Maarten L. Hekkelman
1d2f997efb export stuff 2021-08-18 09:53:46 +02:00
Maarten L. Hekkelman
f126b1dac3 include boost libs 2021-08-17 14:35:51 +02:00
Maarten L. Hekkelman
bbc38262ce setting up tests in cmake 2021-08-17 14:11:19 +02:00
Maarten L. Hekkelman
caeafd3189 clean up 2021-08-17 13:43:01 +02:00
Maarten L. Hekkelman
98e3c47cdf remove configure 2021-08-17 13:38:32 +02:00
Maarten L. Hekkelman
390b230cac generate version string with cmake 2021-08-17 13:30:38 +02:00
Maarten L. Hekkelman
c5c3950c91 revision, first attempt 2021-08-17 11:33:22 +02:00
Maarten L. Hekkelman
2aea0b3b1f resources in Windows 2021-08-13 11:38:48 +02:00
Maarten L. Hekkelman
6b16e02b34 clean up warning on Windows, start using resources on Windows 2021-08-12 12:01:48 +02:00
Maarten L. Hekkelman
bb2f81318a Newer CMakeLists.txt file, now creating more correct config files. 2021-08-10 16:21:02 +02:00
Maarten L. Hekkelman
ec91d0fb22 fix test isWater in Residue when mCompound is null 2021-08-10 11:23:56 +02:00
Maarten L. Hekkelman
760e23693e remove spurious error messages,
fix getUniqueID to use correct key field
2021-08-09 15:11:18 +02:00
Maarten L. Hekkelman
cfba00ae46 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-08-09 10:01:20 +02:00
Maarten L. Hekkelman
dc1cfb60ff Fix reading AlphaFold files (missing auth_seq_id error) 2021-08-09 10:00:50 +02:00
Maarten L. Hekkelman
92148304ee Safer CompoundFactoryImpl 2021-07-12 11:25:39 +02:00
Maarten L. Hekkelman
9329c0c0f6 optimisation of creating compound 2021-07-12 10:51:52 +02:00
Maarten L. Hekkelman
de3ac001fc added import statement for Windows DLL 2021-07-07 13:06:43 +02:00
Maarten L. Hekkelman
b1047154a4 missing include, export symbols on Windows 2021-06-29 10:39:47 +02:00
Maarten L. Hekkelman
c361af3792 z and bz2 finding, component.cif download 2021-05-26 14:59:59 +02:00
Maarten L. Hekkelman
73487fc24b updated for windows 2021-05-25 15:51:37 +02:00
Maarten L. Hekkelman
73513fd700 cmake file, now working on windows 2021-05-25 15:25:39 +02:00
Maarten L. Hekkelman
6b7723eb2a cmake file can now build unit-test 2021-05-25 09:42:21 +02:00
Maarten L. Hekkelman
7056c15366 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-05-21 09:25:34 +02:00
Maarten L. Hekkelman
8539e050ae add code to load default components.cif file 2021-05-21 09:25:20 +02:00
Maarten L. Hekkelman
5d4c2641f6 fix cmake file, now creates unit-test 2021-05-19 15:37:07 +02:00
Maarten L. Hekkelman
bc32071576 fix cmake file 2021-05-18 15:36:46 +02:00
Maarten L. Hekkelman
3b2c57314f initial cmake file 2021-05-18 13:54:03 +02:00
Maarten L. Hekkelman
36351a0608 fix for libc++, not using in_avail anymore 2021-05-17 10:38:06 +02:00
Maarten L. Hekkelman
047b454c1d disable download ccd in configure, for travis 2021-04-28 16:49:14 +02:00
Maarten L. Hekkelman
3f5e620102 loading compound info, finished with error checking and verbose mode 2021-04-28 16:18:08 +02:00
Maarten L. Hekkelman
851a43ba4b Alternative layering of compound factory object implementations 2021-04-26 16:52:18 +02:00
Maarten L. Hekkelman
47ae50f704 only load compound when really needed 2021-04-26 13:16:58 +02:00
Maarten L. Hekkelman
33bdd6ae82 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-04-21 16:03:07 +02:00
Maarten L. Hekkelman
433021727e count molecules 2021-04-21 16:02:22 +02:00
Maarten L. Hekkelman
7f23c84287 clean-up 2021-04-21 15:36:46 +02:00
Maarten L. Hekkelman
74bd2585e7 renaming, nonpoly_scheme 2021-04-21 14:41:06 +02:00
Maarten L. Hekkelman
ebb27638ff renaming compound code... 2021-04-21 14:20:36 +02:00
Maarten L. Hekkelman
3eb7e4c5bf Potential fix 2021-04-21 11:38:36 +02:00
Maarten L. Hekkelman
112f859b19 ms windows work 2021-04-21 08:32:10 +02:00
Maarten L. Hekkelman
fd08678ff6 backup 2021-04-20 17:18:15 +02:00
Maarten L. Hekkelman
2e2fc11fe1 renaming, first steps 2021-04-20 13:33:56 +02:00
Maarten L. Hekkelman
d44ed57cf9 load correct dictionary 2021-04-20 13:30:08 +02:00
Maarten L. Hekkelman
aa31acb056 no atomic? 2021-04-15 09:26:45 +02:00
Maarten L. Hekkelman
232003cb2e added sample PDB file 2021-04-14 17:31:30 +02:00
Maarten L. Hekkelman
db21dd1659 fix making, testing 2021-04-14 17:15:02 +02:00
Maarten L. Hekkelman
2f3279a5ff stand alone, write complete update scripts and config 2021-04-14 17:03:00 +02:00
Maarten L. Hekkelman
ab21317156 at least some tests should now work 2021-04-14 16:25:08 +02:00
Maarten L. Hekkelman
43cb312225 addFileResource added 2021-04-14 16:02:22 +02:00
Maarten L. Hekkelman
ce28cb7a48 Moved BondMap from libpdb-redo to libcifpp 2021-04-14 14:55:55 +02:00
Maarten L. Hekkelman
1e47fa557c indexed CCD file 2021-04-14 11:32:03 +02:00
Maarten L. Hekkelman
1ae3cf7b99 moving towards using CCD, parsing single datablock 2021-04-14 10:59:58 +02:00
Maarten L. Hekkelman
915d6504a2 First steps, remove CCP4 info from compound 2021-04-13 16:39:03 +02:00
Maarten L. Hekkelman
5e63ca7a82 Check for zlib/bzip2 linking 2021-04-13 14:13:25 +02:00
Maarten L. Hekkelman
e0777e74c2 minor updates 2021-04-08 09:26:10 +02:00
Maarten L. Hekkelman
cf465134fd fixing linux again 2021-03-10 16:46:10 +01:00
Maarten L. Hekkelman
873ac70d18 Build with MSVC 2021-03-10 14:10:25 +01:00
Maarten L. Hekkelman
9ca3c50c83 Build with MSVC 2021-03-10 14:10:02 +01:00
Maarten L. Hekkelman
cc1c7c39b1 do not build pdb2cif test for now 2021-03-10 10:37:45 +01:00
Maarten L. Hekkelman
2c6222984d link distance calculation 2021-03-10 08:46:14 +01:00
Maarten L. Hekkelman
58d7e17165 Add Anomalous remark 3 fields for phenix 2021-03-09 11:50:46 +01:00
Maarten L. Hekkelman
57618095bf fix LINK record (length position) 2021-03-09 09:09:33 +01:00
Maarten L. Hekkelman
492a1ad8ec LINK distance 2021-03-08 18:55:54 +01:00
Maarten L. Hekkelman
71da0ce345 fix in fixing distance for LINK records 2021-03-08 16:56:11 +01:00
Maarten L. Hekkelman
7dc574b534 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-03-05 11:50:53 +01:00
Maarten L. Hekkelman
ff8a7a1bfd for g++-11 2021-03-05 11:50:08 +01:00
Maarten L. Hekkelman
0f963eeab1 @travis 2021-02-22 19:22:45 +01:00
Maarten L. Hekkelman
8f02665e6d symop table generator, now requires output argument 2021-02-22 17:13:50 +01:00
Maarten L. Hekkelman
8604c3ab03 merged 2021-02-22 15:23:51 +01:00
Maarten L. Hekkelman
7a61129388 refactor configure 2021-02-22 15:21:47 +01:00
Maarten L. Hekkelman
516db3d8be for travis 2021-02-20 19:40:07 +01:00
Maarten L. Hekkelman
34bdc5056a fix parsing PDB files containing carriage returns 2021-02-20 19:23:14 +01:00
Maarten L. Hekkelman
9402fd5cf3 boost flags order 2021-02-17 20:54:27 +01:00
Maarten L. Hekkelman
a6faa5ce0c fix libcifpp.pc, ax_pkg_require is not implemented in this configure yet 2021-02-17 19:44:02 +01:00
Maarten L. Hekkelman
5735a70e47 using resources... 2021-02-17 15:27:05 +01:00
Maarten L. Hekkelman
8f997658d7 update configure to configure data directories 2021-02-17 11:28:50 +01:00
Maarten L. Hekkelman
5069316326 When converting pdb to cif, calculate distance for LINKR records 2021-02-16 09:55:54 +01:00
Maarten L. Hekkelman
143f17fb2a reorganize resources 2021-02-10 13:55:40 +01:00
Maarten L. Hekkelman
900d7fa07a moved dictionaries 2021-02-10 13:28:33 +01:00
Maarten L. Hekkelman
a4389542fd pic 2021-02-10 10:42:49 +01:00
Maarten L. Hekkelman
b7f4e40917 ouch... wrong type in verbose output generation 2021-02-09 16:37:21 +01:00
Maarten L. Hekkelman
a0447ba91c implemented missing function cif::Category::find1 2021-02-08 16:24:02 +01:00
Maarten L. Hekkelman
17c9d208ad revert back to boost::regex for validator since gnu std::regex implementation is recursive without proper checks... 2021-01-25 16:51:12 +01:00
Maarten L. Hekkelman
e89311fcff init compound factory in absence of CLIBD_MON 2021-01-18 14:25:27 +01:00
Maarten L. Hekkelman
bab750fa6c USE_RSRC is really needed 2021-01-12 11:33:39 +01:00
Maarten L. Hekkelman
0caaf23767 Thread local compound factory, for servers that manipulate the factory. 2021-01-12 09:37:38 +01:00
Maarten L. Hekkelman
c5676f1dfb accept compressed raw data in mmcif::File constructor 2021-01-12 09:36:43 +01:00
Maarten L. Hekkelman
86854581f8 Add constructor loading raw data to mmcif::File 2021-01-11 16:37:18 +01:00
Maarten L. Hekkelman
ef81944826 fix hang in pdb parsing with single line file 2021-01-11 16:36:30 +01:00
Maarten L. Hekkelman
b3b53ebcc0 travis job with clang 2021-01-11 13:19:52 +01:00
Maarten L. Hekkelman
3bc0384100 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-01-11 13:01:28 +01:00
Maarten L. Hekkelman
24b1c2ae58 remove Config.hpp from list of files to install 2021-01-11 13:01:20 +01:00
Maarten L. Hekkelman
c996bfcfaf added pthread flags 2021-01-11 13:00:06 +01:00
Maarten L. Hekkelman
3814635aa0 shared libraries, disable by default. Not my favourite, but hey, we need to build on MacOS as well... 2021-01-11 11:54:40 +01:00
Maarten L. Hekkelman
2b6c09cfad shared libraries, default action again 2021-01-11 11:19:34 +01:00
Maarten L. Hekkelman
9ee474081e search data_dir as well as cache_dir, for MacOS 2021-01-11 10:38:51 +01:00
Maarten L. Hekkelman
5bab298f74 search data_dir as well as cache_dir, for MacOS 2021-01-11 10:31:57 +01:00
Maarten L. Hekkelman
fd42b7f443 search data_dir as well as cache_dir, for MacOS 2021-01-11 10:28:49 +01:00
Maarten L. Hekkelman
edcea220f6 search data_dir as well as cache_dir, for MacOS 2021-01-11 10:12:14 +01:00
Maarten L. Hekkelman
a92e85f8db config.h file 2021-01-11 09:12:23 +01:00
Maarten L. Hekkelman
6fbe8bb192 refactored config file, avoid clashes in defines by autoconf 2021-01-11 08:54:49 +01:00
Maarten L. Hekkelman
5f45fd9b5b revert writing SOURCE_DATE_EPOCH since that is a Debian thing 2021-01-06 14:54:38 +01:00
Maarten L. Hekkelman
46925331c7 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2021-01-06 13:04:06 +01:00
Maarten L. Hekkelman
84123df996 changes from Debian included 2021-01-06 13:03:52 +01:00
drlemmus
641b446195 Update update-dictionary-script
Updated URL for the mmcif dictionary download to HTTPS.
2021-01-06 08:53:54 +01:00
Maarten L. Hekkelman
34bbf06793 update config.hpp.in, fix update script to write correct filename 2020-12-17 08:49:39 +01:00
Maarten L. Hekkelman
53a88e236b check for libatomic in configure 2020-12-16 08:51:03 +01:00
Maarten L. Hekkelman
c2069a12b4 stupid typo 2020-12-15 20:24:31 +01:00
Maarten L. Hekkelman
791ab245ca Fix makefile for renamed script 2020-12-15 19:15:00 +01:00
Maarten L. Hekkelman
43287736bb Revert renaming update script 2020-12-15 16:46:09 +01:00
Maarten L. Hekkelman
34ee3321d8 update makefile, to match debian 2020-12-15 16:37:53 +01:00
Maarten L. Hekkelman
6e433ae784 fix makefile for new configure script 2020-12-15 16:16:04 +01:00
Maarten L. Hekkelman
cd370275da Fix for 32-bit architectures 2020-12-15 15:47:41 +01:00
Maarten L. Hekkelman
7ed0f4c8ae clean up configure script 2020-12-02 15:21:17 +01:00
Maarten L. Hekkelman
ea7d8ce766 do not install the cron job by default 2020-11-19 16:27:21 +01:00
Maarten L. Hekkelman
f12e877251 for travis 2020-11-19 14:19:47 +01:00
Maarten L. Hekkelman
57ce90ce7c for travis 2020-11-19 14:18:54 +01:00
Maarten L. Hekkelman
b61321e360 for travis 2020-11-19 14:11:13 +01:00
Maarten L. Hekkelman
6416056958 disable resources on macOS 2020-11-19 13:39:14 +01:00
Maarten L. Hekkelman
4fdfd03c04 update clean target 2020-11-19 12:35:17 +01:00
Maarten L. Hekkelman
a1d2438341 and this one too 2020-11-19 12:27:53 +01:00
Maarten L. Hekkelman
952aa15d6e Fixed the --enable-revision argument for configure 2020-11-19 12:15:52 +01:00
Maarten L. Hekkelman
b3e45eb0b6 typo 2020-11-19 12:06:49 +01:00
Maarten L. Hekkelman
a548b39677 revision writing 2020-11-18 16:51:16 +01:00
Maarten L. Hekkelman
d5d96c58e4 resource loading 2020-11-18 08:35:23 +01:00
Maarten L. Hekkelman
9b61a06ef1 Revert "Update .travis.yml"
This reverts commit 7dc999ef39.
2020-11-17 19:24:40 +01:00
Maarten L. Hekkelman
3dbf19ac0b added #include <array> to fix building on FreeBSD 2020-11-17 19:19:54 +01:00
Maarten L. Hekkelman
7dc999ef39 Update .travis.yml
Seems the compiler on macOS is a bit out of date?
2020-11-17 16:06:24 +01:00
Maarten L. Hekkelman
697028b706 - remove mrc usage entirely
- added example as a simple test case
2020-11-17 15:48:59 +01:00
Maarten L. Hekkelman
c260fccdb5 versioning 2020-11-17 12:55:04 +01:00
Maarten L. Hekkelman
d9bf7c941b more fixes for rename 2020-11-17 11:32:32 +01:00
Maarten L. Hekkelman
af64151c60 rename libcif++ to libcifpp 2020-11-17 11:27:40 +01:00
Maarten L. Hekkelman
da94d65c6b prepare for 1.0.0 release 2020-11-17 10:33:25 +01:00
Maarten L. Hekkelman
d51340000f - fix new row iterators
- Residue constructor for sugars
2020-11-10 09:09:02 +01:00
Maarten L. Hekkelman
6f93fa3758 new conditional iterator type 2020-11-04 15:02:58 +01:00
Maarten L. Hekkelman
598f953cc1 writing sugar trees 2020-11-04 09:15:03 +01:00
Maarten L. Hekkelman
c39304281d sugar tree work in pdb2cif 2020-11-02 15:25:39 +01:00
Maarten L. Hekkelman
5e5e5c21ed sugar support, start 2020-11-02 08:47:07 +01:00
Maarten L. Hekkelman
299e270594 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-11-02 08:45:35 +01:00
Maarten L. Hekkelman
bd8a4e3639 new pdbx dictionary, clean up 2020-10-30 19:45:56 +01:00
Maarten L. Hekkelman
b3b5d05bfc boost is already installed at travis-ci 2020-10-27 14:30:09 +01:00
Maarten L. Hekkelman
268cefcb51 formal charge format in PDB fix 2020-10-27 10:44:55 +01:00
Maarten L. Hekkelman
f4e860bc2c iterate children, set property of atom 2020-10-27 09:19:24 +01:00
Maarten L. Hekkelman
b65aa46daa better symop table 2020-10-21 08:38:14 +02:00
Maarten L. Hekkelman
faef95a84d new layout of symop table to work around compiler alignment issues 2020-10-19 16:29:41 +02:00
Maarten L. Hekkelman
25dfdd2ff6 - alternates handling
- getResidue in structure
2020-10-14 15:46:16 +02:00
Maarten L. Hekkelman
88879a5de9 for usage with clipper 2020-10-13 15:56:34 +02:00
Maarten L. Hekkelman
52919f96a7 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-10-07 10:56:10 +02:00
Maarten L. Hekkelman
402505098a Work around limitation in travis-ci 2020-10-07 10:55:55 +02:00
Maarten L. Hekkelman
e8d8b8be60 do check makefile 2020-10-07 10:28:27 +02:00
Maarten Hekkelman
5ff7d01bd2 added missing #include <iomanip> 2020-10-07 10:20:33 +02:00
Maarten L. Hekkelman
3f8849680b export version number of lib 2020-10-07 10:00:26 +02:00
Maarten L. Hekkelman
e450fee020 Fixed processing links 2020-10-07 09:34:12 +02:00
Maarten L. Hekkelman
d7c162c71c links fixed, again 2020-10-06 16:55:53 +02:00
Maarten L. Hekkelman
f211fa4b5e oops 2020-10-06 16:51:05 +02:00
Maarten L. Hekkelman
c3963bc453 Fixed linked update (regression) 2020-10-06 16:32:36 +02:00
Maarten L. Hekkelman
23bd51ac9c removed ItemReference operator<< 2020-10-05 15:49:26 +02:00
Maarten L. Hekkelman
9e200b947e better check for filesystem 2020-10-05 13:48:00 +02:00
Maarten L. Hekkelman
2e661d5ff4 fixed compare 2020-10-05 13:23:28 +02:00
Maarten L. Hekkelman
766d5a4d7e case insensitive match, when defined by dictionary 2020-10-05 11:25:39 +02:00
Maarten L. Hekkelman
1f24937e65 simplify condition code. 2020-10-05 10:38:01 +02:00
Maarten L. Hekkelman
aba35c3440 fix writing HETSYN records in PDB 2020-10-05 09:00:26 +02:00
Maarten L. Hekkelman
3a8fd0ebb1 remove requirement of a full CCP4 installation at runtime 2020-09-29 09:45:21 +02:00
Maarten L. Hekkelman
69a2d53277 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-09-28 16:42:24 +02:00
Maarten L. Hekkelman
0b66d67ef3 fixing git repository 2020-09-28 16:42:00 +02:00
Maarten L. Hekkelman
1f22326db6 global replace is evil and should be forbidden 2020-09-28 16:23:47 +02:00
Maarten L. Hekkelman
b4dfdb5515 Revert "Work around dropped categories _pdbx_item_linked_group_list and _pdbx_item_linked_group"
This reverts commit 71a46cd10e.
2020-09-28 16:23:22 +02:00
Maarten L. Hekkelman
25512340c8 Revert "Work around dropped categories _pdbx_item_linked_group_list and _pdbx_item_linked_group"
This reverts commit 71a46cd10e.
2020-09-28 15:35:35 +02:00
Maarten L. Hekkelman
d2d322ba30 unit test 2020-09-28 13:22:55 +02:00
Maarten L. Hekkelman
96a26eae4a Added a unit test 2020-09-28 11:48:56 +02:00
Maarten L. Hekkelman
71a46cd10e Work around dropped categories _pdbx_item_linked_group_list and _pdbx_item_linked_group 2020-09-28 10:50:50 +02:00
Maarten L. Hekkelman
013af6af46 update debian files 2020-09-28 09:49:50 +02:00
Maarten L. Hekkelman
1d79a9e915 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-09-28 09:03:27 +02:00
Maarten L. Hekkelman
28b4deaf32 added debian files 2020-09-28 09:01:55 +02:00
Maarten L. Hekkelman
694d93f50b Update README.md 2020-09-27 14:00:54 +02:00
Maarten L. Hekkelman
8fed9a1302 Remove version resource building
update gitignore
2020-09-27 13:51:29 +02:00
Maarten L. Hekkelman
4fa2d84374 add travis file 2020-09-27 13:25:07 +02:00
Maarten L. Hekkelman
0e43b81c34 remove boost::regex
add boost::iostreams to libs, for test
2020-09-27 13:24:04 +02:00
Maarten L. Hekkelman
eaf38e6353 Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-09-27 13:15:16 +02:00
Maarten L. Hekkelman
4012a71f6c Added travis ci file 2020-09-27 13:14:48 +02:00
Maarten L. Hekkelman
5a71dbecdc compatibility with libstdc++ (LLVM) and BSD install 2020-09-26 11:38:28 +02:00
Maarten L. Hekkelman
6675768ab4 reduce requirements for c++ compliance 2020-09-23 20:39:44 +02:00
Maarten L. Hekkelman
63256f09ad better test of compiler capabilities 2020-09-23 20:22:48 +02:00
Maarten L. Hekkelman
be54866ca4 install rules 2020-09-23 16:32:47 +02:00
Maarten L. Hekkelman
7329009fff install rules 2020-09-23 16:14:36 +02:00
Maarten L. Hekkelman
bb5d72a40e remove dependency on boost::timer 2020-09-23 14:44:05 +02:00
Maarten L. Hekkelman
85d68b93eb only build symop table if CCP4 is found 2020-09-23 14:12:25 +02:00
Maarten L. Hekkelman
360d970033 only build symop table if CCP4 is found 2020-09-23 14:10:40 +02:00
Maarten L. Hekkelman
87d87fe5b0 check configure? not always 2020-09-23 13:59:24 +02:00
Maarten L. Hekkelman
bdfde18728 libtool and install, part 3 2020-09-23 13:49:54 +02:00
Maarten L. Hekkelman
1d0b2b8c6e for libtool, part 2 2020-09-23 13:46:00 +02:00
Maarten L. Hekkelman
7e91ae334f for libtool 2020-09-23 13:40:30 +02:00
Maarten L. Hekkelman
515d16fe79 update for configure 2020-09-23 13:34:28 +02:00
Maarten L. Hekkelman
896a8ebbdc Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk 2020-09-23 13:32:41 +02:00
Maarten L. Hekkelman
ae23938aa4 with pre-created configure script 2020-09-23 13:31:30 +02:00
Maarten L. Hekkelman
4db05c20ae removed wrong files 2020-09-23 13:30:54 +02:00
Maarten L. Hekkelman
8bb0663313 added license file 2020-09-22 16:11:16 +02:00
Maarten L. Hekkelman
83b2c651f1 prune configure script 2020-09-22 15:56:46 +02:00
Maarten L. Hekkelman
e65a782f45 readme updated 2020-09-22 15:49:15 +02:00
Maarten L. Hekkelman
43f418ae67 remove fixdmc 2020-09-22 15:27:40 +02:00
Maarten L. Hekkelman
4b4757ee74 removed using namespace std; 2020-09-22 15:26:37 +02:00
Maarten L. Hekkelman
ac088dd0a4 Added a copy of the generated symop table 2020-09-22 14:09:13 +02:00
Maarten L. Hekkelman
31c86d9c8a stripped to remove dependency on clipper and CCP4 2020-09-22 14:04:22 +02:00
Maarten L. Hekkelman
85b08f9d77 Added license
clean up of code
2020-09-22 09:35:56 +02:00
Maarten L. Hekkelman
d50529c6b9 refactored cif::Key to work around weird g++ behaviour 2020-09-21 15:23:17 +02:00
Maarten L. Hekkelman
bb9d81616b alternative for find 2020-09-21 13:45:15 +02:00
Maarten L. Hekkelman
024607002e lower required boost version 2020-09-21 13:42:24 +02:00
Maarten L. Hekkelman
31ce161543 changed RowSet and added conditional_iterator for category 2020-09-09 13:00:41 +02:00
Maarten L. Hekkelman
359538e170 resources, fix progress' implementations move to std::thread 2020-09-08 16:27:31 +02:00
Maarten L. Hekkelman
d45ce5060d Merge branch 'oeps' into trunk 2020-09-08 11:35:53 +02:00
Maarten L. Hekkelman
6aebf8408f fix 2020-09-08 11:20:49 +02:00
Maarten L. Hekkelman
5ce8d87b19 add debug flag 2020-09-08 11:15:36 +02:00
Maarten L. Hekkelman
f815b8588a accept missing SS bonds 2020-09-08 10:55:29 +02:00
Maarten L. Hekkelman
e812e2e092 Merge branch 'trunk' of gitlab.rhpc.nki.nl:PDB_REDO/libcifpp into trunk 2020-09-02 15:54:59 +02:00
Maarten L. Hekkelman
43eda65dd8 WIP on trunk: 7c5bf01 calculate surface only when needed, added deuterium 2020-09-02 15:53:11 +02:00
Maarten L. Hekkelman
3a94384775 index on trunk: 7c5bf01 calculate surface only when needed, added deuterium 2020-09-02 15:53:11 +02:00
Maarten L. Hekkelman
7c5bf01090 calculate surface only when needed, added deuterium 2020-09-02 15:52:55 +02:00
Maarten L. Hekkelman
3f421f34a2 calculate surface only when needed 2020-09-02 15:50:15 +02:00
Maarten L. Hekkelman
c7f67525ec less verbose 2020-09-02 13:51:40 +02:00
Maarten L. Hekkelman
71a78813e1 Merge branch 'trunk' of gitlab.rhpc.nki.nl:PDB_REDO/libcifpp into trunk 2020-09-02 13:47:07 +02:00
Maarten L. Hekkelman
62face74ee catch when another thread is still running 2020-09-02 13:45:21 +02:00
Maarten L. Hekkelman
1cbfbd8f4f include cmath 2020-08-31 15:48:33 +02:00
Maarten L. Hekkelman
3928be4939 added empty() to DSSP 2020-08-24 14:22:47 +02:00
Maarten L. Hekkelman
a967154625 PP helix assignment 2020-08-24 11:36:58 +02:00
Maarten L. Hekkelman
78dd9a3c02 for PPII helices 2020-08-11 09:56:14 +02:00
Maarten L. Hekkelman
52d6b2eace pp helices, a start 2020-08-10 13:32:07 +02:00
Maarten L. Hekkelman
2ef9e6b843 version string 2020-08-04 11:02:16 +02:00
Maarten L. Hekkelman
48d9d22d70 version tracking 2020-08-03 22:05:32 +02:00
Maarten L. Hekkelman
a3d9bc01a0 UNK should have ATOMS and not HETATOMS... 2020-08-03 17:02:12 +02:00
Maarten L. Hekkelman
52ea0bc7fc added resource to lib 2020-08-03 13:18:32 +02:00
Maarten L. Hekkelman
ba804abb34 auto init rsrc loader 2020-08-03 13:18:06 +02:00
Maarten L. Hekkelman
6835a9808b changes for secondary structure calculations 2020-07-07 11:26:55 +02:00
Maarten L. Hekkelman
77fc4080c5 dssp work 2020-07-01 16:22:06 +02:00
Maarten L. Hekkelman
599d0cb537 Several fixes in DSSP code 2020-07-01 11:52:45 +02:00
Maarten L. Hekkelman
59a7ff68e0 - renaming header files
- atomByID now returns an empty atom
- operator bool for Atom
2020-07-01 08:49:54 +02:00
Maarten L. Hekkelman
80bb24f347 - changed Id to ID in all code
- change ItemReference::as to work with std::optional as well
- changed some DSSP related code
2020-06-30 13:05:36 +02:00
Maarten L. Hekkelman
34c7fd3f54 Changes for DSSP 2020-06-24 17:07:38 +02:00
Maarten L. Hekkelman
fac1eb915a support exporting pdb header lines 2020-06-23 10:50:02 +02:00
Maarten L. Hekkelman
3a84a8e6e8 typo fixed 2020-06-22 17:30:16 +02:00
Maarten L. Hekkelman
dd02b3633d externalize resources 2020-06-22 17:11:32 +02:00
Maarten L. Hekkelman
54728d49be replacing boost libraries with standard version, where possible 2020-06-22 13:05:49 +02:00
Maarten L. Hekkelman
2afddc23ff .pc file added 2020-06-22 09:58:08 +02:00
Maarten L. Hekkelman
700c1d408d clean and distclean 2020-06-17 15:54:31 +02:00
Maarten L. Hekkelman
e3297d0de6 with boost test, should be improved I'm afraid 2020-06-17 15:47:59 +02:00
Maarten L. Hekkelman
e4ced4caef configure work 2020-06-16 09:42:24 +02:00
Maarten L. Hekkelman
14e32bed28 new tribool rules? 2020-06-15 16:27:39 +02:00
Maarten L. Hekkelman
a3cf3343c9 configure script 2020-06-15 16:01:48 +02:00
maarten
188fa4e59c fixed crash definitively
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@525 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-03-10 14:09:59 +00:00
maarten
d053492a7c No longer crashing on rowset/find
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@523 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-03-10 10:31:05 +00:00
maarten
8d9f4f007b fix prepper (mmcif naming...)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@521 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-03-09 08:16:00 +00:00
maarten
5084e7185a fix rama-angles (in fact, support X as atom type)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@519 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-03-02 13:20:55 +00:00
maarten
11b4c1d399 small changes in prepper
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@518 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-02-18 07:55:55 +00:00
maarten
a56b6b136d prepper met HEM/HEC
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@517 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-02-17 11:09:33 +00:00
maarten
b6e8f79c1a - fix in prepper code
- added erase_nocascade

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@515 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-02-12 16:35:44 +00:00
maarten
eda40b8eb6 fixed tortoize again
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@512 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2020-01-16 16:08:27 +00:00
maarten
c0c4be78f2 remove dummies
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@505 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-12-16 12:19:52 +00:00
maarten
513dbb6bfd speed up iterators
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@501 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-12-11 11:16:42 +00:00
maarten
77e0b3f776 symmetrie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@498 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-12-04 13:58:23 +00:00
maarten
a7249eb2ca fix writing atom id in pdb files
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@497 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-25 13:51:15 +00:00
maarten
51aebf844a nieuwe symmetrie code
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@495 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-19 15:22:44 +00:00
maarten
38121e20f5 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@494 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-13 13:58:45 +00:00
maarten
dbd826867f backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@493 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-12 20:17:26 +00:00
maarten
fe519e71a5 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@492 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-11 13:57:43 +00:00
maarten
072882e005 fixed memory leak
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@491 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-11 08:45:29 +00:00
maarten
3ab625cb2b fixed hanging bug (infinite loop caused by invalid cell)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@490 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-06 15:13:22 +00:00
maarten
2583975afd platonyzer werk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@489 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-04 14:19:48 +00:00
maarten
9ec0eae41f backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@488 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-11-04 12:19:43 +00:00
maarten
f2449abb79 fix for negative numerator
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@487 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-31 09:39:02 +00:00
maarten
4a5312a648 symopnr
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@485 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-31 08:33:33 +00:00
maarten
9c636544b3 new Symmetry
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@484 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-30 18:16:24 +00:00
maarten
19714ecb0b platonyzer work, adding a symop table generator
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@483 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-30 13:58:49 +00:00
maarten
9fbd41aef9 rt werk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@482 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-30 06:57:54 +00:00
maarten
c432ac4d7c backup platonyzer
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@481 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-29 12:33:52 +00:00
maarten
267302429c start platonyzer
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@479 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-28 10:44:04 +00:00
maarten
db164a2045 aanpassen van gelinkte data bij swapAtoms
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@478 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-10-14 13:12:46 +00:00
maarten
9b1e935628 met zonder resources, optioneel
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@475 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-09-17 13:11:09 +00:00
maarten
f1dfe12c24 address/port
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@472 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-09-17 07:19:25 +00:00
maarten
0bdda4610a stukje apple code weggehaald
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@469 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-09-09 11:03:56 +00:00
maarten
530d1110d9 met selectie outliers/etc
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@460 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-09-02 12:13:59 +00:00
maarten
d170c8da78 more fields added
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@454 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-08-26 18:17:00 +00:00
maarten
fe6d7a11ca don't crash on invalid sym operators, just drop them
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@453 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-08-21 07:41:34 +00:00
maarten
6082b11959 mmCQL klaar, voor nu
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@452 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-29 11:50:56 +00:00
maarten
01dbe675c8 a first select
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@451 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-29 10:09:02 +00:00
maarten
a825cfc687 fix remark 3 parser
cif-grep output zero when counted none

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@445 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-24 09:01:51 +00:00
maarten
9f81a4ef89 Fix in remark 3 parsing
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@444 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-24 08:35:36 +00:00
maarten
c9f37c74b4 orphan handling, UNL in prepper
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@443 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-23 14:29:30 +00:00
maarten
ffd82dfee0 bouwen zonder versie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@441 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-22 11:42:40 +00:00
maarten
6ef927cfa1 readme aangepast
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@439 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-22 10:47:06 +00:00
maarten
13a38fd011 fix pdb out (charge)
added options to cif-grep

git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@437 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-22 09:12:42 +00:00
maarten
f90990507a fix in sorting atoms in prepper
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@436 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-17 12:48:52 +00:00
maarten
55b1c56647 phenix
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@434 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-10 12:47:19 +00:00
maarten
1c009d481d readme aangepast
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@432 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-09 13:56:02 +00:00
maarten
42c72958a8 fix for buster-tnt parser
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@431 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-09 13:09:35 +00:00
maarten
13109f767a betere makefiles
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@430 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-09 12:34:44 +00:00
maarten
72e6708076 herverkaveling
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@428 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-09 08:36:29 +00:00
maarten
aaf25de2d0 reorganizing
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@426 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-08 12:57:58 +00:00
maarten
0cbb927b0f berekeningen gaan nu goed
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@420 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-03 09:37:03 +00:00
maarten
59f2387b68 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@419 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-03 05:55:05 +00:00
maarten
6d07611e49 start met tortoize
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@415 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-07-01 13:09:42 +00:00
maarten
f7b12dedc0 fix in map-maker... oef
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@414 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-06-13 18:49:15 +00:00
maarten
2cce9e5379 clean up type, recompiling
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@412 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-20 12:12:30 +00:00
maarten
5e1fe8211a type rename for ints
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@411 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-20 11:47:43 +00:00
maarten
29ebdcf7d2 Cleanup with better C++
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@410 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-20 11:37:56 +00:00
maarten
4206f26699 Remove orphans
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@409 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-15 12:41:49 +00:00
maarten
dd7a4f1189 Terug naar boost::regex
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@408 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-13 18:23:47 +00:00
maarten
d1b3f08d5b When updating a value, all linked child elements are renamed too.
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@407 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-08 10:41:23 +00:00
maarten
9a7aeed632 static assert little endianness in mapmaker
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@406 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-07 13:57:01 +00:00
maarten
963d51bbcb masked maps
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@404 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-07 13:19:29 +00:00
maarten
ef7a6f8f9d Oeps, foutje in geheugen management cif categorie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@403 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-05-06 11:29:31 +00:00
maarten
85aed9fb40 fix in authSeqID en vrienden
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@400 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-30 13:43:29 +00:00
maarten
603a0eca6b met map-maker server
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@393 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-30 11:42:04 +00:00
maarten
cccbfe025d map-maker
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@392 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-29 13:05:27 +00:00
maarten
778fa86410 silently ignore non-matching TLS group lines for REFMAC
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@391 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-18 16:56:13 +00:00
maarten
b4da5aeda3 x-plor guessing
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@390 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-18 15:08:12 +00:00
maarten
4ddfe65734 pas op, niet goed, werk aan erase in progress
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@389 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-10 07:21:18 +00:00
maarten
95d0d55715 diverse bugs, refactored links
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@388 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-08 13:06:12 +00:00
maarten
ec09e7ba57 diverse bug fixes
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@387 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-04-08 10:10:48 +00:00
maarten
74750a7940 drop remark 620
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@386 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-25 18:39:06 +00:00
maarten
0de2ae3673 tnt remark 3 output in pdb
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@385 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-25 18:37:24 +00:00
maarten
a236547a54 weer een pdb2cif fix
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@384 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-21 18:46:45 +00:00
maarten
8f12d15439 reflns mandatory fields
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@383 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-21 06:48:39 +00:00
maarten
535ea566de fix buster parser
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@382 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-21 06:38:31 +00:00
maarten
e095d3bf67 fix in pdb2cif code
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@380 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-09 05:27:07 +00:00
maarten
bb0562ebc1 accept empty lines in PDB
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@379 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-03-08 18:51:30 +00:00
maarten
ec4ea697dd versie nummering op de schop
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@376 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-27 10:30:36 +00:00
maarten
4918f572b4 nested werkt blijkbaar toch anders... jammmer
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@375 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-24 13:03:28 +00:00
maarten
0119c93aa0 handle empty compnd
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@374 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-23 16:59:58 +00:00
maarten
03c95609dd accept headerless files
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@373 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-23 07:55:56 +00:00
maarten
3650bc9269 incorrecte veldnamen
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@371 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-11 14:00:39 +00:00
maarten
25e2c38076 programma selectie v. REM3
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@370 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-11 13:28:23 +00:00
maarten
a265b97058 minder strikte kolom check
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@369 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-11 12:13:44 +00:00
maarten
fa7a0de6db aangepaste nonpoly_scheme
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@368 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-11 11:32:13 +00:00
maarten
996710728a water numbering scheme...
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@367 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-11 10:36:37 +00:00
maarten
b5f159c345 error handling
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@366 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-02-04 10:23:15 +00:00
maarten
958d1fb32c round trip fidelity
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@365 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-29 07:38:50 +00:00
maarten
28620841e0 prolsq/nuclsq
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@364 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-28 12:57:17 +00:00
maarten
e0d2c1328c met prolsq remark 3
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@362 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-25 20:23:21 +00:00
maarten
9a54b6b990 fix in validator (ucode vs code)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@361 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-24 18:54:08 +00:00
maarten
e27908b9ee preforked server
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@358 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-21 13:23:39 +00:00
maarten
63fa06d656 preforked server
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@357 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-21 13:09:31 +00:00
maarten
9856d0de35 preforked server
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@355 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-21 12:18:48 +00:00
maarten
a2a1e63e06 alles doet 't weer
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@353 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-16 14:26:12 +00:00
maarten
a83fb55961 refactoring
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@351 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-16 14:11:01 +00:00
maarten
83965b9a7f snellere bondmap
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@349 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-16 13:31:24 +00:00
maarten
e5bd42b4c7 fix voor symmetrie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@348 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-16 13:02:50 +00:00
maarten
602c770a45 betere distancemap
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@347 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-16 12:17:11 +00:00
maarten
64e2793c51 fixed symmetry issue
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@346 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-15 14:47:56 +00:00
maarten
51b6c7eb3f meer snelheid
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@345 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-15 14:34:47 +00:00
maarten
1cf8e7b72a pepflip werkt ook weer
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@344 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-15 10:06:20 +00:00
maarten
bc50f86836 some optimisations
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@343 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-15 08:46:58 +00:00
maarten
909a33c01a optimisation process started
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@342 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-14 14:02:33 +00:00
maarten
f66bd7fdaa backup -- met nieuwe Cif++ optimalisaties
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@341 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-09 14:06:10 +00:00
maarten
7f23427272 added rama-angles application
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@339 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-09 09:48:58 +00:00
maarten
4f2427e95c more timing stats for all
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@338 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-07 13:13:00 +00:00
maarten
1546cedb63 stats met json output
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@337 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-07 12:56:14 +00:00
maarten
d3bc435a2e met skip list
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@336 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-07 10:50:17 +00:00
maarten
4436ad4358 een beetje multithread code en een start met LINK support
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@334 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2019-01-02 14:06:30 +00:00
maarten
5af0296028 multithreaded pepflip
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@333 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-27 18:54:31 +00:00
maarten
b450cdf16d backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@326 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-19 13:53:06 +00:00
maarten
f96499cdd8 nieuwe dict, validate dict
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@325 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-19 09:54:23 +00:00
maarten
274120c9fc meer optimalisatie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@324 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-19 06:57:16 +00:00
maarten
0d50305679 double support
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@322 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-18 16:47:53 +00:00
maarten
6999b7a12a werkende minimizer...
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@317 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-13 09:03:12 +00:00
maarten
8118c073c4 backup debuggen refinement
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@316 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-10 14:08:22 +00:00
maarten
27bda1c6a0 betere bondmap, nu wel
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@315 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-06 08:22:46 +00:00
maarten
7927a7c0dd cleanup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@313 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-05 15:56:31 +00:00
maarten
39ead681b0 bondmap, toch werkend?
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@312 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-05 15:23:57 +00:00
maarten
9c05120753 vergeten!
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@311 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-05 15:01:14 +00:00
maarten
91abdc568b werkend, maar niet threaded
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@308 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-04 13:54:12 +00:00
maarten
b1a6180a67 merged oud en nieuw
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@307 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-03 19:28:40 +00:00
maarten
a789367ee8 update
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@306 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-03 18:02:22 +00:00
maarten
61dbb7c137 betere output
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@305 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-12-03 14:07:07 +00:00
maarten
834a3a50f9 werk van vandaaag
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@304 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-11-26 14:09:14 +00:00
maarten
7a423aecf7 fix voor symmetrie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@303 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-11-21 15:55:33 +00:00
maarten
91f7f9c238 werk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@302 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-11-21 14:08:34 +00:00
maarten
af930b23a1 aniso density calculation
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@300 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-09-10 13:20:22 +00:00
maarten
8047cda02d grid scaling
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@299 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-09-05 13:20:51 +00:00
maarten
773137e852 betere stats (geen interpolatie cum.prob. als n <= 100)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@297 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-09-03 09:29:31 +00:00
maarten
efd96f18b4 dict optie in stats
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@296 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-08-24 08:23:08 +00:00
maarten
deeac050f9 accept +Inf in pdb
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@295 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-31 08:44:32 +00:00
maarten
0ab408e1b3 accept +Inf in pdb
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@294 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-31 08:37:43 +00:00
maarten
521fafdd32 reflns_shell writing
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@293 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-30 11:48:46 +00:00
maarten
66fa88ec88 more diagnostics for Robbie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@292 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-30 09:17:08 +00:00
maarten
4738d97948 validate compound type for residues to catch errors in (re-)numbering
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@291 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-11 12:43:44 +00:00
maarten
cc2f705a21 pdb id output for stats
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@289 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-07-10 13:22:11 +00:00
maarten
3d9200f990 laatste hand aan pepflip, voor het weekeinde dan. Met coloured voor gekleurde tekst naar een tty
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@287 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-27 12:50:44 +00:00
maarten
0af4eca14f restraints/minimizer fixes
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@286 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-26 14:01:45 +00:00
maarten
26dc7eaa08 progress improvement
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@285 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-20 13:16:44 +00:00
maarten
9b5ef7548f pepflip werk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@284 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-20 13:14:52 +00:00
maarten
41b2231ee5 pepflip werk (met ramachandran scores)
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@283 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-19 13:25:41 +00:00
maarten
4a81e814c1 pepflipwerk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@282 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-13 13:22:22 +00:00
maarten
ef6643122f pepflip work
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@281 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-12 14:27:57 +00:00
maarten
00742f65af werkende DSSP
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@280 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-06 17:13:17 +00:00
maarten
1da97e6ebb secondary structure calculation, started
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@279 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-06 13:05:22 +00:00
maarten
38a852ca20 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@277 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-05 13:19:07 +00:00
maarten
a5b64ec805 split off statistics
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@275 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-06-04 13:09:29 +00:00
maarten
6dc402263a dropped newuoa
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@274 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-30 18:01:24 +00:00
maarten
54b8cb5c85 met torsions
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@272 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-30 12:05:21 +00:00
maarten
8bf1f01926 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@270 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-29 12:45:06 +00:00
maarten
791d8bd98a werkende refinement
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@269 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-29 11:52:45 +00:00
maarten
5b963bf62f fix in point
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@268 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-29 06:20:30 +00:00
maarten
0dc6531395 betere distance berekening
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@266 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-28 10:53:14 +00:00
maarten
07617dcdac backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@265 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-23 12:54:37 +00:00
maarten
2a2172a9eb edia fix, missing code from check-in
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@264 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-23 05:56:56 +00:00
maarten
065ba540b2 faster distmap
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@257 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-14 12:30:33 +00:00
maarten
e19fc4f7b6 fix in compound factory
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@256 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-07 10:13:43 +00:00
maarten
db5540cfc6 refactored
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@255 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-07 06:08:16 +00:00
maarten
8aaf0d8813 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@254 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-02 13:17:21 +00:00
maarten
e53c77cb7f fixed stupid atomtype indexing bug
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@253 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-02 11:54:55 +00:00
maarten
1a8d585b9c betere validatie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@252 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-02 10:10:59 +00:00
maarten
d4de02a229 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@251 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-05-01 14:25:48 +00:00
maarten
b60face9af refactored CompoundFactory
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@249 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-30 13:20:27 +00:00
maarten
91f8ac9edd refactored CompoundFactory
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@248 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-30 13:19:10 +00:00
maarten
6d12375691 refactored MapMaker
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@246 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-30 09:07:34 +00:00
maarten
924d37b603 fixed mtz-maker
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@245 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-30 08:42:43 +00:00
maarten
6105c1f6fd laatste fix map making
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@243 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-23 16:54:20 +00:00
maarten
93a96960f1 fix in map making...
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@242 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-23 16:25:15 +00:00
maarten
53091a321a backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@241 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-23 14:35:13 +00:00
maarten
eccf6c8855 refactored MapMaker
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@240 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-18 10:49:27 +00:00
maarten
2a2fefa80d betere mtzfix...
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@239 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-17 13:23:40 +00:00
maarten
65f17a7b26 re-ref en pep-shuffle
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@238 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-16 13:06:38 +00:00
maarten
d2716b4ea2 re-ref started
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@237 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-11 13:21:19 +00:00
maarten
9af1dabbe6 other clipper impl
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@236 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-11 06:58:27 +00:00
maarten
1e4e0638b7 less strict parsing
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@234 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-04 18:43:43 +00:00
maarten
724c834656 block out code
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@232 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-04-04 10:49:49 +00:00
maarten
ef4120fd72 el scf optie
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@231 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-28 08:47:59 +00:00
maarten
3a9688089c electron scattering support
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@230 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-28 07:08:40 +00:00
maarten
eb4f2f778f backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@228 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-26 10:49:07 +00:00
maarten
a793eb4c51 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@226 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-19 14:15:04 +00:00
maarten
6f1a592cc9 OPIA score toegevoegd aan stats programma
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@225 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-19 07:51:11 +00:00
maarten
1242ce1d29 backup voor het weekend
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@224 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-14 14:18:01 +00:00
maarten
7dedb46d08 optimalisatie?
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@223 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-14 08:17:57 +00:00
maarten
0d18d47775 global ignore
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@222 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-12 13:00:34 +00:00
maarten
4664c0bbe0 edit ignore
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@221 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-12 12:58:02 +00:00
maarten
4543e34c00 edit ignore
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@220 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-12 12:57:42 +00:00
maarten
b85f340cc7 chiron, updates
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@219 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-03-07 07:51:16 +00:00
maarten
c73e18fc26 first attempt for chiron
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@218 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-20 14:50:05 +00:00
maarten
17034d3bff backup voor nu
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@217 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-20 10:48:52 +00:00
maarten
4632a41763 chiron started
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@215 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-19 14:16:37 +00:00
maarten
16492f6f5b refactored
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@213 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-14 11:00:07 +00:00
maarten
4362fd9d6e map maker, a start
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@212 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-14 10:04:39 +00:00
maarten
3b44209b15 stats met EDIAm
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@210 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-13 13:09:56 +00:00
maarten
679a15b637 fix in peptidedb
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@209 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-02-12 13:57:05 +00:00
maarten
b4a52806e6 bah ...
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@202 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-24 14:21:09 +00:00
maarten
311961e80b baby steps towards stats
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@194 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-17 12:41:27 +00:00
maarten
0e31a9b17f backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@193 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-15 16:29:57 +00:00
maarten
d3194f8cc0 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@192 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-15 07:23:41 +00:00
maarten
2a9bb5752a stats werk
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@191 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-09 16:22:14 +00:00
maarten
821c1ef94f last fixes for pdb2cif
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@189 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-03 14:29:19 +00:00
maarten
ba0e39c0da fixed aligment in pdb2cif
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@188 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-03 12:12:33 +00:00
maarten
371c2aa876 better REFMAC-bug workarounds
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@186 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2018-01-02 08:58:34 +00:00
maarten
05ec536059 betere check op overflow van waarden in REMARK 3
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@185 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-20 11:27:31 +00:00
maarten
b0272a1cd5 removed warning
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@184 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-13 14:20:30 +00:00
maarten
caf80a0efe cif2map toegevoegd
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@183 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-13 07:33:31 +00:00
maarten
032138c4d3 accept corrupt remark 3 fields, improve cif-grep
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@182 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-11 10:50:44 +00:00
maarten
e0dc9f1c95 pdb2cif fixes
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@181 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-06 13:28:37 +00:00
maarten
c5d277fb43 refactored main using nested exceptions, fixed PDB parser a bit
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@180 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-06 08:47:46 +00:00
maarten
d0b7e21c77 various fixes in pdb2cif
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@179 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-04 14:19:10 +00:00
maarten
b1de54f8b1 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@176 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-12-04 08:50:22 +00:00
maarten
141764edf5 update for edia
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@175 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-27 08:00:59 +00:00
maarten
00fe32b76f update for edia
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@174 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-27 08:00:41 +00:00
maarten
bfd74e6f01 update for edia
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@173 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-27 07:56:40 +00:00
maarten
edf132c4bd backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@172 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-22 14:18:36 +00:00
maarten
f3878b3760 backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@171 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-22 07:17:12 +00:00
maarten
6c93599687 renaming intermediate backup
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@170 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-21 16:03:25 +00:00
maarten
ca881b82b5 reshuffled files
git-svn-id: svn+ssh://gitlab/srv/svn-repos/pdb-redo/trunk@169 a1961a4f-ab94-4bcc-80e8-33b5a54de466
2017-11-21 12:42:48 +00:00
86 changed files with 370559 additions and 0 deletions

8
.gitignore vendored Normal file
View File

@@ -0,0 +1,8 @@
# Paths that git should not track for this repository.
# NOTE(review): build/, .vscode/, .vs/, msvc/ and CMakeSettings.json look like
# build output and per-user IDE state; the remaining entries are presumably
# generated or downloaded files -- confirm against the build scripts.
build/
.vscode/
.vs/
tools/update-libcifpp-data
data/components.cif*
CMakeSettings.json
msvc/
src/revision.hpp

6
.gitmodules vendored Normal file
View File

@@ -0,0 +1,6 @@
# Git submodule declarations: each section names a submodule and gives the
# checkout path inside this repository plus the upstream URL it is cloned from.
[submodule "regex"]
path = regex
url = https://github.com/boostorg/regex
[submodule "gxrio"]
path = gxrio
url = https://github.com/mhekkel/gxrio.git

466
CMakeLists.txt Normal file
View File

@@ -0,0 +1,466 @@
# SPDX-License-Identifier: BSD-2-Clause
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Oldest CMake release this build description is written for.
cmake_minimum_required(VERSION 3.16)

# set the project name
project(cifpp VERSION 5.0.2 LANGUAGES CXX)

# Search the project's own cmake/ directory before the standard module path.
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

include(GNUInstallDirs)
include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(CheckCXXSourceCompiles)
include(AddGitSubmodule)

# Require ISO C++20 without compiler specific extensions. The variable that
# initialises the CXX_EXTENSIONS target property is CMAKE_CXX_EXTENSIONS; a
# plain variable named CXX_EXTENSIONS has no effect on the build.
set(CMAKE_CXX_EXTENSIONS OFF)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Turn on a generous set of warnings for the compilers we know about.
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
elseif(MSVC)
	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
endif()
# --- User selectable build options ------------------------------------------

# Static library by default, shared on request.
option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)

# Let CMake export every symbol on Windows so no hand written export file
# is needed.
set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)

# Build a variant that is installed inside a CCP4 tree.
option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)

# Much of the code relies on the components.cif file being available.
option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)

# On UNIX a script can be installed that keeps the data files up-to-date.
if(UNIX)
	option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
endif()

# The symmetry operations table can only be recreated when CCP4 is sourced
# in the environment and its syminfo.lib file can be found.
if(NOT EXISTS "$ENV{CCP4}")
	set(CIFPP_RECREATE_SYMOP_DATA OFF)
	message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
elseif(EXISTS "$ENV{CLIBD}/syminfo.lib")
	option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
else()
	set(CIFPP_RECREATE_SYMOP_DATA OFF)
	message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CLIBD}")
endif()

# Unit tests are not built unless asked for.
option(ENABLE_TESTING "Build test exectuables" OFF)
# A CCP4 build installs into the CCP4 tree named by the CCP4 environment
# variable and looks there for modules and packages as well.
if(BUILD_FOR_CCP4)
	# The path is quoted so that an empty value or a directory name containing
	# spaces is passed to EXISTS as a single argument.
	if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS "$ENV{CCP4}")
		message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
	else()
		list(APPEND CMAKE_MODULE_PATH "$ENV{CCP4}")
		list(APPEND CMAKE_PREFIX_PATH "$ENV{CCP4}")
		set(CMAKE_INSTALL_PREFIX "$ENV{CCP4}")

		# On Windows the CCP4 variant is always built as a shared library.
		if(WIN32)
			set(BUILD_SHARED_LIBS ON)
		endif()
	endif()
endif()
# get_WIN32_WINNT(<version>)
#
# Derive a value for the _WIN32_WINNT preprocessor symbol from
# CMAKE_SYSTEM_VERSION and store it in the variable named by <version>.
# The value is major * 256 + minor written as a hexadecimal literal, so
# "6.1" gives 0x601 and "10.0.19041" gives 0xa00. Only the first two
# components of the version are used; any build number is ignored.
# When not on WIN32, or when CMAKE_SYSTEM_VERSION does not start with
# "<major>.<minor>", the output variable is left untouched.
macro(get_WIN32_WINNT version)
	if(WIN32 AND CMAKE_SYSTEM_VERSION MATCHES "^([0-9]+)\\.([0-9]+)")
		math(EXPR ${version} "${CMAKE_MATCH_1} * 256 + ${CMAKE_MATCH_2}" OUTPUT_FORMAT HEXADECIMAL)
	endif()
endmacro()

if(MSVC)
	# make msvc standards compliant...
	add_compile_options(/permissive-)

	# Only pass the definition when a version could actually be derived,
	# an empty -D_WIN32_WINNT= would define the symbol to nothing.
	unset(ver)
	get_WIN32_WINNT(ver)
	if(DEFINED ver)
		add_definitions(-D_WIN32_WINNT=${ver})
	endif()
endif()
# Libraries

# First find out whether std::regex can be used. The implementation that
# ships with GCC's libstdc++ is not acceptable since it crashes on long
# lines; those of libc++ (clang) and MSVC seem to be OK.
# The snippet below only compiles when __GLIBCXX__ is defined, i.e. when
# the standard library in use is GNU libstdc++.
check_cxx_source_compiles("
#include <iostream>
#ifndef __GLIBCXX__
#error
#endif
int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)

if(GXX_LIBSTDCPP)
	message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")

	# Build and run cmake/test-rx.cpp to see whether the bug is present.
	try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
		${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)

	if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
		message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will try to use boost::regex instead")

		find_package(Boost COMPONENTS regex QUIET)

		if(NOT Boost_FOUND)
			# No Boost installed: fetch the regex sources and use their headers.
			add_git_submodule("https://github.com/boostorg/regex" regex EXCLUDE_FROM_ALL)
			set(BOOST_REGEX_STANDALONE ON)
			set(BOOST_REGEX_INCLUDE regex/include)
		elseif(Boost_VERSION VERSION_GREATER_EQUAL 1.77)
			# Installed Boost, version 1.77 or newer: use its headers in
			# standalone mode, nothing to link.
			set(BOOST_REGEX_STANDALONE ON)
			set(BOOST_REGEX_INCLUDE ${Boost_INCLUDE_DIRS})
		else()
			# Older installed Boost: link against the Boost::regex target.
			list(APPEND CIFPP_REQUIRED_LIBRARIES Boost::regex)
			set(BOOST_REGEX_SYSTEM ON)
		endif()
	endif()
endif()
# Prefer pthreads and the -pthread flag. Both variables need an explicit
# value: set(<var>) without a value unsets the variable instead of
# switching it on.
set(CMAKE_THREAD_PREFER_PTHREAD ON)
set(THREADS_PREFER_PTHREAD_FLAG ON)

# Threads::Threads and ZLIB::ZLIB are linked into cifpp unconditionally, so
# both packages are hard requirements.
find_package(Threads REQUIRED)
find_package(ZLIB REQUIRED)

# std::filesystem and std::atomic may need an extra library on some
# toolchains; the modules set the *_LIBRARY variables when that is the case.
include(FindFilesystem)
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})

include(FindAtomic)
list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPATOMIC_LIBRARY})

# Create a revision file, containing the current git version info
include(VersionString)
write_version_header(${PROJECT_SOURCE_DIR}/src/ "LibCIFPP")
# SymOp data table
if(CIFPP_RECREATE_SYMOP_DATA)
	# The tool to create the table
	add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")

	# Rule that (re)generates src/symop_table_data.hpp from CCP4's
	# syminfo.lib. The dependencies belong on the custom command itself so
	# that the header is rebuilt when either the generator or syminfo.lib
	# changes. The header is listed as a source of the cifpp library, which
	# is what hooks this rule into the build; no separate custom target is
	# needed.
	add_custom_command(
		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
		COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
		COMMENT "Recreating symop_table_data.hpp"
		VERBATIM
	)
endif()
# Sources
# The file names are listed relative to src/ and turned into absolute paths
# in one go.
set(project_sources
	category.cpp
	condition.cpp
	datablock.cpp
	dictionary_parser.cpp
	file.cpp
	item.cpp
	parser.cpp
	row.cpp
	validate.cpp
	text.cpp
	utilities.cpp
	atom_type.cpp
	compound.cpp
	point.cpp
	symmetry.cpp
	model.cpp
	pdb/cif2pdb.cpp
	pdb/pdb2cif.cpp
	pdb/pdb2cif_remark_3.cpp
	pdb/tls.cpp
)
list(TRANSFORM project_sources PREPEND "${PROJECT_SOURCE_DIR}/src/")

# Public headers, listed relative to include/cif++/. The umbrella header
# include/cif++.hpp lives one level up and is put at the front of the list.
set(project_headers
	utilities.hpp
	item.hpp
	datablock.hpp
	file.hpp
	validate.hpp
	list.hpp
	iterator.hpp
	parser.hpp
	forward_decl.hpp
	dictionary_parser.hpp
	condition.hpp
	category.hpp
	row.hpp
	atom_type.hpp
	compound.hpp
	point.hpp
	symmetry.hpp
	model.hpp
	pdb/cif2pdb.hpp
	pdb/io.hpp
	pdb/pdb2cif.hpp
	pdb/pdb2cif_remark_3.hpp
	pdb/tls.hpp
)
list(TRANSFORM project_headers PREPEND "${PROJECT_SOURCE_DIR}/include/cif++/")
list(INSERT project_headers 0 "${PROJECT_SOURCE_DIR}/include/cif++.hpp")
# The library itself. The symop table header is listed explicitly next to
# the regular sources and headers.
add_library(cifpp
	${project_sources}
	${project_headers}
	${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
add_library(cifpp::cifpp ALIAS cifpp)

set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)

# Tell the sources which regex implementation was selected: standalone
# Boost.Regex headers, a linked Boost.Regex, or neither.
if(BOOST_REGEX_STANDALONE)
	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
	target_include_directories(cifpp PRIVATE ${BOOST_REGEX_INCLUDE})
elseif(BOOST_REGEX_SYSTEM)
	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1)
endif()

# Headers come from the source tree while building and from the install
# location once installed.
target_include_directories(cifpp
	PUBLIC
	"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)

target_link_libraries(cifpp
	PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})

# Extra linker option used with Apple's clang only.
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
	target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
endif()
if(CIFPP_DOWNLOAD_CCD)
	# download the components.cif file from CCD
	set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)

	if(NOT EXISTS ${COMPONENTS_CIF})
		# file(MAKE_DIRECTORY) creates the directory only when needed.
		file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)

		# With gunzip available the compressed file is fetched and unpacked
		# at build time, otherwise the uncompressed file is fetched directly.
		find_program(GUNZIP gunzip)

		if(GUNZIP)
			set(_ccd_url ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz)
			set(_ccd_file ${COMPONENTS_CIF}.gz)
		else()
			set(_ccd_url ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif)
			set(_ccd_file ${COMPONENTS_CIF})
		endif()

		file(DOWNLOAD ${_ccd_url} ${_ccd_file}
			SHOW_PROGRESS
			STATUS _ccd_status)

		# STATUS is a two element list: a numeric code (0 means success) and
		# a message. A failed download leaves an empty or partial file behind
		# which a later configure run would mistake for valid data, so remove
		# it and stop here.
		list(GET _ccd_status 0 _ccd_status_code)

		if(NOT _ccd_status_code EQUAL 0)
			list(GET _ccd_status 1 _ccd_status_message)
			file(REMOVE ${_ccd_file})
			message(FATAL_ERROR "Downloading ${_ccd_url} failed: ${_ccd_status_message}. Use -DCIFPP_DOWNLOAD_CCD=OFF to build without downloading the CCD file.")
		endif()

		if(GUNZIP)
			add_custom_command(OUTPUT ${COMPONENTS_CIF}
				COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
				WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
		endif()
	endif()

	add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
endif()
# Locations compiled into the library as preprocessor definitions.

# A cache directory is only configured on UNIX.
if(UNIX)
	set(CIFPP_CACHE_DIR "/var/cache/libcifpp"
		CACHE STRING "The cache directory to use")
	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif()

# Install locations relative to the install prefix.
set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR})
set(LIBRARY_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR})
set(SHARE_INSTALL_DIR ${CMAKE_INSTALL_DATADIR}/libcifpp)

# The data directory defaults to <prefix>/<datadir>/libcifpp.
set(CIFPP_DATA_DIR "${CMAKE_INSTALL_PREFIX}/${SHARE_INSTALL_DIR}"
	CACHE STRING "The directory containing the provided data files")
target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
# Install rules

# The library, recorded in the cifppTargets export set.
install(
	TARGETS cifpp
	EXPORT cifppTargets
	ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
	LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
	RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
	INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

# For a shared MSVC build, install the .pdb file too when there is one.
if(MSVC AND BUILD_SHARED_LIBS)
	install(FILES $<TARGET_PDB_FILE:${PROJECT_NAME}>
		DESTINATION ${CMAKE_INSTALL_LIBDIR}
		OPTIONAL)
endif()

# The export set, written as cifppTargets.cmake in the cifpp:: namespace.
install(EXPORT cifppTargets
	FILE "cifppTargets.cmake"
	NAMESPACE cifpp::
	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp)

# Headers: the cif++ directory and the umbrella header.
install(DIRECTORY include/cif++
	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
	COMPONENT Devel)

install(FILES include/cif++.hpp
	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
	COMPONENT Devel)

# Dictionaries, plus components.cif when COMPONENTS_CIF has been set.
install(
	FILES
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
		${COMPONENTS_CIF}
	DESTINATION ${CIFPP_DATA_DIR})
# Pick the template for cifppConfig.cmake. Only the BOOST_REGEX_SYSTEM
# setup adds the imported Boost::regex target to the PUBLIC link libraries
# of cifpp; the standalone headers are a PRIVATE include directory and the
# std::regex build uses no Boost at all. So only that setup should get the
# boost-regex flavour of the config file.
# NOTE(review): assumes cifppConfig-boost-regex.cmake.in is the variant that
# locates Boost regex for consumers - confirm against the template.
if(BOOST_REGEX_SYSTEM)
	set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig-boost-regex.cmake.in)
else()
	set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)
endif()

configure_package_config_file(
	${CONFIG_TEMPLATE_FILE}
	${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake
	INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
	PATH_VARS INCLUDE_INSTALL_DIR LIBRARY_INSTALL_DIR SHARE_INSTALL_DIR
)

install(FILES
	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfig.cmake"
	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
	COMPONENT Devel
)

# Use the major version of this project. CMAKE_PROJECT_VERSION_MAJOR is the
# version of the top level project, which is a different one when cifpp is
# built as part of another project.
set(cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})

set_target_properties(cifpp PROPERTIES
	VERSION ${PROJECT_VERSION}
	SOVERSION ${cifpp_MAJOR_VERSION}
	INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})

set_property(TARGET cifpp APPEND PROPERTY
	COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
)

write_basic_package_version_file(
	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifppConfigVersion.cmake"
	VERSION ${PROJECT_VERSION}
	COMPATIBILITY AnyNewerVersion
)
# pkgconfig support
# These four variables are the ones set up for substitution into
# libcifpp.pc.in by the @ONLY configure step below.
set(prefix ${CMAKE_INSTALL_PREFIX})
set(exec_prefix ${CMAKE_INSTALL_PREFIX})
set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})

# Two passes: configure_file replaces the @VAR@ references, file(GENERATE)
# then evaluates any generator expressions and writes the final .pc file.
configure_file(
	${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
	${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in
	@ONLY)

file(GENERATE
	OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
	INPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in)

install(
	FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
	DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
# Unit tests, built only on request. Each name in CIFPP_tests stands for a
# source file test/<name>-test.cpp that becomes an executable <name>-test.
if(ENABLE_TESTING)
	enable_testing()

	find_package(Boost REQUIRED)

	list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar
	)

	foreach(CIFPP_TEST IN LISTS CIFPP_tests)
		set(CIFPP_TEST "${CIFPP_TEST}-test")
		set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")

		add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE})
		target_link_libraries(${CIFPP_TEST}
			PRIVATE Threads::Threads cifpp::cifpp Boost::boost)

		if(MSVC)
			# Specify unwind semantics so that MSVC knows how to handle exceptions
			target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
		endif()

		# A run-<name>-test target that executes the test. The command does
		# not create the .touch file it declares as its output.
		add_custom_target("run-${CIFPP_TEST}"
			DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})

		add_custom_command(
			OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)

		# The same command line registered with ctest.
		add_test(
			NAME ${CIFPP_TEST}
			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
	endforeach()
endif()
message("Will install in ${CMAKE_INSTALL_PREFIX}")

# Optionally install the update scripts for CCD and dictionary files
if(CIFPP_INSTALL_UPDATE_SCRIPT)
	# The script is generated from its template and installed, executable
	# and readable for everyone, in the weekly cron directory.
	set(CIFPP_CRON_DIR "$ENV{DESTDIR}/etc/cron.weekly")

	configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)

	install(
		FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
		DESTINATION ${CIFPP_CRON_DIR}
		PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
	)

	# Create the (empty) cache directory and the cache-update.d directory.
	install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})
	install(DIRECTORY DESTINATION "$ENV{DESTDIR}/etc/libcifpp/cache-update.d")

	# A config file too, to make it complete. It is only written when there
	# is none yet at configure time.
	if(NOT EXISTS "$ENV{DESTDIR}/etc/libcifpp.conf")
		file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
# update=true
]])

		install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "$ENV{DESTDIR}/etc")

		install(CODE "message(\"A configuration file has been written to $ENV{DESTDIR}/etc/libcifpp.conf, please edit this file to enable automatic updates\")")
	endif()

	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
endif()
# Packaging with CPack.
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")

# Of the source package formats only the gzipped tarball is switched on.
set(CPACK_SOURCE_TGZ ON)
set(CPACK_SOURCE_TBZ2 OFF)
set(CPACK_SOURCE_TXZ OFF)
set(CPACK_SOURCE_TZ OFF)

# Patterns for files that are left out of the source package.
set(CPACK_SOURCE_IGNORE_FILES "/data/components.cif;/build;/.vscode;/.git;/regex")

# Binary and source packages share the name <project>-<version>.
set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME})

include(CPack)

24
LICENSE Normal file
View File

@@ -0,0 +1,24 @@
BSD-2-Clause License
Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

46
README.md Normal file
View File

@@ -0,0 +1,46 @@
libcifpp
========
This library contains code to work with mmCIF and PDB files.
Requirements
------------
The code for this library was written in C++20. You therefore need a
recent compiler to build it. For the development gcc 9.3 and clang 9.0
have been used as well as MSVC version 2019.
Other requirements are:
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
allows including data files into the executable making them easier to
install. Strictly this is optional, but at the expense of functionality.
Building
--------
This library uses [cmake](https://cmake.org). The usual way of building
and installing is to create a `build` directory and run cmake there.
On linux e.g. you would issue the following commands:
```
git clone https://github.com/PDB-REDO/libcifpp.git
cd libcifpp
mkdir build
cd build
cmake ..
cmake --build . --config Release
ctest -C Release
cmake --install .
```
These commands check out the source code from GitHub and create a new
directory where cmake stores its files. They then configure and build the
code, run the tests, and finally install the library and auxiliary files.
The default is to install everything in `$HOME/.local` on Linux and
`%LOCALAPPDATA%` on Windows (the AppData/Local folder in your home directory).
You can change this by specifying the prefix with the
[CMAKE_INSTALL_PREFIX](https://cmake.org/cmake/help/v3.21/variable/CMAKE_INSTALL_PREFIX.html)
variable.

99
changelog Normal file
View File

@@ -0,0 +1,99 @@
Version 5.0.2
- Fix export of CISPEP records in PDB format
- Better support for exporting package_source
Version 5.0.1
- Fix loading dictionaries
- Support for cifv1.0 files
Version 5.0.0
- Total rewrite of cif part
- Removed DSSP code, moved into dssp project itself
Version 4.2.1
- Improved REMARK 3 parser (for TLS in large molecules)
Version 4.2.0
- Yet another rewrite of resource loading
Version 4.1.1
- Fall back to zero charge for scattering factors if the atom
was not found in the table.
- Improve code to locate resources, failing less.
Version 4.1.0
- Some interface changes for mmcif::Atom
Version 4.0.1
- Added a bunch of const methods to Datablock and Category.
- Changed PDB writing interface to accept Datablock instead of File.
Version 4.0.0
- getResidue in mmcif::Structure now requires both a
sequence ID and an auth sequence ID. As a result the code was cleaned
up considerably.
Version 3.0.5
- mmcif::Structure redesign. It is now a wrapper around a cif::Datablock.
Version 3.0.4
- Fix in mmCIF parser, now correctly handles the unquoted
string ??
Version 3.0.3
- Better configuration checks, for atomic e.g.
- Fixed a problem introduced in refactoring mmcif::Atom
- Version string creation
Version 3.0.2
- refactored mmcif::Atom for performance reasons
Version 3.0.1
- Fixed processing of proline restraints file from CCP4, proline
is a peptide, really.
- Added code to facilitate DSSP
Version 3.0.0
- Replaced many strings in the API with string_view for
performance reasons.
- Upgraded mmcif::Structure
- various other small fixes
Version 2.0.5
- Backporting updated CMakeLists.txt file
Version 2.0.4
- Reverted a too strict test when reading cif files.
Version 2.0.3
- Fixed reading mmCIF files where model numbers are used and
model number 1 is missing.
Version 2.0.2
- Added configuration flag to disable downloading CCD data during build
Note that there are now two flags for CCD data:
DOWNLOAD_CCD to enable downloading during build
INSTALL_UPDATE_SCRIPT to install an update mechanism for this file
- Updated unit tests to work even if no CCD data is available
Version 2.0.1
- Fixed the generator for the symmetry operator table
Version 2.0.0
- New API interface for accessing query results
- Removed bzip2 support
- improved makefiles
Version 1.1.1
- Now with full support for MS Windows
Version 1.1.0
- Changed from GNU configure to CMake.
- Loading compound information from CCD file
Version 1.0.1
- Changed the way resources are looked up, local dir first,
then /var/cache and finally compiled in resources (with mrc).
Version 1.0.0
- First public release

View File

@@ -0,0 +1,40 @@
cmake_minimum_required(VERSION 3.16..3.19)
function(add_git_submodule repo dir)
	# Add a Git submodule directory to CMake, assuming the Git submodule
	# directory is a CMake project.
	#
	# Usage: in CMakeLists.txt
	#
	#   include(AddGitSubmodule)
	#   add_git_submodule(<repo-url> <dir> [<extra add_subdirectory args>...])
	#
	# When <dir> does not contain a CMakeLists.txt yet, the sources are
	# fetched first: with 'git submodule update' when the current source
	# directory has a .git directory, with 'git clone' of <repo-url>
	# otherwise. Arguments after <dir> are passed on to add_subdirectory,
	# e.g. EXCLUDE_FROM_ALL.
	find_package(Git QUIET)

	if(NOT EXISTS "${PROJECT_SOURCE_DIR}/${dir}/CMakeLists.txt")
		if(NOT GIT_FOUND)
			message(FATAL_ERROR "Git was not found and the submodule ${dir} is not complete. Cannot continue.")
		endif()

		if(IS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/.git")
			# We're in a git repo, we can use submodules
			set(_git_arguments submodule update --init --recursive -- ${dir})
		else()
			set(_git_arguments clone "${repo}" --recursive -- ${dir})
		endif()

		# Check the exit status ourselves. COMMAND_ERROR_IS_FATAL needs
		# CMake 3.19, checking RESULT_VARIABLE works for every version and
		# stops older versions from silently ignoring a failed fetch.
		execute_process(
			COMMAND ${GIT_EXECUTABLE} ${_git_arguments}
			WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
			RESULT_VARIABLE _git_result)

		if(NOT _git_result EQUAL 0)
			message(FATAL_ERROR "Fetching the sources for ${dir} with git failed (result: ${_git_result}). Cannot continue.")
		endif()
	endif()

	# Switch off testing inside this function scope before the subdirectory
	# is processed.
	set(ENABLE_TESTING OFF)

	# ARGN holds everything after the two named parameters and is simply
	# empty when there is nothing; ARGV2 is not defined in that case.
	add_subdirectory(${dir} ${ARGN})
endfunction(add_git_submodule)

62
cmake/FindAtomic.cmake Normal file
View File

@@ -0,0 +1,62 @@
# Simple check to see if we need a library for std::atomic
#
# Results:
#   std::atomic           imported interface target
#   STDCPPATOMIC_LIBRARY  set to 'atomic' when linking that library is needed
#   Atomic_FOUND          cache entry, true when the test program could be run

# Nothing to do when the target was already created by an earlier include.
if(TARGET std::atomic)
	return()
endif()

cmake_minimum_required(VERSION 3.10)

include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceRuns)

# Changes to the CMAKE_REQUIRED_* variables below are undone again by the
# matching cmake_pop_check_state().
cmake_push_check_state()

check_include_file_cxx("atomic" _CXX_ATOMIC_HAVE_HEADER)
mark_as_advanced(_CXX_ATOMIC_HAVE_HEADER)

# Test program using a 64 bit atomic.
set(code [[
#include <atomic>
int main(int argc, char** argv) {
std::atomic<long long> s;
++s;
return 0;
}
]])

# First try without any extra library ...
check_cxx_source_runs("${code}" _CXX_ATOMIC_BUILTIN)

if(_CXX_ATOMIC_BUILTIN)
	set(_found 1)
else()
	# ... and if that does not work, try again linking with libatomic.
	list(APPEND CMAKE_REQUIRED_LIBRARIES atomic)

	check_cxx_source_runs("${code}" _CXX_ATOMIC_LIB_NEEDED)

	if(NOT _CXX_ATOMIC_LIB_NEEDED)
		message(FATAL_ERROR "unable to link C++ std::atomic code: you may need to install GNU libatomic")
	else()
		set(_found 1)
	endif()
endif()

if(_found)
	add_library(std::atomic INTERFACE IMPORTED)
	set_property(TARGET std::atomic APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_14)

	if(_CXX_ATOMIC_BUILTIN)
		# Nothing to add...
	elseif(_CXX_ATOMIC_LIB_NEEDED)
		set_target_properties(std::atomic PROPERTIES IMPORTED_LIBNAME atomic)
		set(STDCPPATOMIC_LIBRARY atomic)
	endif()
endif()

cmake_pop_check_state()

set(Atomic_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::atomic" FORCE)

if(Atomic_FIND_REQUIRED AND NOT Atomic_FOUND)
	message(FATAL_ERROR "Cannot run simple program using std::atomic")
endif()

View File

@@ -0,0 +1,74 @@
# Simplistic reimplementation of https://github.com/vector-of-bool/CMakeCM/blob/master/modules/FindFilesystem.cmake
#
# Results:
#   std::filesystem    imported interface target
#   STDCPPFS_LIBRARY   set to 'stdc++fs' or 'c++fs' when such a library is needed
#   Filesystem_FOUND   cache entry, true when the test program could be built

# The target exists already, a previous include did all the work.
if(TARGET std::filesystem)
	return()
endif()

cmake_minimum_required(VERSION 3.10)

include(CMakePushCheckState)
include(CheckIncludeFileCXX)
include(CheckCXXSourceCompiles)

cmake_push_check_state()

check_include_file_cxx("filesystem" _CXX_FILESYSTEM_HAVE_HEADER)
mark_as_advanced(_CXX_FILESYSTEM_HAVE_HEADER)

# Minimal program that uses std::filesystem.
set(code [[
#include <cstdlib>
#include <filesystem>
int main() {
auto cwd = std::filesystem::current_path();
return EXIT_SUCCESS;
}
]])

# Step 1: does the program build without any linker flags? With GCC up to
# and including 8.4.0 the answer is forced to 'no', see
# https://stackoverflow.com/questions/63902528/program-crashes-when-filesystempath-is-destroyed
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" AND CMAKE_CXX_COMPILER_VERSION VERSION_LESS_EQUAL 8.4.0)
	set(CXX_FILESYSTEM_NO_LINK_NEEDED 0)
else()
	check_cxx_source_compiles("${code}" CXX_FILESYSTEM_NO_LINK_NEEDED)
endif()

# Step 2: if not, try the libstdc++ flag first and the libc++ flag after that.
if(CXX_FILESYSTEM_NO_LINK_NEEDED)
	set(_found 1)
else()
	set(prev_libraries ${CMAKE_REQUIRED_LIBRARIES})

	set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lstdc++fs)
	check_cxx_source_compiles("${code}" CXX_FILESYSTEM_STDCPPFS_NEEDED)
	set(_found ${CXX_FILESYSTEM_STDCPPFS_NEEDED})

	if(NOT CXX_FILESYSTEM_STDCPPFS_NEEDED)
		set(CMAKE_REQUIRED_LIBRARIES ${prev_libraries} -lc++fs)
		check_cxx_source_compiles("${code}" CXX_FILESYSTEM_CPPFS_NEEDED)
		set(_found ${CXX_FILESYSTEM_CPPFS_NEEDED})
	endif()
endif()

# Step 3: create the imported target and record the library, if any.
if(_found)
	add_library(std::filesystem INTERFACE IMPORTED)
	set_property(TARGET std::filesystem APPEND PROPERTY INTERFACE_COMPILE_FEATURES cxx_std_17)

	if(NOT CXX_FILESYSTEM_NO_LINK_NEEDED)
		if(CXX_FILESYSTEM_STDCPPFS_NEEDED)
			set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME stdc++fs)
			set(STDCPPFS_LIBRARY stdc++fs)
		elseif(CXX_FILESYSTEM_CPPFS_NEEDED)
			set_target_properties(std::filesystem PROPERTIES IMPORTED_LIBNAME c++fs)
			set(STDCPPFS_LIBRARY c++fs)
		endif()
	endif()
endif()

cmake_pop_check_state()

set(Filesystem_FOUND ${_found} CACHE BOOL "TRUE if we can run a program using std::filesystem" FORCE)

if(Filesystem_FIND_REQUIRED AND NOT Filesystem_FOUND)
	message(FATAL_ERROR "Cannot run simple program using std::filesystem")
endif()

View File

@@ -0,0 +1,284 @@
# - Returns a version string from Git
#
# These functions force a re-configure on each git commit so that you can
# trust the values of the variables in your build system.
#
# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
#
# Returns the refspec and sha hash of the current head revision
#
# git_describe(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe on the source tree, and adjusting
# the output so that it tests false if an error occurs.
#
# git_describe_working_tree(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe on the working tree (--dirty option),
# and adjusting the output so that it tests false if an error occurs.
#
# git_get_exact_tag(<var> [<additional arguments to git describe> ...])
#
# Returns the results of git describe --exact-match on the source tree,
# and adjusting the output so that it tests false if there was no exact
# matching tag.
#
# git_local_changes(<var>)
#
# Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes.
# Uses the return code of "git diff-index --quiet HEAD --".
# Does not regard untracked files.
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2020 Ryan Pavlik <ryan.pavlik@gmail.com> <abiryan@ryand.net>
# http://academic.cleardefinition.com
#
# Copyright 2009-2013, Iowa State University.
# Copyright 2013-2020, Ryan Pavlik
# Copyright 2013-2020, Contributors
# SPDX-License-Identifier: BSL-1.0
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
# Process this module only once: a second include returns right away.
if(__get_git_revision_description)
	return()
endif()

set(__get_git_revision_description YES)

# Remember the directory of this module. This has to be done at "include"
# time and not at function call time, since at call time the current list
# file is the one of the caller.
get_filename_component(_gitdescmoddir "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY)
# _git_find_closest_git_dir(<start_dir> <result_var>)
#
# Look for an entry called .git in <start_dir> and, as long as none is
# found, in each of its parent directories in turn. The path of the first
# .git found is stored in the variable <result_var> in the scope of the
# caller. When the root directory is reached without finding one, the
# variable is set to an empty string.
#
# Example: for C:/bla/foo/bar, with C:/bla/.git existing and neither foo
# nor bar containing a file or directory named .git, the result is
# C:/bla/.git
#
function(_git_find_closest_git_dir _start_dir _git_dir_var)
	set(_search_dir "${_start_dir}")
	set(_candidate "${_start_dir}/.git")

	while(NOT EXISTS "${_candidate}")
		# Nothing here, move one directory up.
		get_filename_component(_parent_dir "${_search_dir}" DIRECTORY)

		if(_parent_dir STREQUAL _search_dir)
			# The parent of the root directory is the root itself: the
			# search is over and we are not inside a git tree.
			set(${_git_dir_var} "" PARENT_SCOPE)
			return()
		endif()

		set(_search_dir "${_parent_dir}")
		set(_candidate "${_parent_dir}/.git")
	endwhile()

	set(${_git_dir_var} "${_candidate}" PARENT_SCOPE)
endfunction()
# get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
#
# Stores HEAD_REF and HEAD_HASH, as set by the generated grabRef.cmake
# script, in the caller's variables <refspecvar> and <hashvar>.
# Both are set to "GITDIR-NOTFOUND" when no .git entry is found, or when
# the one found lies above CMAKE_SOURCE_DIR and the option
# ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR was not given.
# When the HEAD file cannot be found the function returns without setting
# either variable.
function(get_git_head_revision _refspecvar _hashvar)
# Start the search for .git in the current source directory.
_git_find_closest_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)
# The option is only recognised when it is the one and only extra argument.
if("${ARGN}" STREQUAL "ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR")
set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE)
else()
set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE)
endif()
if(NOT "${GIT_DIR}" STREQUAL "")
file(RELATIVE_PATH _relative_to_source_dir "${CMAKE_SOURCE_DIR}"
"${GIT_DIR}")
# A relative path containing ".." means .git is outside CMAKE_SOURCE_DIR.
if("${_relative_to_source_dir}" MATCHES "[.][.]" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR)
# We've gone above the CMake root dir.
set(GIT_DIR "")
endif()
endif()
if("${GIT_DIR}" STREQUAL "")
set(${_refspecvar}
"GITDIR-NOTFOUND"
PARENT_SCOPE)
set(${_hashvar}
"GITDIR-NOTFOUND"
PARENT_SCOPE)
return()
endif()
# Check if the current source dir is a git submodule or a worktree.
# In both cases .git is a file instead of a directory.
#
if(NOT IS_DIRECTORY ${GIT_DIR})
# The following git command will return a non empty string that
# points to the super project working tree if the current
# source dir is inside a git submodule.
# Otherwise the command will return an empty string.
#
execute_process(
COMMAND "${GIT_EXECUTABLE}" rev-parse
--show-superproject-working-tree
WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
OUTPUT_VARIABLE out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
if(NOT "${out}" STREQUAL "")
# out is not empty, so GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule.
# The .git file holds a "gitdir: <path>" line; the path is resolved
# relative to the directory containing the .git file.
file(READ ${GIT_DIR} submodule)
string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE
${submodule})
string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE)
get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH)
get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}
ABSOLUTE)
set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
else()
# GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
file(READ ${GIT_DIR} worktree_ref)
# The .git file contains a path to the worktree information directory
# inside the parent git repo of the worktree.
#
string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir
${worktree_ref})
string(STRIP ${git_worktree_dir} git_worktree_dir)
_git_find_closest_git_dir("${git_worktree_dir}" GIT_DIR)
set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
endif()
else()
# Regular repository: HEAD lives directly inside the .git directory.
set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
endif()
# Working directory in the build tree for the copied HEAD file and the
# generated grabRef.cmake script.
set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
if(NOT EXISTS "${GIT_DATA}")
file(MAKE_DIRECTORY "${GIT_DATA}")
endif()
# Without a HEAD file there is nothing to read; the output variables are
# not set in this case.
if(NOT EXISTS "${HEAD_SOURCE_FILE}")
return()
endif()
set(HEAD_FILE "${GIT_DATA}/HEAD")
configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)
# Generate grabRef.cmake from the template next to this module and run it;
# the values of HEAD_REF and HEAD_HASH used below come from that script.
configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
"${GIT_DATA}/grabRef.cmake" @ONLY)
include("${GIT_DATA}/grabRef.cmake")
set(${_refspecvar}
"${HEAD_REF}"
PARENT_SCOPE)
set(${_hashvar}
"${HEAD_HASH}"
PARENT_SCOPE)
endfunction()
# git_describe(<var> [<additional arguments to git describe> ...])
#
# Runs 'git describe --tags --always <head hash> <extra args>' in the
# current source directory and stores the output in <var>. The result is
# "GIT-NOTFOUND" when git is not available and "HEAD-HASH-NOTFOUND" when no
# hash for HEAD could be determined. When git exits with an error the
# result is "<output>-<exit code>-NOTFOUND", which CMake treats as false.
function(git_describe _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()

	get_git_head_revision(_head_refspec _head_hash)

	if(NOT GIT_FOUND)
		set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
		return()
	endif()

	if(NOT _head_hash)
		set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
		return()
	endif()

	# TODO sanitize: the additional arguments in ARGN are handed to git
	# without any checking.
	execute_process(
		COMMAND "${GIT_EXECUTABLE}" describe --tags --always ${_head_hash} ${ARGN}
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		RESULT_VARIABLE _exit_code
		OUTPUT_VARIABLE _description
		ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

	if(_exit_code EQUAL 0)
		set(${_var} "${_description}" PARENT_SCOPE)
	else()
		set(${_var} "${_description}-${_exit_code}-NOTFOUND" PARENT_SCOPE)
	endif()
endfunction()
# git_describe_working_tree(<var> [<extra git-describe args>...])
#
# Runs `git describe --dirty <extra args>` in CMAKE_CURRENT_SOURCE_DIR and
# stores the stripped output in <var> in the caller's scope.
#
# Sentinel results:
#   GIT-NOTFOUND                 - no git executable was found
#   <output>-<status>-NOTFOUND   - git describe ran but did not return 0
function(git_describe_working_tree _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
		if(NOT GIT_FOUND)
			set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
			return()
		endif()
	endif()

	execute_process(
		COMMAND "${GIT_EXECUTABLE}" describe --dirty ${ARGN}
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		RESULT_VARIABLE describe_status
		OUTPUT_VARIABLE describe_text
		ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

	if(describe_status EQUAL 0)
		set(${_var} "${describe_text}" PARENT_SCOPE)
	else()
		# Encode the failure in the value so that it evaluates as false in if().
		set(${_var} "${describe_text}-${describe_status}-NOTFOUND" PARENT_SCOPE)
	endif()
endfunction()
# git_get_exact_tag(<var> [<extra git-describe args>...])
#
# Thin wrapper around git_describe() that adds --exact-match; <var> receives
# whatever git_describe() produced, including its *-NOTFOUND sentinels.
function(git_get_exact_tag _var)
	git_describe(exact_tag --exact-match ${ARGN})
	set(${_var} "${exact_tag}" PARENT_SCOPE)
endfunction()
# git_local_changes(<var>)
#
# Sets <var> in the caller's scope to "CLEAN" when `git diff-index --quiet HEAD --`
# returns 0 and to "DIRTY" for any other result.
#
# Sentinel results:
#   GIT-NOTFOUND        - no git executable was found
#   HEAD-HASH-NOTFOUND  - the HEAD hash could not be determined
function(git_local_changes _var)
	if(NOT GIT_FOUND)
		find_package(Git QUIET)
	endif()

	get_git_head_revision(refspec hash)

	if(NOT GIT_FOUND)
		set(${_var} "GIT-NOTFOUND" PARENT_SCOPE)
		return()
	endif()

	if(NOT hash)
		set(${_var} "HEAD-HASH-NOTFOUND" PARENT_SCOPE)
		return()
	endif()

	# Only the exit status matters; the output is captured so it is not echoed.
	execute_process(
		COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD --
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		RESULT_VARIABLE diff_status
		OUTPUT_VARIABLE diff_text
		ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)

	set(tree_state "DIRTY")
	if(diff_status EQUAL 0)
		set(tree_state "CLEAN")
	endif()

	set(${_var} "${tree_state}" PARENT_SCOPE)
endfunction()

View File

@@ -0,0 +1,43 @@
#
# Internal file for GetGitRevisionDescription.cmake
#
# Requires CMake 2.6 or newer (uses the 'function' command)
#
# Original Author:
# 2009-2010 Ryan Pavlik <rpavlik@iastate.edu> <abiryan@ryand.net>
# http://academic.cleardefinition.com
# Iowa State University HCI Graduate Program/VRAC
#
# Copyright 2009-2012, Iowa State University
# Copyright 2011-2015, Contributors
# Distributed under the Boost Software License, Version 1.0.
# (See accompanying file LICENSE_1_0.txt or copy at
# http://www.boost.org/LICENSE_1_0.txt)
# SPDX-License-Identifier: BSL-1.0
# Template script: the placeholders between at-signs are filled in when this
# file is configured; the dollar-brace references are resolved when the
# generated script is included.
#
# Result: HEAD_REF holds the ref name when HEAD points at a named branch and
# HEAD_HASH holds the hash read for HEAD.

# Start without a hash so a value from an earlier include cannot leak through.
set(HEAD_HASH)

file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)

if(HEAD_CONTENTS MATCHES "ref")
	# named branch
	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
		# Loose ref: copy it so that a change to the ref re-triggers configuration.
		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
	else()
		# No loose ref file: look the ref up in packed-refs instead.
		configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY)
		file(READ "@GIT_DATA@/packed-refs" PACKED_REFS)
		# Test the variable by name. Expanding the file contents unquoted into
		# if() breaks when the file is empty or contains a semicolon.
		if(PACKED_REFS MATCHES "([0-9a-z]*) ${HEAD_REF}")
			set(HEAD_HASH "${CMAKE_MATCH_1}")
		endif()
	endif()
else()
	# detached HEAD
	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
endif()

if(NOT HEAD_HASH)
	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
	string(STRIP "${HEAD_HASH}" HEAD_HASH)
endif()

81
cmake/VersionString.cmake Normal file
View File

@@ -0,0 +1,81 @@
# SPDX-License-Identifier: BSD-2-Clause
# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
cmake_minimum_required(VERSION 3.15)
# Create a revision file, containing the current git version info, if any.
#
# write_version_header(<dir> [<prefix>])
#
#   <dir>     existing directory that receives the generated revision.hpp
#   <prefix>  optional text inserted into the names of the generated constants
#             (k<prefix>ProjectName, k<prefix>VersionNumber, ...)
#
# The build number is taken from `git describe --match=build --dirty`: a result
# of the form build-<n>-g<hash>[-dirty] yields build info "<n>" (or "<n>*" for a
# dirty tree) and git tag "<hash>". When git describe fails a warning is printed
# and the sentinel string it returned is written as build info.
function(write_version_header dir)
	# parameter check; quoted so an empty or space-containing path is handled
	if(NOT IS_DIRECTORY "${dir}")
		message(FATAL_ERROR "First parameter to write_version_header should be a directory where the final revision.hpp file will be placed")
	endif()

	include(GetGitRevisionDescription)

	# git_describe_working_tree reports failure through a value ending in
	# -NOTFOUND, so the outcome is judged from the returned string. (Testing the
	# bare words GIT-NOTFOUND / HEAD-HASH-NOTFOUND in if() is always false.)
	git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)

	if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
		set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
		if(CMAKE_MATCH_3)
			# Mark builds from a modified working tree with a trailing asterisk.
			set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
		else()
			set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
		endif()
	elseif(BUILD_VERSION_STRING MATCHES "-NOTFOUND$")
		message(WARNING "no git info available, cannot update version string")
	endif()

	string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)

	if(ARGC GREATER 1)
		set(VAR_PREFIX "${ARGV1}")
	endif()

	# The template is written first and then configured, so the at-sign
	# placeholders below are substituted by configure_file.
	file(WRITE "${PROJECT_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
#pragma once
#include <ostream>
const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";
inline void write_version_string(std::ostream &os, bool verbose)
{
os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
if (verbose)
{
os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
if (k@VAR_PREFIX@VersionGitTag[0] != 0)
os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
}
}
]])
	configure_file("${PROJECT_BINARY_DIR}/revision.hpp.in" "${dir}/revision.hpp" @ONLY)
endfunction()

View File

@@ -0,0 +1,16 @@
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)

# find_dependency() forwards the QUIET / REQUIRED state of the consuming
# find_package(cifpp ...) call. The dependencies are therefore not marked
# REQUIRED here: a missing dependency makes cifpp "not found" and is only
# fatal when the consumer asked for cifpp as REQUIRED.
find_dependency(Threads)
find_dependency(ZLIB)
find_dependency(LibLZMA)
find_dependency(Boost COMPONENTS regex)

include("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")

set_and_check(CIFPP_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(CIFPP_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@")
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_SHARE_INSTALL_DIR@")

check_required_components(cifpp)

View File

@@ -0,0 +1,15 @@
@PACKAGE_INIT@

include(CMakeFindDependencyMacro)

# find_dependency() forwards the QUIET / REQUIRED state of the consuming
# find_package(cifpp ...) call. The dependencies are therefore not marked
# REQUIRED here: a missing dependency makes cifpp "not found" and is only
# fatal when the consumer asked for cifpp as REQUIRED.
find_dependency(Threads)
find_dependency(ZLIB)
find_dependency(LibLZMA)

include("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")

set_and_check(CIFPP_INCLUDE_DIR "@PACKAGE_INCLUDE_INSTALL_DIR@")
set_and_check(CIFPP_LIBRARY_DIR "@PACKAGE_LIBRARY_INSTALL_DIR@")
set_and_check(CIFPP_SHARE_DIR "@PACKAGE_SHARE_INSTALL_DIR@")

check_required_components(cifpp)

18
cmake/test-rx.cpp Normal file
View File

@@ -0,0 +1,18 @@
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
#include <iostream>
#include <regex>
// Probe program for the std::regex implementation: it runs a lazy capture
// over a 100'000 character string and prints the first ten characters of the
// subject and of capture group 1.
// NOTE(review): presumably the build compiles and runs this to decide whether
// the system std::regex is affected by the GCC bug referenced in this file's
// header comment - confirm against the CMake logic that uses it. The statements
// are kept exactly as they are because the probe's behaviour is the point.
int main()
{
// Subject: one hundred thousand '*' characters.
std::string s(100'000, '*');
std::smatch m;
// Anchored, non-greedy capture of the whole subject.
std::regex r("^(.*?)$");
std::regex_search(s, m, r);
std::cout << s.substr(0, 10) << std::endl;
std::cout << m.str(1).substr(0, 10) << std::endl;
return 0;
}

2715
data/ccd-subset.cif Normal file

File diff suppressed because it is too large Load Diff

BIN
examples/1cbs.cif.gz Normal file

Binary file not shown.

27
examples/example.cpp Normal file
View File

@@ -0,0 +1,27 @@
#include <iostream>
#include <filesystem>
#include <cif++.hpp>
namespace fs = std::filesystem;
int main()
{
cif::file file;
file.load("1cbs.cif.gz");
auto& db = file.front();
auto &atom_site = db["atom_site"];
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
<< "residues with an OXT are:" << std::endl;
for (const auto& [asym, comp, seqnr]: atom_site.find<std::string,std::string,int>(
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
{
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
}
return 0;
}

8
examples/makefile Normal file
View File

@@ -0,0 +1,8 @@
# Build the libcifpp example program.
# The language standard is part of the compiler command itself.
CXX = c++ -std=c++17
# Compile and link flags come from the installed libcifpp pkg-config file.
CXXFLAGS = $(shell pkg-config --cflags libcifpp)
LIBS = $(shell pkg-config --libs libcifpp)
# Default target.
all: example
# $@ is the target, $? the prerequisites that are newer than the target.
example: example.cpp
$(CXX) -o $@ $? $(CXXFLAGS) $(LIBS)

40
include/cif++.hpp Normal file
View File

@@ -0,0 +1,40 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/utilities.hpp>
#include <cif++/file.hpp>
#include <cif++/parser.hpp>
#include <cif++/format.hpp>
#include <cif++/compound.hpp>
#include <cif++/point.hpp>
#include <cif++/symmetry.hpp>
#include <cif++/model.hpp>
#include <cif++/pdb/io.hpp>

277
include/cif++/atom_type.hpp Normal file
View File

@@ -0,0 +1,277 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Lib for working with structures as contained in mmCIF and PDB files
#pragma once
#include <cstdint>
#include <stdexcept>
#include <string>
namespace cif
{
/// \brief Chemical element identifiers.
///
/// The value of each enumerator is the atomic number of the element, with two
/// extra entries: Nn (0) for an unknown element and D (119) for deuterium.
enum atom_type : uint8_t
{
	Nn = 0, // Unknown

	// Period 1
	H = 1,  // Hydrogen
	He = 2, // Helium

	// Period 2
	Li = 3,  // Lithium
	Be = 4,  // Beryllium
	B = 5,   // Boron
	C = 6,   // Carbon
	N = 7,   // Nitrogen
	O = 8,   // Oxygen
	F = 9,   // Fluorine
	Ne = 10, // Neon

	// Period 3
	Na = 11, // Sodium
	Mg = 12, // Magnesium
	Al = 13, // Aluminium
	Si = 14, // Silicon
	P = 15,  // Phosphorus
	S = 16,  // Sulfur
	Cl = 17, // Chlorine
	Ar = 18, // Argon

	// Period 4
	K = 19,  // Potassium
	Ca = 20, // Calcium
	Sc = 21, // Scandium
	Ti = 22, // Titanium
	V = 23,  // Vanadium
	Cr = 24, // Chromium
	Mn = 25, // Manganese
	Fe = 26, // Iron
	Co = 27, // Cobalt
	Ni = 28, // Nickel
	Cu = 29, // Copper
	Zn = 30, // Zinc
	Ga = 31, // Gallium
	Ge = 32, // Germanium
	As = 33, // Arsenic
	Se = 34, // Selenium
	Br = 35, // Bromine
	Kr = 36, // Krypton

	// Period 5
	Rb = 37, // Rubidium
	Sr = 38, // Strontium
	Y = 39,  // Yttrium
	Zr = 40, // Zirconium
	Nb = 41, // Niobium
	Mo = 42, // Molybdenum
	Tc = 43, // Technetium
	Ru = 44, // Ruthenium
	Rh = 45, // Rhodium
	Pd = 46, // Palladium
	Ag = 47, // Silver
	Cd = 48, // Cadmium
	In = 49, // Indium
	Sn = 50, // Tin
	Sb = 51, // Antimony
	Te = 52, // Tellurium
	I = 53,  // Iodine
	Xe = 54, // Xenon

	// Period 6 (including the lanthanides, 57-71)
	Cs = 55, // Caesium
	Ba = 56, // Barium
	La = 57, // Lanthanum
	Ce = 58, // Cerium
	Pr = 59, // Praseodymium
	Nd = 60, // Neodymium
	Pm = 61, // Promethium
	Sm = 62, // Samarium
	Eu = 63, // Europium
	Gd = 64, // Gadolinium
	Tb = 65, // Terbium
	Dy = 66, // Dysprosium
	Ho = 67, // Holmium
	Er = 68, // Erbium
	Tm = 69, // Thulium
	Yb = 70, // Ytterbium
	Lu = 71, // Lutetium
	Hf = 72, // Hafnium
	Ta = 73, // Tantalum
	W = 74,  // Tungsten
	Re = 75, // Rhenium
	Os = 76, // Osmium
	Ir = 77, // Iridium
	Pt = 78, // Platinum
	Au = 79, // Gold
	Hg = 80, // Mercury
	Tl = 81, // Thallium
	Pb = 82, // Lead
	Bi = 83, // Bismuth
	Po = 84, // Polonium
	At = 85, // Astatine
	Rn = 86, // Radon

	// Period 7 (including the actinides, 89-103)
	Fr = 87,  // Francium
	Ra = 88,  // Radium
	Ac = 89,  // Actinium
	Th = 90,  // Thorium
	Pa = 91,  // Protactinium
	U = 92,   // Uranium
	Np = 93,  // Neptunium
	Pu = 94,  // Plutonium
	Am = 95,  // Americium
	Cm = 96,  // Curium
	Bk = 97,  // Berkelium
	Cf = 98,  // Californium
	Es = 99,  // Einsteinium
	Fm = 100, // Fermium
	Md = 101, // Mendelevium
	No = 102, // Nobelium
	Lr = 103, // Lawrencium
	Rf = 104, // Rutherfordium
	Db = 105, // Dubnium
	Sg = 106, // Seaborgium
	Bh = 107, // Bohrium
	Hs = 108, // Hassium
	Mt = 109, // Meitnerium
	Ds = 110, // Darmstadtium
	Rg = 111, // Roentgenium
	Cn = 112, // Copernicium
	Nh = 113, // Nihonium
	Fl = 114, // Flerovium
	Mc = 115, // Moscovium
	Lv = 116, // Livermorium
	Ts = 117, // Tennessine
	Og = 118, // Oganesson

	// Isotope treated as its own type
	D = 119, // Deuterium
};
// --------------------------------------------------------------------
// atom_type_info
/// \brief The kinds of atomic radius stored per element.
///
/// The enumerators are used as indices into a per-element array, so their
/// order matters; type_count is not a radius but the number of real entries.
enum class radius_type
{
	calculated = 0,
	empirical = 1,
	covalent_empirical = 2,
	single_bond = 3,
	double_bond = 4,
	triple_bond = 5,
	van_der_waals = 6,

	type_count = 7
};

/// \brief Number of radius kinds, usable as an array bound.
constexpr size_t kRadiusTypeCount = static_cast<size_t>(radius_type::type_count);

/// \brief The two kinds of ionic radius that can be requested.
enum class ionic_radius_type
{
	effective = 0,
	crystal = 1
};
/// \brief Static properties of one element.
///
/// Plain aggregate; the member order is part of its layout and must not change.
struct atom_type_info
{
// Element identifier.
atom_type type;
// Name and chemical symbol of the element.
std::string name;
std::string symbol;
// NOTE(review): presumably the standard atomic weight - confirm against the table.
float weight;
// True when the element is classified as a metal.
bool metal;
// One radius per radius_type, indexed by the enumerator value.
// atom_type_traits::radius() divides these values by 100 before returning them.
float radii[kRadiusTypeCount];
};
// Table of known elements, defined in another translation unit.
// NOTE(review): presumably indexed by atom_type - confirm at the definition.
extern const atom_type_info kKnownAtoms[];
// --------------------------------------------------------------------
// atom_type_traits
/// \brief Read-only view on the atom_type_info record of a single element.
///
/// Every accessor reads through m_info, which points to a const atom_type_info.
class atom_type_traits
{
public:
/// \brief Construct the traits for element \a a
atom_type_traits(atom_type a);
/// \brief Construct the traits for the element with chemical symbol \a symbol
atom_type_traits(const std::string &symbol);
// Simple accessors for the fields of the underlying atom_type_info.
atom_type type() const { return m_info->type; }
std::string name() const { return m_info->name; }
std::string symbol() const { return m_info->symbol; }
float weight() const { return m_info->weight; }
bool is_metal() const { return m_info->metal; }
/// \brief Test a symbol without constructing an atom_type_traits object
static bool is_element(const std::string &symbol);
static bool is_metal(const std::string &symbol);
/// \brief Return the stored radius of kind \a type, divided by 100
///
/// \param type The kind of radius requested, single bond by default
/// \throws std::invalid_argument when \a type is radius_type::type_count or larger
float radius(radius_type type = radius_type::single_bond) const
{
if (type >= radius_type::type_count)
throw std::invalid_argument("invalid radius requested");
return m_info->radii[static_cast<size_t>(type)] / 100.f;
}
/// \brief Return the radius for a charged version of this atom in a solid crystal
///
/// \param charge The charge of the ion
/// \return The radius of the ion
float crystal_ionic_radius(int charge) const;
/// \brief Return the radius for a charged version of this atom in a non-solid environment
///
/// \param charge The charge of the ion
/// \return The radius of the ion
float effective_ionic_radius(int charge) const;
/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
///
/// \param charge The charge of the ion
/// \param type Selects between the effective and the crystal ionic radius
/// \return The radius of the ion
float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
{
return type == ionic_radius_type::effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
}
// data type encapsulating the Waasmaier & Kirfel scattering factors
// in a simplified form (only a and b).
// Added the electron scattering factors as well
struct SFData
{
double a[6], b[6];
};
// to get the Cval and Siva values, use this constant as charge:
enum
{
kWKSFVal = -99
};
/// \brief Scattering factors for this element with the given \a charge
const SFData &wksf(int charge = 0) const;
/// \brief Electron scattering factors for this element
const SFData &elsf() const;
private:
// Non-owning pointer to the record describing this element.
const struct atom_type_info *m_info;
};
} // namespace cif

598
include/cif++/category.hpp Normal file
View File

@@ -0,0 +1,598 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <array>
#include <cif++/forward_decl.hpp>
#include <cif++/condition.hpp>
#include <cif++/iterator.hpp>
#include <cif++/row.hpp>
#include <cif++/validate.hpp>
// TODO: implement all of:
// https://en.cppreference.com/w/cpp/named_req/Container
// https://en.cppreference.com/w/cpp/named_req/SequenceContainer
// and more?
namespace cif
{
// --------------------------------------------------------------------
// special exception
/// \brief Exception type used to report a duplicate key.
///
/// Behaves exactly like std::runtime_error; it exists so callers can catch
/// this condition separately from other runtime errors.
class duplicate_key_error : public std::runtime_error
{
  public:
	/// \param msg The text returned by what()
	duplicate_key_error(const std::string &msg)
		: std::runtime_error(msg)
	{
	}
};
// --------------------------------------------------------------------
class category
{
public:
friend class row_handle;
template <typename, typename...>
friend class iterator_impl;
using value_type = row_handle;
using reference = value_type;
using const_reference = const value_type;
using iterator = iterator_impl<category>;
using const_iterator = iterator_impl<const category>;
category() = default;
category(std::string_view name);
category(const category &rhs);
category(category &&rhs);
category &operator=(const category &rhs);
category &operator=(category &&rhs);
~category();
// --------------------------------------------------------------------
const std::string &name() const { return m_name; }
iset key_fields() const;
std::set<uint16_t> key_field_indices() const;
void set_validator(const validator *v, datablock &db);
void update_links(datablock &db);
const validator *get_validator() const { return m_validator; }
const category_validator *get_cat_validator() const { return m_cat_validator; }
bool is_valid() const;
bool validate_links() const;
bool operator==(const category &rhs) const;
bool operator!=(const category &rhs) const
{
return not operator==(rhs);
}
// --------------------------------------------------------------------
reference front()
{
return {*this, *m_head};
}
const_reference front() const
{
return {const_cast<category &>(*this), const_cast<row &>(*m_head)};
}
reference back()
{
return {*this, *m_tail};
}
const_reference back() const
{
return {const_cast<category &>(*this), const_cast<row &>(*m_tail)};
}
iterator begin()
{
return {*this, m_head};
}
iterator end()
{
return {*this, nullptr};
}
const_iterator begin() const
{
return {*this, m_head};
}
const_iterator end() const
{
return {*this, nullptr};
}
const_iterator cbegin() const
{
return {*this, m_head};
}
const_iterator cend() const
{
return {*this, nullptr};
}
size_t size() const
{
return std::distance(cbegin(), cend());
}
bool empty() const
{
return m_head == nullptr;
}
// --------------------------------------------------------------------
// A category can have a key, as defined by the validator/dictionary
/// @brief The key type
using key_type = row_initializer;
/// @brief Return a row_handle for the row specified by \a key
/// @param key The value for the key, fields specified in the dictionary should have a value
/// @return The row found in the index, or an undefined row_handle
row_handle operator[](const key_type &key);
const row_handle operator[](const key_type &key) const
{
return const_cast<category *>(this)->operator[](key);
}
// --------------------------------------------------------------------
template <typename... Ts, typename... Ns>
iterator_proxy<const category, Ts...> rows(Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return iterator_proxy<const category, Ts...>(*this, begin(), {names...});
}
template <typename... Ts, typename... Ns>
iterator_proxy<category, Ts...> rows(Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return iterator_proxy<category, Ts...>(*this, begin(), {names...});
}
// --------------------------------------------------------------------
conditional_iterator_proxy<category> find(condition &&cond)
{
return find(begin(), std::forward<condition>(cond));
}
conditional_iterator_proxy<category> find(iterator pos, condition &&cond)
{
return {*this, pos, std::forward<condition>(cond)};
}
conditional_iterator_proxy<const category> find(condition &&cond) const
{
return find(cbegin(), std::forward<condition>(cond));
}
conditional_iterator_proxy<const category> find(const_iterator pos, condition &&cond) const
{
return conditional_iterator_proxy<const category>{*this, pos, std::forward<condition>(cond)};
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<category, Ts...> find(condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const category, Ts...> find(condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return find<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Ns>(names)...);
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<category, Ts...> find(const_iterator pos, condition &&cond, Ns... names)
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
}
template <typename... Ts, typename... Ns>
conditional_iterator_proxy<const category, Ts...> find(const_iterator pos, condition &&cond, Ns... names) const
{
static_assert(sizeof...(Ts) == sizeof...(Ns), "The number of column titles should be equal to the number of types to return");
return {*this, pos, std::forward<condition>(cond), std::forward<Ns>(names)...};
}
// --------------------------------------------------------------------
// if you only expect a single row
row_handle find1(condition &&cond)
{
return find1(begin(), std::forward<condition>(cond));
}
row_handle find1(iterator pos, condition &&cond)
{
auto h = find(pos, std::forward<condition>(cond));
return h.size() != 1 ? row_handle{} : *h.begin();
}
const row_handle find1(condition &&cond) const
{
return find1(cbegin(), std::forward<condition>(cond));
}
const row_handle find1(const_iterator pos, condition &&cond) const
{
auto h = find(pos, std::forward<condition>(cond));
return h.size() != 1 ? row_handle{} : *h.begin();
}
template <typename T>
T find1(condition &&cond, const char *column) const
{
return find1<T>(cbegin(), std::forward<condition>(cond), column);
}
template <typename T>
T find1(const_iterator pos, condition &&cond, const char *column) const
{
auto h = find<T>(pos, std::forward<condition>(cond), column);
return h.size() == 1 ? *h.begin() : T{};
}
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find1(condition &&cond, Cs... columns) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
// static_assert(std::is_same_v<Cs, const char*>..., "The column names should be const char");
return find1<Ts...>(cbegin(), std::forward<condition>(cond), std::forward<Cs>(columns)...);
}
template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... columns) const
{
static_assert(sizeof...(Ts) == sizeof...(Cs), "The number of column titles should be equal to the number of types to return");
auto h = find<Ts...>(pos, std::forward<condition>(cond), std::forward<Cs>(columns)...);
return h.size() == 1 ? *h.begin() : std::tuple<Ts...>{};
}
bool exists(condition &&cond) const
{
bool result = false;
if (cond)
{
cond.prepare(*this);
auto sh = cond.single();
if (sh.has_value() and *sh)
result = true;
else
{
for (auto r : *this)
{
if (cond(r))
{
result = true;
break;
}
}
}
}
return result;
}
// --------------------------------------------------------------------
bool has_children(row_handle r) const;
bool has_parents(row_handle r) const;
std::vector<row_handle> get_children(row_handle r, const category &childCat) const;
std::vector<row_handle> get_parents(row_handle r, const category &parentCat) const;
std::vector<row_handle> get_linked(row_handle r, const category &cat) const;
// --------------------------------------------------------------------
// void insert(const_iterator pos, const row_initializer &row)
// {
// insert_impl(pos, row);
// }
// void insert(const_iterator pos, row_initializer &&row)
// {
// insert_impl(pos, std::move(row));
// }
iterator erase(iterator pos);
void erase(row_handle rh)
{
erase(iterator(*this, rh.m_row));
}
size_t erase(condition &&cond);
size_t erase(condition &&cond, std::function<void(row_handle)> &&visit);
iterator emplace(row_initializer &&ri)
{
return this->emplace(ri.begin(), ri.end());
}
template <typename ItemIter>
iterator emplace(ItemIter b, ItemIter e)
{
row *r = this->create_row();
try
{
for (auto i = b; i != e; ++i)
{
// item_value *new_item = this->create_item(*i);
r->append(add_column(i->name()), { i->value() });
}
}
catch (...)
{
if (r != nullptr)
this->delete_row(r);
throw;
}
return insert_impl(cend(), r);
}
void clear();
// --------------------------------------------------------------------
/// \brief generate a new, unique ID. Pass it an ID generating function
/// based on a sequence number. This function will be called until the
/// result is unique in the context of this category
std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
std::string get_unique_id(const std::string &prefix)
{
return get_unique_id([prefix](int nr)
{ return prefix + std::to_string(nr + 1); });
}
// --------------------------------------------------------------------
/// \brief Rename a single column in the rows that match \a cond to value \a value
/// making sure the linked categories are updated according to the link.
/// That means, child categories are updated if the links are absolute
/// and unique. If they are not, the child category rows are split.
void update_value(condition &&cond, std::string_view tag, std::string_view value)
{
auto rs = find(std::move(cond));
std::vector<row_handle> rows;
std::copy(rs.begin(), rs.end(), std::back_inserter(rows));
update_value(rows, tag, value);
}
void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);
// --------------------------------------------------------------------
/// \brief Return the index number for \a column_name
uint16_t get_column_ix(std::string_view column_name) const
{
uint16_t result;
for (result = 0; result < m_columns.size(); ++result)
{
if (iequals(column_name, m_columns[result].m_name))
break;
}
if (VERBOSE > 0 and result == m_columns.size() and m_cat_validator != nullptr) // validate the name, if it is known at all (since it was not found)
{
auto iv = m_cat_validator->get_validator_for_item(column_name);
if (iv == nullptr)
std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << std::endl;
}
return result;
}
std::string_view get_column_name(uint16_t ix) const
{
if (ix >= m_columns.size())
throw std::out_of_range("column index is out of range");
return m_columns[ix].m_name;
}
uint16_t add_column(std::string_view column_name)
{
using namespace std::literals;
size_t result = get_column_ix(column_name);
if (result == m_columns.size())
{
const item_validator *item_validator = nullptr;
if (m_cat_validator != nullptr)
{
item_validator = m_cat_validator->get_validator_for_item(column_name);
if (item_validator == nullptr)
m_validator->report_error("tag " + std::string(column_name) + " not allowed in category " + m_name, false);
}
m_columns.emplace_back(column_name, item_validator);
}
return result;
}
bool has_column(std::string_view name) const
{
return get_column_ix(name) < m_columns.size();
}
iset get_columns() const;
// --------------------------------------------------------------------
void sort(std::function<int(row_handle,row_handle)> f);
void reorder_by_index();
// --------------------------------------------------------------------
std::vector<std::string> get_tag_order() const;
void write(std::ostream &os) const;
void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingColumns = true);
private:
void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;
public:
friend std::ostream &operator<<(std::ostream &os, const category &cat)
{
cat.write(os);
return os;
}
private:
void update_value(row *row, size_t column, std::string_view value, bool updateLinked, bool validate = true);
private:
void erase_orphans(condition &&cond, category &parent);
using allocator_type = std::allocator<void>;
constexpr allocator_type get_allocator() const
{
return {};
}
using char_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<char>;
using char_allocator_traits = std::allocator_traits<char_allocator_type>;
using row_allocator_type = typename std::allocator_traits<allocator_type>::template rebind_alloc<row>;
using row_allocator_traits = std::allocator_traits<row_allocator_type>;
/// \brief Allocate raw storage for exactly one row; the row is not constructed here.
row_allocator_traits::pointer get_row()
{
	row_allocator_type alloc(get_allocator());
	auto storage = row_allocator_traits::allocate(alloc, 1);
	return storage;
}
/// \brief Allocate storage for one row via get_row() and default construct a row in it.
row *create_row()
{
	row_allocator_type alloc(get_allocator());

	auto storage = this->get_row();
	row_allocator_traits::construct(alloc, storage);

	return storage;
}
row *clone_row(const row &r);
void delete_row(row *r);
row_handle create_copy(row_handle r);
/// \brief A column in the category: its name plus the (optional) validator for its items.
struct item_column
{
	/// \param name      The column name, copied into m_name
	/// \param validator Pointer stored as is, may be nullptr
	item_column(std::string_view name, const item_validator *validator)
		: m_name(name), m_validator(validator)
	{
	}

	std::string m_name;
	const item_validator *m_validator;
};
/// \brief Pairs a pointer to another category with the link validator describing the relation.
struct link
{
	category *linked;
	const link_validator *v;

	link(category *linked_category, const link_validator *validator)
		: linked(linked_category), v(validator)
	{
	}
};
// proxy methods for every insertion
iterator insert_impl(const_iterator pos, row *n);
iterator erase_impl(const_iterator pos);
// --------------------------------------------------------------------
condition get_parents_condition(row_handle rh, const category &parentCat) const;
condition get_children_condition(row_handle rh, const category &childCat) const;
// --------------------------------------------------------------------
void swap_item(size_t column_ix, row_handle &a, row_handle &b);
// --------------------------------------------------------------------
std::string m_name;
std::vector<item_column> m_columns;
const validator *m_validator = nullptr;
const category_validator *m_cat_validator = nullptr;
std::vector<link> m_parent_links, m_child_links;
bool m_cascade = true;
uint32_t m_last_unique_num = 0;
class category_index *m_index = nullptr;
row *m_head = nullptr, *m_tail = nullptr;
};
} // namespace cif

191
include/cif++/compound.hpp Normal file
View File

@@ -0,0 +1,191 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
/// \file compound.hpp
/// This file contains the definition for the class compound, encapsulating
/// the information found for compounds in the CCD.
#include <map>
#include <set>
#include <tuple>
#include <vector>
#include <cif++.hpp>
#include <cif++/atom_type.hpp>
namespace cif
{
// --------------------------------------------------------------------
class compound;
struct compound_atom;
class compound_factory_impl;
/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
///
/// The quoted text behind each enumerator is the descriptive name of that bond type.
/// Conversion to and from text is declared right after this enum as
/// to_string(bond_type) and from_string(const std::string &).
enum class bond_type
{
sing, // 'single bond'
doub, // 'double bond'
trip, // 'triple bond'
quad, // 'quadruple bond'
arom, // 'aromatic bond'
poly, // 'polymeric bond'
delo, // 'delocalized double bond'
pi, // 'pi bond'
};
std::string to_string(bond_type bondType);
bond_type from_string(const std::string &bondType);
/// --------------------------------------------------------------------
/// \brief struct containing information about an atom in a chemical compound.
/// This is a subset of the available information. Contact the author if you need more fields.
struct compound_atom
{
	std::string id;             ///< The atom identifier
	atom_type type_symbol;      ///< The type of the atom
	int charge = 0;             ///< Charge, zero unless set
	bool aromatic = false;      ///< Aromatic flag, false unless set
	bool leaving_atom = false;  ///< Leaving atom flag, false unless set
	bool stereo_config = false; ///< Stereo configuration flag, false unless set

	// Coordinates are zero initialised so a default constructed atom never
	// exposes indeterminate values.
	float x = 0, y = 0, z = 0;
};
/// --------------------------------------------------------------------
/// \brief struct containing information about the bonds
struct compound_bond
{
	std::string atom_id[2]; ///< The identifiers of the two bonded atoms

	// Explicit default (the first enumerator) so that a default constructed
	// bond never carries an indeterminate type.
	bond_type type = bond_type::sing;

	bool aromatic = false;      ///< Aromatic flag, false unless set
	bool stereo_config = false; ///< Stereo configuration flag, false unless set
};
/// --------------------------------------------------------------------
/// \brief a class that contains information about a chemical compound.
/// This information is derived from the CCD by default.
///
/// To create compounds, you use the factory method compound_factory::create.
/// You can add your own compound definitions by calling
/// compound_factory::push_dictionary and passing it a valid CCD formatted file.
class compound
{
public:
// accessors
// The accessors below return the stored values; the string accessors return copies.
std::string id() const { return m_id; }
std::string name() const { return m_name; }
std::string type() const { return m_type; }
std::string group() const { return m_group; }
std::string formula() const { return m_formula; }
float formula_weight() const { return m_formula_weight; }
int formal_charge() const { return m_formal_charge; }
// Read only access to the atoms and bonds stored for this compound
const std::vector<compound_atom> &atoms() const { return m_atoms; }
const std::vector<compound_bond> &bonds() const { return m_bonds; }
// Defined elsewhere; presumably looks up the atom whose id equals atom_id and
// returns a copy. Behaviour for an unknown id is not visible here -- verify in the implementation.
compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
// Defined elsewhere; presumably tells whether a bond between the two atoms is listed.
bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
// A compound counts as water when its id is one of HOH, H2O or WAT (exact, case sensitive match)
bool is_water() const
{
return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
}
private:
// Construction is private: only the factory implementation classes below may create compounds.
friend class compound_factory_impl;
friend class CCD_compound_factory_impl;
friend class CCP4_compound_factory_impl;
compound(cif::datablock &db);
compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);
std::string m_id;
std::string m_name;
std::string m_type;
std::string m_group;
std::string m_formula;
// Numeric fields default to zero
float m_formula_weight = 0;
int m_formal_charge = 0;
std::vector<compound_atom> m_atoms;
std::vector<compound_bond> m_bonds;
};
// --------------------------------------------------------------------
// Factory class for compound and Link objects
//
// Instances cannot be constructed or copied by users (private constructor,
// deleted copy operations); access goes through the static instance() function.
class compound_factory
{
public:
/// \brief Initialise a singleton instance.
///
/// If you have a multithreaded application and want to have different
/// compounds in each thread (e.g. a web service processing user requests
/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
/// flag to true.
static void init(bool useThreadLocalInstanceOnly);
/// \brief Return the factory instance (defined elsewhere; presumably the
/// thread local one when init() was called with true -- verify in the implementation)
static compound_factory &instance();
/// \brief Defined elsewhere; presumably discards the current instance(s)
static void clear();
/// \brief Dictionary management, all taking the path of a dictionary file.
/// NOTE(review): the names suggest a stack of dictionaries on top of a default one; confirm in the implementation.
void set_default_dictionary(const std::filesystem::path &inDictFile);
void push_dictionary(const std::filesystem::path &inDictFile);
void pop_dictionary();
/// \brief Queries on residue names, defined elsewhere
bool is_known_peptide(const std::string &res_name) const;
bool is_known_base(const std::string &res_name) const;
/// \brief Create the compound object for \a id
///
/// This will create the compound instance for \a id if it doesn't exist already.
/// The result is owned by this factory and should not be deleted by the user.
/// \param id The compound ID, a three letter code usually
/// \result The compound, or nullptr if it could not be created (missing info)
const compound *create(std::string id);
~compound_factory();
/// \brief Maps from a name to a single character, for amino acids and bases
/// respectively judging by the names; contents are defined elsewhere.
static const std::map<std::string, char> kAAMap, kBaseMap;
private:
compound_factory();
compound_factory(const compound_factory &) = delete;
compound_factory &operator=(const compound_factory &) = delete;
// One process wide instance and one per thread instance; the flag below
// records the choice passed to init().
static std::unique_ptr<compound_factory> s_instance;
static thread_local std::unique_ptr<compound_factory> tl_instance;
static bool s_use_thread_local_instance;
// The implementation doing the actual work
std::shared_ptr<compound_factory_impl> m_impl;
};
} // namespace cif

745
include/cif++/condition.hpp Normal file
View File

@@ -0,0 +1,745 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cassert>
#include <functional>
#include <iostream>
#include <regex>
#include <utility>
#include <cif++/row.hpp>
namespace cif
{
// --------------------------------------------------------------------
// let's make life easier
iset get_category_fields(const category &cat);
uint16_t get_column_ix(const category &cat, std::string_view col);
bool is_column_type_uchar(const category &cat, std::string_view col);
// --------------------------------------------------------------------
// some more templates to be able to do querying
namespace detail
{
/// \brief Abstract base of all nodes in a condition expression tree.
struct condition_impl
{
	virtual ~condition_impl() = default;

	/// \brief Bind this node to category \a c; returns the node to use from now on.
	/// The default implementation does nothing and returns this.
	virtual condition_impl *prepare(const category &c)
	{
		return this;
	}

	/// \brief Return true when row \a r satisfies this condition
	virtual bool test(row_handle r) const = 0;

	/// \brief Write a textual representation of this condition to \a os
	virtual void str(std::ostream &os) const = 0;

	/// \brief The one row this condition is known to select, empty by default
	virtual std::optional<row_handle> single() const
	{
		return std::optional<row_handle>{};
	}
};
/// \brief Condition that accepts every row; printed as "*".
struct all_condition_impl : public condition_impl
{
	bool test(row_handle r) const override
	{
		return true;
	}

	void str(std::ostream &os) const override
	{
		os << "*";
	}
};
struct or_condition_impl;
struct and_condition_impl;
struct not_condition_impl;
} // namespace detail
/// \brief Move-only owner of a tree of detail::condition_impl nodes.
///
/// A condition is built using the operators defined further down in this file,
/// must be prepared for a category using prepare() and can then be applied
/// to rows using operator(). The wrapped implementation is deleted in the destructor.
class condition
{
  public:
	using condition_impl = detail::condition_impl;

	/// \brief Construct an empty condition
	condition()
		: m_impl(nullptr)
	{
	}

	/// \brief Take ownership of \a impl
	explicit condition(condition_impl *impl)
		: m_impl(impl)
	{
	}

	condition(const condition &) = delete;

	/// \brief Move construct, \a rhs is left empty and unprepared.
	/// The prepared flag travels along with the implementation it describes.
	condition(condition &&rhs) noexcept
		: m_impl(std::exchange(rhs.m_impl, nullptr))
		, m_prepared(std::exchange(rhs.m_prepared, false))
	{
	}

	condition &operator=(const condition &) = delete;

	/// \brief Move assign by swapping the complete state with \a rhs, so the
	/// prepared flag never ends up describing another implementation.
	condition &operator=(condition &&rhs) noexcept
	{
		swap(rhs);
		return *this;
	}

	~condition()
	{
		delete m_impl;
		m_impl = nullptr;
	}

	/// \brief Bind this condition to category \a c, defined elsewhere
	void prepare(const category &c);

	/// \brief Test row \a r; an empty condition yields false (and asserts in debug builds)
	bool operator()(row_handle r) const
	{
		assert(this->m_impl != nullptr);
		assert(this->m_prepared);
		return m_impl ? m_impl->test(r) : false;
	}

	/// \brief True when this condition is not empty
	explicit operator bool() const { return not empty(); }

	/// \brief True when no implementation is held
	bool empty() const { return m_impl == nullptr; }

	/// \brief The single row reported by the implementation, or an empty optional
	std::optional<row_handle> single() const
	{
		return m_impl ? m_impl->single() : std::optional<row_handle>();
	}

	friend condition operator||(condition &&a, condition &&b);
	friend condition operator&&(condition &&a, condition &&b);

	// The combining implementations steal m_impl from their operands
	friend struct detail::or_condition_impl;
	friend struct detail::and_condition_impl;
	friend struct detail::not_condition_impl;

	/// \brief Exchange implementation and prepared flag with \a rhs
	void swap(condition &rhs)
	{
		std::swap(m_impl, rhs.m_impl);
		std::swap(m_prepared, rhs.m_prepared);
	}

	/// \brief Print the condition, an empty condition prints nothing
	friend std::ostream &operator<<(std::ostream &os, const condition &cond)
	{
		if (cond.m_impl)
			cond.m_impl->str(os);
		return os;
	}

  private:
	void optimise(condition_impl *&impl);

	condition_impl *m_impl;
	bool m_prepared = false;
};
namespace detail
{
/// \brief Condition testing whether the item named m_item_tag is empty in a row.
struct key_is_empty_condition_impl : public condition_impl
{
	std::string m_item_tag;
	size_t m_item_ix = 0;

	key_is_empty_condition_impl(const std::string &item_tag)
		: m_item_tag(item_tag)
	{
	}

	/// \brief Look up the column index for the tag in category \a c
	condition_impl *prepare(const category &c) override
	{
		m_item_ix = get_column_ix(c, m_item_tag);
		return this;
	}

	bool test(row_handle r) const override
	{
		const bool is_empty = r[m_item_ix].empty();
		return is_empty;
	}

	void str(std::ostream &os) const override
	{
		os << m_item_tag;
		os << " IS NULL";
	}
};
/// \brief Condition testing whether an item equals a given value.
///
/// When m_single_hit is set (by prepare(), which is defined elsewhere) the
/// test reduces to comparing the row with that single hit.
struct key_equals_condition_impl : public condition_impl
{
	std::string m_item_tag;
	size_t m_item_ix = 0;
	bool m_icase = false;
	std::string m_value;
	std::optional<row_handle> m_single_hit;

	/// \brief Take name and value from item \a i
	key_equals_condition_impl(item &&i)
		: m_item_tag(i.name())
		, m_value(i.value())
	{
	}

	condition_impl *prepare(const category &c) override;

	bool test(row_handle r) const override
	{
		if (m_single_hit.has_value())
			return *m_single_hit == r;

		return r[m_item_ix].compare(m_value, m_icase) == 0;
	}

	void str(std::ostream &os) const override
	{
		os << m_item_tag;
		os << (m_icase ? "^ " : " ");
		os << " == " << m_value;
	}

	virtual std::optional<row_handle> single() const override
	{
		return m_single_hit;
	}
};
/// \brief Condition that is true when an item either equals a value or is empty.
///
/// Constructed out of an existing equals and an existing is-empty condition
/// that refer to the same column.
struct key_equals_or_empty_condition_impl : public condition_impl
{
	/// \param equals The equals condition, its tag, column index, value, case flag and single hit are copied
	/// \param empty  The is-empty condition, only used to check that it refers to the same column
	key_equals_or_empty_condition_impl(key_equals_condition_impl *equals, [[maybe_unused]] key_is_empty_condition_impl *empty)
		: m_item_tag(equals->m_item_tag)
		, m_item_ix(equals->m_item_ix) // keep the resolved index, otherwise test() would use column 0 until prepare() is called again
		, m_value(equals->m_value)
		, m_icase(equals->m_icase)
		, m_single_hit(equals->m_single_hit)
	{
		assert(empty->m_item_ix == equals->m_item_ix);
	}

	/// \brief (Re)resolve column index and case sensitivity for category \a c
	condition_impl *prepare(const category &c) override
	{
		m_item_ix = get_column_ix(c, m_item_tag);
		m_icase = is_column_type_uchar(c, m_item_tag);
		return this;
	}

	bool test(row_handle r) const override
	{
		bool result = false;
		if (m_single_hit.has_value())
			result = *m_single_hit == r;
		else
			result = r[m_item_ix].empty() or r[m_item_ix].compare(m_value, m_icase) == 0;
		return result;
	}

	void str(std::ostream &os) const override
	{
		os << m_item_tag << (m_icase ? "^ " : " ") << " == " << m_value << " OR " << m_item_tag << " IS NULL";
	}

	virtual std::optional<row_handle> single() const override
	{
		return m_single_hit;
	}

	std::string m_item_tag;
	size_t m_item_ix = 0;
	std::string m_value;
	bool m_icase = false;
	std::optional<row_handle> m_single_hit;
};
/// \brief Condition delegating the test to a stored callable.
///
/// The callable receives the row and a flag telling whether the comparison
/// should be case insensitive; m_str is only used for printing.
struct key_compare_condition_impl : public condition_impl
{
	/// \param item_tag The name of the item this condition applies to
	/// \param comp     The callable doing the comparison, signature bool(row_handle, bool)
	/// \param s        Text appended to the tag when printing this condition
	template <typename COMP>
	key_compare_condition_impl(const std::string &item_tag, COMP &&comp, const std::string &s)
		: m_item_tag(item_tag)
		, m_compare(std::forward<COMP>(comp)) // comp is a forwarding reference: forward, do not unconditionally move from a caller's lvalue
		, m_str(s)
	{
	}

	/// \brief Resolve column index and case sensitivity for category \a c
	condition_impl *prepare(const category &c) override
	{
		m_item_ix = get_column_ix(c, m_item_tag);
		m_icase = is_column_type_uchar(c, m_item_tag);
		return this;
	}

	bool test(row_handle r) const override
	{
		return m_compare(r, m_icase);
	}

	void str(std::ostream &os) const override
	{
		os << m_item_tag << (m_icase ? "^ " : " ") << m_str;
	}

	std::string m_item_tag;
	size_t m_item_ix = 0;
	bool m_icase = false;
	std::function<bool(row_handle, bool)> m_compare;
	std::string m_str;
};
/// \brief Condition testing whether the full text of an item matches a regular expression.
struct key_matches_condition_impl : public condition_impl
{
	std::string m_item_tag;
	size_t m_item_ix;
	std::regex mRx;

	key_matches_condition_impl(const std::string &item_tag, const std::regex &rx)
		: m_item_tag(item_tag)
		, m_item_ix(0)
		, mRx(rx)
	{
	}

	/// \brief Look up the column index for the tag in category \a c
	condition_impl *prepare(const category &c) override
	{
		m_item_ix = get_column_ix(c, m_item_tag);
		return this;
	}

	bool test(row_handle r) const override
	{
		const std::string_view text = r[m_item_ix].text();
		const bool matched = std::regex_match(text.begin(), text.end(), mRx);
		return matched;
	}

	void str(std::ostream &os) const override
	{
		os << m_item_tag;
		os << " =~ expression";
	}
};
/// \brief Condition that is true when any field of a row compares equal to a value.
template <typename T>
struct any_is_condition_impl : public condition_impl
{
	using valueType = T;

	valueType mValue;

	any_is_condition_impl(const valueType &value)
		: mValue(value)
	{
	}

	bool test(row_handle r) const override
	{
		auto &cat = r.get_category();

		for (auto &field : get_category_fields(cat))
		{
			// A field for which the comparison throws simply does not match
			try
			{
				if (r[field].compare(mValue) == 0)
					return true;
			}
			catch (...)
			{
			}
		}

		return false;
	}

	void str(std::ostream &os) const override
	{
		os << "<any> == ";
		os << mValue;
	}
};
/// \brief Condition that is true when the text of any field of a row matches a regular expression.
struct any_matches_condition_impl : public condition_impl
{
	std::regex mRx;

	any_matches_condition_impl(const std::regex &rx)
		: mRx(rx)
	{
	}

	bool test(row_handle r) const override
	{
		auto &cat = r.get_category();

		for (auto &field : get_category_fields(cat))
		{
			// A field for which fetching or matching throws simply does not match
			try
			{
				const std::string_view text = r[field].text();
				if (std::regex_match(text.begin(), text.end(), mRx))
					return true;
			}
			catch (...)
			{
			}
		}

		return false;
	}

	void str(std::ostream &os) const override
	{
		os << "<any> =~ expression";
	}
};
// TODO: Optimize and_condition by having a list of sub items.
// That way you can also collapse multiple _is_ conditions in
// case they make up an indexed tuple.
/// \brief Conjunction of sub conditions, true when all of them are true.
/// The sub conditions are owned and deleted in the destructor.
struct and_condition_impl : public condition_impl
{
	std::vector<condition_impl *> mSub;

	/// \brief Take over the implementations of \a a and \a b, leaving both empty
	and_condition_impl(condition &&a, condition &&b)
	{
		mSub.emplace_back(std::exchange(a.m_impl, nullptr));
		mSub.emplace_back(std::exchange(b.m_impl, nullptr));
	}

	~and_condition_impl()
	{
		for (auto *sub : mSub)
			delete sub;
	}

	condition_impl *prepare(const category &c) override;

	bool test(row_handle r) const override
	{
		// Stop at the first sub condition that fails
		for (auto *sub : mSub)
		{
			if (not sub->test(r))
				return false;
		}

		return true;
	}

	void str(std::ostream &os) const override
	{
		os << '(';

		const char *separator = nullptr;
		for (auto *sub : mSub)
		{
			if (separator != nullptr)
				os << separator;
			separator = " AND ";

			sub->str(os);
		}

		os << ')';
	}

	/// \brief The single hit shared by the sub conditions; empty as soon as
	/// a sub condition reports something different from what was collected so far.
	virtual std::optional<row_handle> single() const override
	{
		std::optional<row_handle> hit;

		for (auto *sub : mSub)
		{
			auto candidate = sub->single();

			if (hit.has_value() and not(candidate == hit))
			{
				hit.reset();
				break;
			}

			if (not hit.has_value())
				hit = candidate;
		}

		return hit;
	}
};
/// \brief Disjunction of two owned sub conditions, true when either one is true.
struct or_condition_impl : public condition_impl
{
	condition_impl *mA;
	condition_impl *mB;

	/// \brief Take over the implementations of \a a and \a b, leaving both empty
	or_condition_impl(condition &&a, condition &&b)
		: mA(std::exchange(a.m_impl, nullptr))
		, mB(std::exchange(b.m_impl, nullptr))
	{
	}

	~or_condition_impl()
	{
		delete mA;
		delete mB;
	}

	condition_impl *prepare(const category &c) override;

	bool test(row_handle r) const override
	{
		if (mA->test(r))
			return true;
		return mB->test(r);
	}

	void str(std::ostream &os) const override
	{
		os << '(';
		mA->str(os);
		os << ") OR (";
		mB->str(os);
		os << ')';
	}

	/// \brief Combine the single hits of both sides: the hit of the other side
	/// when the first has none, nothing when both have different hits.
	virtual std::optional<row_handle> single() const override
	{
		auto left = mA->single();
		auto right = mB->single();

		if (not left.has_value())
			return right;

		if (right.has_value() and left != right)
			left.reset();

		return left;
	}
};
/// \brief Negation of one owned sub condition.
struct not_condition_impl : public condition_impl
{
	condition_impl *mA;

	/// \brief Take over the implementation of \a a, leaving it empty
	not_condition_impl(condition &&a)
		: mA(std::exchange(a.m_impl, nullptr))
	{
	}

	~not_condition_impl()
	{
		delete mA;
	}

	/// \brief Prepare the sub condition, which may be replaced by the node it returns
	condition_impl *prepare(const category &c) override
	{
		auto *prepared = mA->prepare(c);
		mA = prepared;
		return this;
	}

	bool test(row_handle r) const override
	{
		const bool inner = mA->test(r);
		return not inner;
	}

	void str(std::ostream &os) const override
	{
		os << "NOT (";
		mA->str(os);
		os << ')';
	}
};
} // namespace detail
/// \brief Combine two conditions with AND. When one of them is empty the
/// other one is returned as is (\a b when both are empty).
inline condition operator&&(condition &&a, condition &&b)
{
	const bool has_a = a.m_impl != nullptr;
	const bool has_b = b.m_impl != nullptr;

	if (has_a and has_b)
		return condition(new detail::and_condition_impl(std::move(a), std::move(b)));

	return condition(std::move(has_a ? a : b));
}
/// \brief Combine two conditions with OR. When one of them is empty the
/// other one is returned as is (\a b when both are empty).
inline condition operator||(condition &&a, condition &&b)
{
	const bool has_a = a.m_impl != nullptr;
	const bool has_b = b.m_impl != nullptr;

	if (has_a and has_b)
		return condition(new detail::or_condition_impl(std::move(a), std::move(b)));

	return condition(std::move(has_a ? a : b));
}
/// \brief Tag type, used to select the overloads that test for an empty item
struct empty_type {};

/// \brief A helper to make it possible to have conditions like ("id"_key == cif::null)
inline constexpr empty_type null{};
/// \brief Wrapper around an item tag, the left hand side in condition expressions.
/// Keys can be constructed explicitly from a string or a C string and cannot be copied.
struct key
{
	std::string m_item_tag;

	explicit key(const char *itemTag)
		: m_item_tag(itemTag)
	{
	}

	explicit key(const std::string &itemTag)
		: m_item_tag(itemTag)
	{
	}

	key(const key &) = delete;
	key &operator=(const key &) = delete;
};
/// \brief Create a condition testing whether the item named by \a key equals \a v
template <typename T>
condition operator==(const key &key, const T &v)
{
	auto *impl = new detail::key_equals_condition_impl({ key.m_item_tag, v });
	return condition(impl);
}
/// \brief Create a condition comparing the item named by \a key with the text \a value.
/// A null pointer or an empty string results in a test for an empty item.
inline condition operator==(const key &key, const char *value)
{
	const bool no_text = value == nullptr or *value == 0;

	if (no_text)
		return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));

	return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
}
// inline condition_t operator==(const key& key, const detail::ItemReference& v)
// {
// if (v.empty())
// return condition_t(new detail::key_is_empty_condition_impl(key.m_item_tag));
// else
// return condition_t(new detail::key_compare_condition_impl(key.m_item_tag, [tag = key.m_item_tag, v](const category& c, const row& r, bool icase)
// { return r[tag].template compare<(v, icase) == 0; }));
// }
/// \brief Create a condition that is the negation of (\a key == \a v)
template <typename T>
condition operator!=(const key &key, const T &v)
{
	auto *negated = new detail::not_condition_impl(operator==(key, v));
	return condition(negated);
}
/// \brief Create a condition that is the negation of (\a key == \a v) for text.
///
/// This delegates to operator==(const key &, const char *) so that both
/// operators treat a null pointer or an empty string in the same way: as a
/// test on the item being empty.
inline condition operator!=(const key &key, const char *v)
{
	return condition(new detail::not_condition_impl(operator==(key, v)));
}
/// \brief Create a condition testing whether the item named by \a key is greater than \a v
template <typename T>
condition operator>(const key &key, const T &v)
{
	// Textual form, used when the condition is printed
	std::ostringstream description;
	description << " > " << v;

	auto compare = [tag = key.m_item_tag, v](row_handle r, bool icase)
	{
		return r[tag].template compare<T>(v, icase) > 0;
	};

	return condition(new detail::key_compare_condition_impl(key.m_item_tag, std::move(compare), description.str()));
}
/// \brief Create a condition testing whether the item named by \a key is greater than or equal to \a v
template <typename T>
condition operator>=(const key &key, const T &v)
{
	// Textual form, used when the condition is printed
	std::ostringstream description;
	description << " >= " << v;

	auto compare = [tag = key.m_item_tag, v](row_handle r, bool icase)
	{
		return r[tag].template compare<T>(v, icase) >= 0;
	};

	return condition(new detail::key_compare_condition_impl(key.m_item_tag, std::move(compare), description.str()));
}
/// \brief Create a condition testing whether the item named by \a key is less than \a v
template <typename T>
condition operator<(const key &key, const T &v)
{
	// Textual form, used when the condition is printed
	std::ostringstream description;
	description << " < " << v;

	auto compare = [tag = key.m_item_tag, v](row_handle r, bool icase)
	{
		return r[tag].template compare<T>(v, icase) < 0;
	};

	return condition(new detail::key_compare_condition_impl(key.m_item_tag, std::move(compare), description.str()));
}
/// \brief Create a condition testing whether the item named by \a key is less than or equal to \a v
template <typename T>
condition operator<=(const key &key, const T &v)
{
	// Textual form, used when the condition is printed
	std::ostringstream description;
	description << " <= " << v;

	auto compare = [tag = key.m_item_tag, v](row_handle r, bool icase)
	{
		return r[tag].template compare<T>(v, icase) <= 0;
	};

	return condition(new detail::key_compare_condition_impl(key.m_item_tag, std::move(compare), description.str()));
}
/// \brief Create a condition testing whether the item named by \a key matches the regular expression \a rx
inline condition operator==(const key &key, const std::regex &rx)
{
	auto *impl = new detail::key_matches_condition_impl(key.m_item_tag, rx);
	return condition(impl);
}
/// \brief Create a condition testing whether the item named by \a key is empty, as in (key == cif::null)
inline condition operator==(const key &key, const empty_type &)
{
	auto *impl = new detail::key_is_empty_condition_impl(key.m_item_tag);
	return condition(impl);
}
/// \brief Create the negation of condition \a rhs, which is consumed
inline condition operator!(condition &&rhs)
{
	auto *negated = new detail::not_condition_impl(std::move(rhs));
	return condition(negated);
}
/// \brief Tag type, used to select the overloads that test all fields of a row
struct any_type {};

/// \brief The value to use in conditions like (cif::any == value)
inline constexpr any_type any{};
/// \brief Create a condition testing whether any field of a row equals \a v
template <typename T>
condition operator==(const any_type &, const T &v)
{
	auto *impl = new detail::any_is_condition_impl<T>(v);
	return condition(impl);
}
/// \brief Create a condition testing whether any field of a row matches the regular expression \a rx
inline condition operator==(const any_type &, const std::regex &rx)
{
	auto *impl = new detail::any_matches_condition_impl(rx);
	return condition(impl);
}
/// \brief Create a condition that accepts every row
inline condition all()
{
	auto *impl = new detail::all_condition_impl();
	return condition(impl);
}
namespace literals
{
	/// \brief User defined literal creating a cif::key, as in "id"_key
	/// \param text   The characters of the literal
	/// \param length The number of characters in \a text
	inline key operator""_key(const char *text, size_t length)
	{
		return key(std::string(text, text + length));
	}
} // namespace literals
} // namespace cif

100
include/cif++/datablock.hpp Normal file
View File

@@ -0,0 +1,100 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/forward_decl.hpp>
#include <cif++/category.hpp>
namespace cif
{
// --------------------------------------------------------------------
/// \brief A named list of categories, optionally associated with a validator.
///
/// The class derives publicly from std::list<category>, so all list operations
/// are available in addition to the members declared here.
class datablock : public std::list<category>
{
public:
datablock() = default;
/// \brief Construct an empty datablock called \a name
datablock(std::string_view name)
: m_name(name)
{
}
datablock(const datablock &) = default;
datablock(datablock &&) = default;
datablock &operator=(const datablock &) = default;
datablock &operator=(datablock &&) = default;
// --------------------------------------------------------------------
/// \brief The name of this datablock
const std::string &name() const { return m_name; }
/// \brief Replace the name of this datablock with \a name
void set_name(std::string_view name)
{
m_name = name;
}
// Validation, all defined elsewhere. The validator is held as a plain
// pointer, it is not owned by the datablock as far as this header shows.
void set_validator(const validator *v);
const validator *get_validator() const;
bool is_valid() const;
bool validate_links() const;
// --------------------------------------------------------------------
// Access to categories by name, defined elsewhere.
// NOTE(review): the non const operator[] presumably creates a missing category
// while get() presumably returns nullptr for one -- confirm in the implementation.
category &operator[](std::string_view name);
const category &operator[](std::string_view name) const;
category *get(std::string_view name);
const category *get(std::string_view name) const;
// Returns an iterator and a flag; presumably the flag tells whether a new
// category was added -- confirm in the implementation.
std::tuple<iterator, bool> emplace(std::string_view name);
std::vector<std::string> get_tag_order() const;
// Writing, optionally using an explicit order for the tags
void write(std::ostream &os) const;
void write(std::ostream &os, const std::vector<std::string> &tag_order);
/// \brief Stream insertion, forwards to write(std::ostream &) const
friend std::ostream &operator<<(std::ostream &os, const datablock &db)
{
db.write(os);
return os;
}
// --------------------------------------------------------------------
bool operator==(const datablock &rhs) const;
private:
std::string m_name;
const validator *m_validator = nullptr;
};
} // namespace cif

View File

@@ -0,0 +1,36 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/validate.hpp>
namespace cif
{
validator parse_dictionary(std::string_view name, std::istream &is);
} // namespace cif

122
include/cif++/file.hpp Normal file
View File

@@ -0,0 +1,122 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <list>
#include <cif++/datablock.hpp>
#include <cif++/parser.hpp>
namespace cif
{
// --------------------------------------------------------------------
/// \brief A list of datablocks that can be loaded from and saved to a path or stream.
///
/// The class derives publicly from std::list<datablock>, so all list operations
/// are available in addition to the members declared here.
class file : public std::list<datablock>
{
public:
file() = default;
/// \brief Construct a file by loading the content found at path \a p
explicit file(const std::filesystem::path &p)
{
load(p);
}
/// \brief Construct a file by loading the content of stream \a is
explicit file(std::istream &is)
{
load(is);
}
/// \brief Construct a file by loading from the \a length characters starting at \a data
explicit file(const char *data, size_t length)
{
// A minimal streambuf exposing the caller's memory as its get area.
// The const_cast is needed because setg takes non const pointers; only
// the get area is set up here.
struct membuf : public std::streambuf
{
membuf(char *text, size_t length)
{
this->setg(text, text, text + length);
}
} buffer(const_cast<char *>(data), length);
std::istream is(&buffer);
load(is);
}
file(const file &) = default;
file(file &&) = default;
file &operator=(const file &) = default;
file &operator=(file &&) = default;
// The validator is held as a plain pointer, it is not owned by the file
// as far as this header shows.
void set_validator(const validator *v);
/// \brief The validator currently set, may be nullptr
const validator *get_validator() const
{
return m_validator;
}
// Validation, defined elsewhere. Note that is_valid has both a const and a
// non const overload; how they differ is not visible in this header.
bool is_valid() const;
bool is_valid();
bool validate_links() const;
void load_dictionary();
void load_dictionary(std::string_view name);
/// \brief Defined elsewhere; presumably tells whether a datablock called \a name exists
bool contains(std::string_view name) const;
/// \brief The first datablock; the file must not be empty (checked by assert only)
datablock &front()
{
assert(not empty());
return std::list<datablock>::front();
}
/// \brief The first datablock; the file must not be empty (checked by assert only)
const datablock &front() const
{
assert(not empty());
return std::list<datablock>::front();
}
// Access to datablocks by name, defined elsewhere
datablock &operator[](std::string_view name);
const datablock &operator[](std::string_view name) const;
// Returns an iterator and a flag; presumably the flag tells whether a new
// datablock was added -- confirm in the implementation.
std::tuple<iterator, bool> emplace(std::string_view name);
// Reading and writing, defined elsewhere
void load(const std::filesystem::path &p);
void load(std::istream &is);
void save(const std::filesystem::path &p) const;
void save(std::ostream &os) const;
/// \brief Stream insertion, forwards to save(std::ostream &) const
friend std::ostream &operator<<(std::ostream &os, const file &f)
{
f.save(os);
return os;
}
private:
const validator *m_validator = nullptr;
};
} // namespace cif

221
include/cif++/format.hpp Normal file
View File

@@ -0,0 +1,221 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
/// \file format.hpp
/// File containing a basic reimplementation of boost::format
/// but then a bit more simplistic. Still this allowed me to move my code
/// from using boost::format to something without external dependency easily.
namespace cif
{
namespace detail
{
    /// \brief Holder for one format argument.
    ///
    /// A to_varg keeps a copy of the argument and, when dereferenced, yields
    /// the value that is handed to snprintf. The primary template passes the
    /// stored value through unchanged.
    template <typename T>
    struct to_varg
    {
        using type = T;

        to_varg(const T &v)
            : m_value(v)
        {
        }

        type operator*() { return m_value; }

        T m_value;
    };

    // template <>
    // struct to_varg<char>
    // {
    // 	using type = const char *;
    // 	to_varg(const char &v)
    // 		: m_value({ v })
    // 	{
    // 	}
    // 	type operator*() { return m_value.c_str(); }
    // 	std::string m_value;
    // };

    /// \brief Specialisation for C strings: the text is copied into a
    /// std::string and the pointer to that copy is passed to snprintf.
    template <>
    struct to_varg<const char *>
    {
        using type = const char *;

        to_varg(const char *v)
            : m_value(v)
        {
        }

        type operator*() { return m_value.c_str(); }

        std::string m_value;
    };

    /// \brief Specialisation for std::string: a copy is stored and its
    /// c_str() is passed to snprintf.
    template <>
    struct to_varg<std::string>
    {
        using type = const char *;

        to_varg(const std::string &v)
            : m_value(v)
        {
        }

        type operator*() { return m_value.c_str(); }

        std::string m_value;
    };
} // namespace detail

/// \brief A printf style format string together with copies of its arguments.
///
/// The object can be converted to a std::string using str() or written to a
/// std::ostream using operator<<. Formatting is done with snprintf.
/// Copying is disabled since m_vargs may contain pointers into m_args.
template <typename... Args>
class format_plus_arg
{
  public:
    using args_vector_type = std::tuple<detail::to_varg<Args>...>;
    using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;

    format_plus_arg(const format_plus_arg &) = delete;
    format_plus_arg &operator=(const format_plus_arg &) = delete;

    /// \brief Store the format string \a fmt and copies of \a args
    format_plus_arg(std::string_view fmt, Args... args)
        : m_fmt(fmt)
        , m_args(std::forward<Args>(args)...)
    {
        auto ix = std::make_index_sequence<sizeof...(Args)>();
        copy_vargs(ix);
    }

    /// \brief Return the formatted text, an empty string if snprintf reports an error
    std::string str() const
    {
        return render();
    }

    /// \brief Write the formatted text to \a os using an unformatted write
    friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f)
    {
        const std::string text = f.render();
        os.write(text.data(), static_cast<std::streamsize>(text.size()));
        return os;
    }

  private:
    /// \brief Run snprintf with the stored format and arguments
    std::string render() const
    {
        char buffer[1024];

        int n = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), m_fmt.c_str()), m_vargs));

        // A negative result means snprintf failed, there is nothing to return.
        if (n < 0)
            return {};

        const auto length = static_cast<std::string::size_type>(n);

        // snprintf returns the length the output would have had, so this
        // value can be larger than what actually fits in buffer.
        if (length < sizeof(buffer))
            return { buffer, length };

        // The text did not fit, format again into a buffer of the right size
        // (one extra byte for the terminating zero written by snprintf).
        std::string result(length + 1, '\0');
        std::apply(snprintf, std::tuple_cat(std::make_tuple(result.data(), result.size(), m_fmt.c_str()), m_vargs));
        result.resize(length);

        return result;
    }

    /// \brief Fill m_vargs with the dereferenced values of m_args
    template <size_t... I>
    void copy_vargs(std::index_sequence<I...>)
    {
        ((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
    }

    std::string m_fmt;
    args_vector_type m_args;
    vargs_vector_type m_vargs;
};
template <typename... Args>
constexpr auto format(std::string_view fmt, Args... args)
{
return format_plus_arg(fmt, std::forward<Args>(args)...);
}
// --------------------------------------------------------------------
/// A streambuf that fills out lines with spaces up until a specified width
class fill_out_streambuf : public std::streambuf
{
public:
using base_type = std::streambuf;
using int_type = base_type::int_type;
using char_type = base_type::char_type;
using traits_type = base_type::traits_type;
fill_out_streambuf(std::ostream &os, int width = 80)
: m_os(os)
, m_upstream(os.rdbuf())
, m_width(width)
{
}
~fill_out_streambuf()
{
m_os.rdbuf(m_upstream);
}
virtual int_type
overflow(int_type ic = traits_type::eof())
{
char ch = traits_type::to_char_type(ic);
int_type result = ic;
if (ch == '\n')
{
for (int i = m_column_count; result != traits_type::eof() and i < m_width; ++i)
result = m_upstream->sputc(' ');
}
if (result != traits_type::eof())
result = m_upstream->sputc(ch);
if (result != traits_type::eof())
{
if (ch == '\n')
{
m_column_count = 0;
++m_line_count;
}
else
++m_column_count;
}
return result;
}
std::streambuf *get_upstream() const { return m_upstream; }
int get_line_count() const { return m_line_count; }
private:
std::ostream &m_os;
std::streambuf *m_upstream;
int m_width;
int m_line_count = 0;
int m_column_count = 0;
};
} // namespace cif

View File

@@ -0,0 +1,46 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <vector>
namespace cif
{

// Forward declarations of the main types of the library, allowing headers
// to refer to these types without including their full definitions.

class category;
class datablock;
class file;
class parser;

class row;
class row_handle;

class item;

// item_handle is defined using the struct keyword in item.hpp, use the same
// class-key here to avoid mismatched-tags warnings (and trouble with
// compilers that encode the class-key in mangled names).
struct item_handle;

} // namespace cif

1036
include/cif++/gzio.hpp Normal file

File diff suppressed because it is too large Load Diff

567
include/cif++/item.hpp Normal file
View File

@@ -0,0 +1,567 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <charconv>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <limits>
#include <memory>
#include <optional>
#include <utility>
#include <cif++/forward_decl.hpp>
#include <cif++/text.hpp>
/// \file item.hpp
/// This file contains the declaration of item but also the item_value and item_handle
/// These handle the storage of and access to the data for a single data field.
namespace cif
{
/// \brief Global verbosity setting, defined elsewhere. In this header a
/// non-zero value makes failed numeric conversions print a diagnostic
/// message to std::cerr.
extern int VERBOSE;
// --------------------------------------------------------------------
/// \brief A transient name/value pair used to pass data into rows.
///
/// The constructors take care of formatting non-text values. Only a view
/// on the name is stored whereas the value is stored as a copy.
class item
{
  public:
    /// \brief Construct an empty item
    item() = default;

    /// \brief Construct an item called \a name whose content is the
    /// string consisting of the single character \a value
    item(std::string_view name, char value)
        : m_name(name)
        , m_value({ value })
    {
    }

    /// \brief Construct an item called \a name whose content is the
    /// floating point number \a value formatted in fixed notation using
    /// \a precision
    ///
    /// Throws std::runtime_error if the number cannot be formatted.
    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
    item(std::string_view name, const T &value, int precision)
        : m_name(name)
    {
        using namespace std;
        using namespace cif;

        char text[32];

        // keep the last byte free for the terminating zero
        auto res = to_chars(text, text + sizeof(text) - 1, value, chars_format::fixed, precision);
        if (res.ec != std::errc())
            throw std::runtime_error("Could not format number");

        assert(res.ptr >= text and res.ptr < text + sizeof(text));
        *res.ptr = 0;

        m_value.assign(text, res.ptr - text);
    }

    /// \brief Construct an item called \a name whose content is the
    /// floating point number \a value formatted in general notation
    ///
    /// Throws std::runtime_error if the number cannot be formatted.
    template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
    item(const std::string_view name, const T &value)
        : m_name(name)
    {
        using namespace std;
        using namespace cif;

        char text[32];

        // keep the last byte free for the terminating zero
        auto res = to_chars(text, text + sizeof(text) - 1, value, chars_format::general);
        if (res.ec != std::errc())
            throw std::runtime_error("Could not format number");

        assert(res.ptr >= text and res.ptr < text + sizeof(text));
        *res.ptr = 0;

        m_value.assign(text, res.ptr - text);
    }

    /// \brief Construct an item called \a name whose content is the
    /// formatted integral number \a value (bool is handled separately)
    ///
    /// Throws std::runtime_error if the number cannot be formatted.
    template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T,bool>, int> = 0>
    item(const std::string_view name, const T &value)
        : m_name(name)
    {
        char text[32];

        // keep the last byte free for the terminating zero
        auto res = std::to_chars(text, text + sizeof(text) - 1, value);
        if (res.ec != std::errc())
            throw std::runtime_error("Could not format number");

        assert(res.ptr >= text and res.ptr < text + sizeof(text));
        *res.ptr = 0;

        m_value.assign(text, res.ptr - text);
    }

    /// \brief Construct an item called \a name whose content is "y" when
    /// the boolean \a value is true and "n" otherwise
    template <typename T, std::enable_if_t<std::is_same_v<T,bool>, int> = 0>
    item(const std::string_view name, const T &value)
        : m_name(name)
    {
        m_value = value ? "y" : "n";
    }

    /// \brief Construct an item called \a name with the text \a value
    /// as content
    item(const std::string_view name, const std::string_view value)
        : m_name(name)
        , m_value(value)
    {
    }

    item(const item &rhs) = default;
    item(item &&rhs) noexcept = default;

    item &operator=(const item &rhs) = default;
    item &operator=(item &&rhs) noexcept = default;

    /// \brief The name of this item
    std::string_view name() const { return m_name; }

    /// \brief The content of this item
    std::string_view value() const { return m_value; }

    /// \brief Replace the stored content with \a v
    void value(std::string_view v) { m_value = v; }

    /// \brief Returns true if the stored content is an empty string
    bool empty() const { return m_value.empty(); }

    /// \brief Returns true if the stored content is '.'
    bool is_null() const { return m_value.compare(".") == 0; }

    /// \brief Returns true if the stored content is '?'
    bool is_unknown() const { return m_value.compare("?") == 0; }

    /// \brief The number of characters in the stored content
    size_t length() const { return m_value.length(); }

    /// \brief Support for structured binding, index 0 is the name and
    /// index 1 is the value
    template<size_t N>
    decltype(auto) get() const
    {
        if constexpr (N == 0) return name();
        else if constexpr (N == 1) return value();
    }

  private:
    std::string_view m_name;
    std::string m_value;
};
// --------------------------------------------------------------------
/// \brief the internal storage for items in a category
///
/// Internal storage, strictly forward linked list with minimal space
/// requirements. Strings of size 7 or shorter are stored internally.
/// Typically, more than 99% of the strings in an mmCIF file are less
/// than 8 bytes in length.
///
/// Whether the text lives in the local buffer or on the heap is decided
/// by m_length only: a length of kBufferSize or more means m_data is in use.
struct item_value
{
    item_value() = default;

    /// \brief constructor, stores a zero terminated copy of \a text
    item_value(std::string_view text)
        : m_length(text.length())
    {
        if (m_length >= kBufferSize)
        {
            m_data = new char[m_length + 1];
            std::copy(text.begin(), text.end(), m_data);
            m_data[m_length] = 0;
        }
        else
        {
            std::copy(text.begin(), text.end(), m_local_data);
            m_local_data[m_length] = 0;
        }
    }

    /// \brief move constructor, leaves \a rhs empty
    ///
    /// Declared noexcept, it cannot throw and containers can then move
    /// instead of having to fall back on the (deleted) copy.
    /// Locally stored text is transferred by copying the m_data member of
    /// the union, this relies on both union members having the same size
    /// (see the static_assert below).
    item_value(item_value &&rhs) noexcept
        : m_length(std::exchange(rhs.m_length, 0))
        , m_data(std::exchange(rhs.m_data, nullptr))
    {
    }

    /// \brief move assignment, implemented as a swap of the contents
    item_value &operator=(item_value &&rhs) noexcept
    {
        if (this != &rhs)
        {
            m_length = std::exchange(rhs.m_length, m_length);
            m_data = std::exchange(rhs.m_data, m_data);
        }
        return *this;
    }

    ~item_value()
    {
        // only text of kBufferSize or more characters was allocated
        if (m_length >= kBufferSize)
            delete[] m_data;
        m_data = nullptr;
        m_length = 0;
    }

    item_value(const item_value &) = delete;
    item_value &operator=(const item_value &) = delete;

    /// \brief returns true if the stored text is not empty
    explicit operator bool() const
    {
        return m_length != 0;
    }

    size_t m_length = 0;
    union
    {
        char m_local_data[8];
        char *m_data;
    };

    static constexpr size_t kBufferSize = sizeof(m_local_data);

    // By using std::string_view instead of c_str we obtain a
    // nice performance gain since we avoid many calls to strlen.
    constexpr inline std::string_view text() const
    {
        return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
    }
};

// static_assert(sizeof(item_value) == 24, "sizeof(item_value) should be 24 bytes");
static_assert(sizeof(item_value) == 16, "sizeof(item_value) should be 16 bytes");
// --------------------------------------------------------------------
// Transient object to access stored data
/// \brief An item_handle gives access to the data of one column in one row.
///
/// It holds a column index and a reference to a row_handle. Reading is done
/// using text(), as() and value_or(), writing by assigning a value.
struct item_handle
{
  public:
    // conversion helper class, specialised below for the supported types
    template <typename T, typename = void>
    struct item_value_as;

    /// \brief Assign \a value, it is formatted using a temporary item
    template <typename T>
    item_handle &operator=(const T &value)
    {
        assign_value(item{ "", value });
        return *this;
    }

    /// \brief Assign the text obtained by writing all of \a v to a
    /// std::ostringstream
    template <typename... Ts>
    void os(const Ts &...v)
    {
        std::ostringstream ss;
        ((ss << v), ...);
        *this = ss.str();
    }

    void swap(item_handle &b);

    /// \brief Return the content converted to type \a T (with cv and
    /// reference qualifiers removed for the conversion)
    template <typename T = std::string>
    auto as() const -> T
    {
        using value_type = std::remove_cv_t<std::remove_reference_t<T>>;
        return item_value_as<value_type>::convert(*this);
    }

    /// \brief Return the content as type \a T, or \a dv if this item
    /// is empty
    template <typename T>
    auto value_or(const T &dv) const
    {
        return empty() ? dv : this->as<T>();
    }

    /// \brief Compare the content with \a value, the result is zero when
    /// they are considered equal. \a icase requests a case insensitive
    /// comparison for the text types.
    template <typename T>
    int compare(const T &value, bool icase = true) const
    {
        return item_value_as<T>::compare(*this, value, icase);
    }

    template <typename T>
    bool operator==(const T &value) const
    {
        // TODO: icase or not icase?
        return compare(value, true) == 0;
    }

    // We may not have C++20 yet...
    template <typename T>
    bool operator!=(const T &value) const
    {
        return not operator==(value);
    }

    // empty means either null or unknown
    bool empty() const
    {
        auto txt = text();

        if (txt.empty())
            return true;

        return txt.length() == 1 and (txt.front() == '.' or txt.front() == '?');
    }

    explicit operator bool() const { return not empty(); }

    // is_null means the field contains '.'
    bool is_null() const
    {
        return text() == ".";
    }

    // is_unknown means the field contains '?'
    bool is_unknown() const
    {
        return text() == "?";
    }

    /// \brief The text of this item, defined elsewhere
    std::string_view text() const;

    /// \brief Construct a handle for column \a column in row \a row
    item_handle(uint16_t column, row_handle &row)
        : m_column(column)
        , m_row_handle(row)
    {
    }

    static const item_handle s_null_item;

    // The handles are taken by value, the actual work is done by the swap member
    friend void swap(item_handle a, item_handle b)
    {
        a.swap(b);
    }

  private:
    item_handle();

    uint16_t m_column;
    row_handle &m_row_handle;

    void assign_value(const item &value);
};
// So sad that older gcc implementations of from_chars did not support floats yet...
/// \brief Conversion helper for the arithmetic types, bool excluded.
///
/// The text is parsed using selected_charconv<>::from_chars.
template <typename T>
struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> and not std::is_same_v<T, bool>>>
{
    using value_type = std::remove_reference_t<std::remove_cv_t<T>>;

    /// \brief Return the numeric value of \a ref. The result is zero if
    /// \a ref is empty or if the text could not be converted.
    static value_type convert(const item_handle &ref)
    {
        value_type result = {};

        if (ref.empty())
            return result;

        auto txt = ref.text();

        std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), result);
        if (r.ec != std::errc())
        {
            result = {};
            report_failure(r.ec, txt);
        }

        return result;
    }

    /// \brief Compare the numeric value of \a ref with \a value.
    ///
    /// Returns 1 if the text is empty or cannot be converted, otherwise
    /// -1, 0 or 1 if the number is less than, equal to or greater than
    /// \a value. The \a icase flag is not used.
    static int compare(const item_handle &ref, const T &value, bool icase)
    {
        auto txt = ref.text();

        if (txt.empty())
            return 1;

        value_type v = {};

        std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), v);
        if (r.ec != std::errc())
        {
            report_failure(r.ec, txt);
            return 1;
        }

        if (v < value)
            return -1;

        if (v > value)
            return 1;

        return 0;
    }

  private:
    /// \brief Write a diagnostic for a failed conversion of \a txt to
    /// std::cerr, but only when cif::VERBOSE is set
    static void report_failure(std::errc ec, std::string_view txt)
    {
        if (not cif::VERBOSE)
            return;

        if (ec == std::errc::invalid_argument)
            std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
        else if (ec == std::errc::result_out_of_range)
            std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
    }
};
/// \brief Conversion helper for std::optional<T>, an empty item corresponds
/// with an optional that has no value.
template <typename T>
struct item_handle::item_value_as<std::optional<T>>
{
    /// \brief Return the content of \a ref as a \a T, or an optional
    /// without value if \a ref is empty
    static std::optional<T> convert(const item_handle &ref)
    {
        return ref ? std::optional<T>{ ref.as<T>() } : std::optional<T>{};
    }

    /// \brief Compare \a ref with \a value. An empty item equals an optional
    /// without value and sorts before any optional that has a value.
    static int compare(const item_handle &ref, std::optional<T> value, bool icase)
    {
        if (ref.empty())
            return value ? -1 : 0;

        if (not value)
            return 1;

        return ref.compare(*value, icase);
    }
};
/// \brief Conversion helper for bool.
template <typename T>
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, bool>>>
{
    /// \brief Return true if \a ref is not empty and its text equals "y",
    /// ignoring case. Everything else results in false.
    static bool convert(const item_handle &ref)
    {
        bool result = false;
        if (not ref.empty())
            result = iequals(ref.text(), "y");
        return result;
    }

    /// \brief Compare the boolean value of \a ref with \a value.
    ///
    /// Returns 0 when both are the same, -1 when \a ref is false and
    /// \a value is true, and 1 when \a ref is true and \a value is false.
    /// The \a icase flag is not used.
    static int compare(const item_handle &ref, bool value, bool icase)
    {
        const bool rv = convert(ref);

        // Equal also when both are false: testing 'value && rv' reported
        // two false values as different.
        if (rv == value)
            return 0;

        return rv < value ? -1 : 1;
    }
};
/// \brief Conversion helper for character arrays, e.g. string literals.
template <size_t N>
struct item_handle::item_value_as<char[N]>
{
    /// \brief Return a copy of the text of \a ref, or an empty string if
    /// \a ref is empty
    static std::string convert(const item_handle &ref)
    {
        return ref.empty() ? std::string{} : std::string(ref.text());
    }

    /// \brief Compare the text of \a ref with \a value, ignoring case
    /// if \a icase is true
    static int compare(const item_handle &ref, const char (&value)[N], bool icase)
    {
        if (icase)
            return cif::icompare(ref.text(), value);

        return ref.text().compare(value);
    }
};
/// \brief Conversion helper for zero terminated C strings.
template <typename T>
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, const char *>>>
{
    /// \brief Return a copy of the text of \a ref, or an empty string if
    /// \a ref is empty
    static std::string convert(const item_handle &ref)
    {
        return ref.empty() ? std::string{} : std::string(ref.text());
    }

    /// \brief Compare the text of \a ref with \a value, ignoring case
    /// if \a icase is true
    static int compare(const item_handle &ref, const char *value, bool icase)
    {
        if (icase)
            return cif::icompare(ref.text(), value);

        return ref.text().compare(value);
    }
};
/// \brief Conversion helper for std::string_view.
template <typename T>
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string_view>>>
{
    /// \brief Return a view on the text of \a ref, or an empty view if
    /// \a ref is empty.
    ///
    /// The view returned by item_handle::text() is passed on as is.
    /// Returning a std::string here made item_handle::as<std::string_view>()
    /// return a view on a temporary that was already destroyed.
    /// NOTE(review): the result is valid for as long as the view returned
    /// by text() is, presumably as long as the stored value is not
    /// modified - confirm.
    static std::string_view convert(const item_handle &ref)
    {
        if (ref.empty())
            return {};
        return ref.text();
    }

    /// \brief Compare the text of \a ref with \a value, ignoring case
    /// if \a icase is true
    static int compare(const item_handle &ref, const std::string_view &value, bool icase)
    {
        return icase ? cif::icompare(ref.text(), value) : ref.text().compare(value);
    }
};
/// \brief Conversion helper for std::string.
template <typename T>
struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::string>>>
{
    /// \brief Return a copy of the text of \a ref, or an empty string if
    /// \a ref is empty
    static std::string convert(const item_handle &ref)
    {
        return ref.empty() ? std::string{} : std::string(ref.text());
    }

    /// \brief Compare the text of \a ref with \a value, ignoring case
    /// if \a icase is true
    static int compare(const item_handle &ref, const std::string &value, bool icase)
    {
        if (icase)
            return cif::icompare(ref.text(), value);

        return ref.text().compare(value);
    }
};
} // namespace cif
// Specialisations of std::tuple_size and std::tuple_element for cif::item.
// Together with item::get<N>() these make structured binding work:
// element 0 has the type returned by item::name(), element 1 the type
// returned by item::value().
namespace std
{
// a cif::item decomposes into two elements
template<> struct tuple_size<::cif::item>
: public std::integral_constant<std::size_t, 2> {};
// type of the first element, the name
template<> struct tuple_element<0, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().name());
};
// type of the second element, the value
template<> struct tuple_element<1, ::cif::item>
{
using type = decltype(std::declval<::cif::item>().value());
};
}

676
include/cif++/iterator.hpp Normal file
View File

@@ -0,0 +1,676 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <stdexcept>
#include <cif++/row.hpp>
namespace cif
{
// --------------------------------------------------------------------
/// \brief Forward iterator over the rows of a category that yields, for each
/// row, a std::tuple with the values of the selected columns converted to
/// the types \a Ts.
///
/// The tuple is cached in m_value and is recalculated by get() whenever the
/// iterator moves to another row. Comparing iterators only compares the row
/// they point to.
template <typename Category, typename... Ts>
class iterator_impl
{
public:
template <typename, typename...>
friend class iterator_impl;
friend class category;
static constexpr size_t N = sizeof...(Ts);
using category_type = std::remove_cv_t<Category>;
// rows are const when iterating over a const category
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
using tuple_type = std::tuple<Ts...>;
using iterator_category = std::forward_iterator_tag;
using value_type = tuple_type;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
using reference = value_type &;
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
/// \brief Converting constructor, copies category, row, cached value and
/// column indices from an iterator with other template arguments
template <typename C2, typename... T2s>
iterator_impl(const iterator_impl<C2, T2s...> &rhs)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
{
}
/// \brief Converting constructor from an iterator with another category
/// type but the same value types; the cached value is recalculated
template <typename IRowType>
iterator_impl(iterator_impl<IRowType, Ts...> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type *>(rhs.m_current))
, m_value(rhs.m_value)
, m_column_ix(rhs.m_column_ix)
{
m_value = get(std::make_index_sequence<N>());
}
/// \brief Construct from a plain row iterator \a rhs and the indices
/// \a cix of the columns to return
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, N> &cix)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
, m_column_ix(cix)
{
m_value = get(std::make_index_sequence<N>());
}
iterator_impl &operator=(const iterator_impl &i)
{
m_category = i.m_category;
m_current = i.m_current;
m_column_ix = i.m_column_ix;
m_value = i.m_value;
return *this;
}
virtual ~iterator_impl() = default;
/// \brief Access to the cached tuple for the current row
reference operator*()
{
return m_value;
}
pointer operator->()
{
return &m_value;
}
/// \brief Conversion to a row_handle for the current row; both
/// m_category and m_current are dereferenced without a check
operator const row_handle() const
{
return { *m_category, *m_current };
}
operator row_handle()
{
return { *m_category, *m_current };
}
/// \brief Move to the next row using the m_next link of the current row
/// and recalculate the cached tuple
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
m_value = get(std::make_index_sequence<N>());
return *this;
}
iterator_impl operator++(int)
{
iterator_impl result(*this);
this->operator++();
return result;
}
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
private:
/// \brief Build the tuple for the current row, or a value initialised
/// tuple if there is no current row
template <std::size_t... Is>
tuple_type get(std::index_sequence<Is...>) const
{
if (m_current != nullptr)
{
row_handle rh{*m_category, *m_current};
return tuple_type{rh[m_column_ix[Is]].template as<Ts>()...};
}
return {};
}
category_type *m_category = nullptr;
row_type *m_current = nullptr;
value_type m_value;
// NOTE(review): not initialised by the default constructor
std::array<size_t, N> m_column_ix;
};
/// \brief Specialisation of iterator_impl without column types: a forward
/// iterator over the rows of a category that yields a row_handle.
///
/// Comparing iterators only compares the row they point to.
template<typename Category>
class iterator_impl<Category>
{
public:
template <typename, typename...>
friend class iterator_impl;
friend class category;
using category_type = std::remove_cv_t<Category>;
// rows are const when iterating over a const category
using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;
using iterator_category = std::forward_iterator_tag;
using value_type = row_handle;
using difference_type = std::ptrdiff_t;
using pointer = row_handle;
using reference = row_handle;
iterator_impl() = default;
iterator_impl(const iterator_impl &rhs) = default;
/// \brief Converting constructor from a row iterator for another category
/// type, constness of the row is cast away
template <typename C2>
iterator_impl(const iterator_impl<C2> &rhs)
: m_category(rhs.m_category)
, m_current(const_cast<row_type*>(rhs.m_current))
{
}
/// \brief Construct an iterator pointing to row \a current in category \a cat
iterator_impl(Category &cat, row *current)
: m_category(const_cast<category_type *>(&cat))
, m_current(current)
{
}
/// \brief Same signature as the constructor of the other variants taking
/// column indices, \a cix is not used here
template <typename IRowType>
iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 0> &cix)
: m_category(rhs.m_category)
, m_current(rhs.m_current)
{
}
iterator_impl &operator=(const iterator_impl &i)
{
m_category = i.m_category;
m_current = i.m_current;
return *this;
}
virtual ~iterator_impl() = default;
/// \brief Return a row_handle for the current row; both m_category and
/// m_current are dereferenced without a check
reference operator*()
{
return {*m_category, *m_current};
}
// NOTE(review): this returns the address of m_current (a pointer to a row
// pointer) whereas pointer is defined as row_handle. Whether this compiles
// once instantiated depends on row_handle, which is not visible here - confirm.
pointer operator->()
{
return &m_current;
}
operator const row_handle() const
{
return { *m_category, *m_current };
}
operator row_handle()
{
return { *m_category, *m_current };
}
/// \brief Move to the next row using the m_next link of the current row
iterator_impl &operator++()
{
if (m_current != nullptr)
m_current = m_current->m_next;
return *this;
}
iterator_impl operator++(int)
{
iterator_impl result(*this);
this->operator++();
return result;
}
bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current == rhs.m_current;
}
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
{
return m_current != rhs.m_current;
}
private:
category_type *m_category = nullptr;
row_type *m_current = nullptr;
};
/// \brief Specialisation of iterator_impl for a single column: a forward
/// iterator over the rows of a category that yields the value of one column
/// converted to type \a T.
///
/// The value is cached in m_value and is recalculated by get() whenever the
/// iterator moves to another row. Comparing iterators only compares the row
/// they point to.
template <typename Category, typename T>
class iterator_impl<Category, T>
{
  public:
    template <typename, typename...>
    friend class iterator_impl;

    friend class category;

    using category_type = std::remove_cv_t<Category>;
    // rows are const when iterating over a const category
    using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;

    using iterator_category = std::forward_iterator_tag;
    using value_type = T;
    using difference_type = std::ptrdiff_t;
    using pointer = value_type *;
    using reference = value_type &;

    iterator_impl() = default;

    iterator_impl(const iterator_impl &rhs) = default;

    /// \brief Converting constructor, copies category, row, cached value
    /// and column index from an iterator with other template arguments
    template <typename C2, typename T2>
    iterator_impl(const iterator_impl<C2, T2> &rhs)
        : m_category(rhs.m_category)
        , m_current(rhs.m_current)
        , m_value(rhs.m_value)
        , m_column_ix(rhs.m_column_ix)
    {
    }

    /// \brief Converting constructor from an iterator with another category
    /// type but the same value type; the cached value is recalculated
    template <typename IRowType>
    iterator_impl(iterator_impl<IRowType, T> &rhs)
        : m_category(rhs.m_category)
        , m_current(const_cast<row_type *>(rhs.m_current))
        , m_value(rhs.m_value)
        , m_column_ix(rhs.m_column_ix)
    {
        // get() takes no arguments, it uses m_current itself. The call
        // get(m_current) that used to be here could never be compiled.
        m_value = get();
    }

    /// \brief Construct from a plain row iterator \a rhs and the index of
    /// the column to return, stored as the single element of \a cix
    template <typename IRowType>
    iterator_impl(const iterator_impl<IRowType> &rhs, const std::array<size_t, 1> &cix)
        : m_category(rhs.m_category)
        , m_current(rhs.m_current)
        , m_column_ix(cix[0])
    {
        m_value = get();
    }

    iterator_impl &operator=(const iterator_impl &i)
    {
        m_category = i.m_category;
        m_current = i.m_current;
        m_column_ix = i.m_column_ix;
        m_value = i.m_value;
        return *this;
    }

    virtual ~iterator_impl() = default;

    /// \brief Access to the cached value for the current row
    reference operator*()
    {
        return m_value;
    }

    pointer operator->()
    {
        return &m_value;
    }

    /// \brief Conversion to a row_handle for the current row; both
    /// m_category and m_current are dereferenced without a check
    operator const row_handle() const
    {
        return { *m_category, *m_current };
    }

    operator row_handle()
    {
        return { *m_category, *m_current };
    }

    /// \brief Move to the next row using the m_next link of the current
    /// row and recalculate the cached value
    iterator_impl &operator++()
    {
        if (m_current != nullptr)
            m_current = m_current->m_next;

        m_value = get();

        return *this;
    }

    iterator_impl operator++(int)
    {
        iterator_impl result(*this);
        this->operator++();
        return result;
    }

    bool operator==(const iterator_impl &rhs) const { return m_current == rhs.m_current; }
    bool operator!=(const iterator_impl &rhs) const { return m_current != rhs.m_current; }

    template <typename IRowType, typename... ITs>
    bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const
    {
        return m_current == rhs.m_current;
    }

    template <typename IRowType, typename... ITs>
    bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const
    {
        return m_current != rhs.m_current;
    }

  private:
    /// \brief Return the value of the selected column in the current row,
    /// or a value initialised \a T if there is no current row
    value_type get() const
    {
        if (m_current != nullptr)
        {
            row_handle rh{ *m_category, *m_current };
            return rh[m_column_ix].template as<T>();
        }

        return {};
    }

    category_type *m_category = nullptr;
    row_type *m_current = nullptr;
    value_type m_value;
    // initialised to avoid copying an indeterminate value when a default
    // constructed iterator is copied
    size_t m_column_ix = 0;
};
// --------------------------------------------------------------------
// iterator proxy
/// \brief A range over rows of a category, starting at a given position,
/// whose iterators yield the values of the selected columns converted to
/// the types \a Ts.
///
/// The object stores the category, a begin and an end row iterator and the
/// indices of the selected columns. It can be moved but not copied.
template <typename Category, typename... Ts>
class iterator_proxy
{
public:
static constexpr const size_t N = sizeof...(Ts);
using category_type = Category;
using row_type = std::conditional_t<std::is_const_v<category_type>, const row, row>;
using iterator = iterator_impl<category_type, Ts...>;
using row_iterator = iterator_impl<category_type>;
/// \brief Construct a range in \a cat starting at \a pos, \a columns
/// contains the N names of the columns to return
iterator_proxy(category_type &cat, row_iterator pos, char const *const columns[N]);
/// \brief Same as above, with the column names in an initializer list
iterator_proxy(category_type &cat, row_iterator pos, std::initializer_list<char const *> columns);
iterator_proxy(iterator_proxy &&p);
iterator_proxy &operator=(iterator_proxy &&p);
iterator_proxy(const iterator_proxy &) = delete;
iterator_proxy &operator=(const iterator_proxy &) = delete;
// begin and end combine the stored row iterators with the column indices
iterator begin() const { return iterator(m_begin, m_column_ix); }
iterator end() const { return iterator(m_end, m_column_ix); }
bool empty() const { return m_begin == m_end; }
explicit operator bool() const { return not empty(); }
// the iterators are forward iterators, so this walks the entire range
size_t size() const { return std::distance(begin(), end()); }
// row front() { return *begin(); }
// row back() { return *(std::prev(end())); }
category_type &category() const { return *m_category; }
/// \brief Exchange all members with those of \a rhs
void swap(iterator_proxy &rhs)
{
std::swap(m_category, rhs.m_category);
std::swap(m_begin, rhs.m_begin);
std::swap(m_end, rhs.m_end);
std::swap(m_column_ix, rhs.m_column_ix);
}
private:
category_type *m_category;
row_iterator m_begin, m_end;
std::array<size_t, N> m_column_ix;
};
// --------------------------------------------------------------------
// conditional iterator proxy
/// \brief A range over rows of a category that is filtered by a condition.
///
/// The object stores the category, the condition, a begin and an end row
/// iterator and the indices of the selected columns. It can be moved but
/// not copied. Most members are defined elsewhere.
template <typename CategoryType, typename... Ts>
class conditional_iterator_proxy
{
public:
static constexpr const size_t N = sizeof...(Ts);
using category_type = std::remove_cv_t<CategoryType>;
using base_iterator = iterator_impl<CategoryType, Ts...>;
using value_type = typename base_iterator::value_type;
using row_type = typename base_iterator::row_type;
using row_iterator = iterator_impl<CategoryType>;
/// \brief The iterator for this range, a wrapper around a pair of
/// base_iterators and a pointer to the condition
class conditional_iterator_impl
{
public:
using iterator_category = std::forward_iterator_tag;
using value_type = conditional_iterator_proxy::value_type;
using difference_type = std::ptrdiff_t;
using pointer = value_type *;
// note: dereferencing returns a value, not a reference
using reference = value_type;
conditional_iterator_impl(CategoryType &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix);
conditional_iterator_impl(const conditional_iterator_impl &i) = default;
conditional_iterator_impl &operator=(const conditional_iterator_impl &i) = default;
virtual ~conditional_iterator_impl() = default;
reference operator*()
{
return *mBegin;
}
// NOTE(review): takes the address of the result of *mBegin. That result is
// a row_handle returned by value when no column types are given, in which
// case this presumably cannot be compiled - confirm.
pointer operator->()
{
return &*mBegin;
}
/// \brief Advance mBegin until it either equals mEnd or points to a row
/// for which the condition returns true
conditional_iterator_impl &operator++()
{
while (mBegin != mEnd)
{
if (++mBegin == mEnd)
break;
if (m_condition->operator()(mBegin))
break;
}
return *this;
}
conditional_iterator_impl operator++(int)
{
conditional_iterator_impl result(*this);
this->operator++();
return result;
}
// only the current position is compared, not the end or the condition
bool operator==(const conditional_iterator_impl &rhs) const { return mBegin == rhs.mBegin; }
bool operator!=(const conditional_iterator_impl &rhs) const { return mBegin != rhs.mBegin; }
template <typename IRowType, typename... ITs>
bool operator==(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin == rhs; }
template <typename IRowType, typename... ITs>
bool operator!=(const iterator_impl<IRowType, ITs...> &rhs) const { return mBegin != rhs; }
private:
CategoryType *mCat;
base_iterator mBegin, mEnd;
const condition *m_condition;
};
using iterator = conditional_iterator_impl;
using reference = typename iterator::reference;
template <typename... Ns>
conditional_iterator_proxy(CategoryType &cat, row_iterator pos, condition &&cond, Ns... names);
conditional_iterator_proxy(conditional_iterator_proxy &&p);
conditional_iterator_proxy &operator=(conditional_iterator_proxy &&p);
conditional_iterator_proxy(const conditional_iterator_proxy &) = delete;
conditional_iterator_proxy &operator=(const conditional_iterator_proxy &) = delete;
iterator begin() const;
iterator end() const;
bool empty() const;
explicit operator bool() const { return not empty(); }
// the iterators are forward iterators, so this walks the entire range
size_t size() const { return std::distance(begin(), end()); }
/// \brief Return the first element of the range as a row_handle
row_handle front() { return *begin(); }
// row_handle back() { return *begin(); }
CategoryType &category() const { return *m_cat; }
void swap(conditional_iterator_proxy &rhs);
private:
CategoryType *m_cat;
condition m_condition;
row_iterator mCBegin, mCEnd;
std::array<size_t, N> mCix;
};
// --------------------------------------------------------------------
/// \brief Construct a proxy that starts at \a pos and runs to the end of \a cat.
///
/// Each of the N names in \a columns is translated into a column index
/// using the category.
template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N])
	: m_category(&cat)
	, m_begin(pos)
	, m_end(cat.end())
{
	size_t slot = 0;
	while (slot < N)
	{
		m_column_ix[slot] = m_category->get_column_ix(columns[slot]);
		++slot;
	}
}
/// \brief Construct a proxy that starts at \a pos and runs to the end of \a cat,
/// taking the column names from an initializer list.
///
/// The names are translated into column indices in the order in which they
/// are listed.
/// NOTE(review): the number of names is not compared with N at run time, the
/// caller is expected to pass exactly one name per requested column.
template <typename Category, typename... Ts>
iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, std::initializer_list<char const *> columns)
	: m_category(&cat)
	, m_begin(pos)
	, m_end(cat.end())
{
	// static_assert(columns.size() == N, "The list of column names should be exactly the same as the list of requested columns");

	std::size_t slot = 0;
	for (auto name = columns.begin(); name != columns.end(); ++name, ++slot)
		m_column_ix[slot] = m_category->get_column_ix(*name);
}
// --------------------------------------------------------------------
/// \brief Construct a conditional iterator for \a cat positioned at \a pos.
///
/// The end position is taken from cat.end(). Both positions are built with
/// the column indices in \a cix. Only the address of \a cond is stored, so
/// the condition must stay alive for as long as the iterator is used.
template <typename Category, typename... Ts>
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditional_iterator_impl(
Category &cat, row_iterator pos, const condition &cond, const std::array<size_t, N> &cix)
: mCat(&cat)
, mBegin(pos, cix)
, mEnd(cat.end(), cix)
, m_condition(&cond)
{
}
/// \brief Move constructor.
///
/// The positions and column indices are copied from \a p. The category
/// pointer and the condition are exchanged with \a p, which leaves \a p with
/// a null category pointer and a default constructed condition.
template <typename Category, typename... Ts>
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(conditional_iterator_proxy &&p)
	: m_cat(nullptr)
	, mCBegin(p.mCBegin)
	, mCEnd(p.mCEnd)
	, mCix(p.mCix)
{
	// take over the condition, p receives our default constructed one
	m_condition.swap(p.m_condition);

	std::swap(mCix, p.mCix);

	// take over the category, p receives our nullptr
	std::swap(m_cat, p.m_cat);
}
/// \brief Construct a proxy for the rows of \a cat, from \a pos on, that satisfy \a cond.
///
/// The condition is moved into the proxy and prepared for \a cat. The start
/// position is then advanced to the first row for which the condition holds,
/// or to the end when there is none. Finally each name in \a names is
/// translated into a column index, in the order given.
template <typename Category, typename... Ts>
template <typename... Ns>
conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category &cat, row_iterator pos, condition &&cond, Ns... names)
	: m_cat(&cat)
	, m_condition(std::move(cond))
	, mCBegin(pos)
	, mCEnd(cat.end())
{
	static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types");

	m_condition.prepare(cat);

	// skip the leading rows that do not match
	for (; mCBegin != mCEnd; ++mCBegin)
	{
		if (m_condition(*mCBegin))
			break;
	}

	// the comma fold evaluates left to right, so the indices follow the order of names
	size_t slot = 0;
	((mCix[slot++] = m_cat->get_column_ix(names)), ...);
}
/// \brief Move assignment, implemented as an exchange of state with \a p.
template <typename Category, typename... Ts>
conditional_iterator_proxy<Category, Ts...> &conditional_iterator_proxy<Category, Ts...>::operator=(conditional_iterator_proxy &&p)
{
	// after the exchange p holds whatever this proxy held before
	this->swap(p);
	return *this;
}
/// \brief Return an iterator positioned at the start position stored in this proxy.
template <typename Category, typename... Ts>
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::begin() const
{
	iterator first(*m_cat, mCBegin, m_condition, mCix);
	return first;
}
/// \brief Return an iterator positioned at the end position stored in this proxy.
template <typename Category, typename... Ts>
typename conditional_iterator_proxy<Category, Ts...>::iterator conditional_iterator_proxy<Category, Ts...>::end() const
{
	iterator last(*m_cat, mCEnd, m_condition, mCix);
	return last;
}
/// \brief True when the stored start position equals the stored end position.
template <typename Category, typename... Ts>
bool conditional_iterator_proxy<Category, Ts...>::empty() const
{
	const bool nothing_to_visit = (mCBegin == mCEnd);
	return nothing_to_visit;
}
/// \brief Exchange all state of this proxy with that of \a rhs.
template <typename Category, typename... Ts>
void conditional_iterator_proxy<Category, Ts...>::swap(conditional_iterator_proxy &rhs)
{
	// the condition has its own swap member
	m_condition.swap(rhs.m_condition);

	// the other members are exchanged one by one
	std::swap(mCix, rhs.mCix);
	std::swap(mCEnd, rhs.mCEnd);
	std::swap(mCBegin, rhs.mCBegin);
	std::swap(m_cat, rhs.m_cat);
}
} // namespace cif

79
include/cif++/list.hpp Normal file
View File

@@ -0,0 +1,79 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cstdint>
#include <memory>
#include <utility>
namespace cif
{
// --------------------------------------------------------------------
/// \brief Minimal base class for an allocator aware, singly linked list.
///
/// The class only offers the building blocks: a node type and protected
/// helpers to allocate, construct, destroy and deallocate nodes using an
/// allocator rebound to the node type. Linking the nodes through m_head,
/// m_tail and list_item::m_next is left to derived classes.
///
/// \tparam Allocator The allocator type, it is rebound to list_item before use
template <typename Allocator = std::allocator<void>>
class list
{
  public:
	using allocator_type = Allocator;

	/// \brief Return an allocator for this list.
	///
	/// No allocator state is stored in the list, a value initialized
	/// allocator is returned each time.
	allocator_type get_allocator() const
	{
		return {};
	}

  protected:
	/// \brief A node in the list, holding only the link to the next node
	struct list_item
	{
		list_item *m_next = nullptr;
	};

	using list_item_allocator_type = typename std::allocator_traits<Allocator>::template rebind_alloc<list_item>;
	using list_item_allocator_traits = std::allocator_traits<list_item_allocator_type>;

	/// \brief Allocate raw, not yet constructed, storage for a single list_item
	typename list_item_allocator_traits::pointer get_item()
	{
		list_item_allocator_type ia(get_allocator());
		return list_item_allocator_traits::allocate(ia, 1);
	}

	/// \brief Allocate and construct a list_item
	///
	/// \param column_ix Not used by this implementation, kept for callers that pass it
	/// \param args      Arguments forwarded to the constructor of list_item
	/// \return Pointer to the new item, to be released with delete_list_item
	template <typename... Arguments>
	list_item *create_list_item([[maybe_unused]] uint16_t column_ix, Arguments &&...args)
	{
		auto p = this->get_item();
		list_item_allocator_type ia(get_allocator());

		try
		{
			list_item_allocator_traits::construct(ia, p, std::forward<Arguments>(args)...);
		}
		catch (...)
		{
			// do not leak the storage when the constructor throws
			list_item_allocator_traits::deallocate(ia, p, 1);
			throw;
		}

		return p;
	}

	/// \brief Destroy the item \a iv and give its storage back to the allocator
	void delete_list_item(list_item *iv)
	{
		list_item_allocator_type ia(get_allocator());
		list_item_allocator_traits::destroy(ia, iv);
		list_item_allocator_traits::deallocate(ia, iv, 1);
	}

	// first and last item, both nullptr as long as nothing has been linked in
	list_item *m_head = nullptr, *m_tail = nullptr;
};
} // namespace cif

928
include/cif++/model.hpp Normal file
View File

@@ -0,0 +1,928 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <numeric>
#include <cif++/atom_type.hpp>
#if __cpp_lib_format
#include <format>
#endif
namespace cif::mm
{
class atom;
class residue;
class monomer;
class polymer;
class structure;
// --------------------------------------------------------------------
/// \brief Copyable handle to an atom stored in the atom_site category of a datablock.
///
/// All copies of an atom share a single implementation object through a
/// std::shared_ptr. A default constructed atom has no implementation, it
/// converts to false and most accessors throw when used on it.
class atom
{
  private:
	/// \brief The shared state behind an atom handle
	struct atom_impl : public std::enable_shared_from_this<atom_impl>
	{
		/// \brief Create the implementation for the atom with ID \a id in datablock \a db.
		///
		/// When a row for this ID is found, the location is read from the
		/// Cartn_x, Cartn_y and Cartn_z items of that row.
		atom_impl(datablock &db, std::string_view id)
			: m_db(db)
			, m_cat(db["atom_site"])
			, m_id(id)
		{
			auto r = row();
			if (r)
				tie(m_location.m_x, m_location.m_y, m_location.m_z) = r.get("Cartn_x", "Cartn_y", "Cartn_z");
		}

		// constructor for a symmetry copy of an atom
		atom_impl(const atom_impl &impl, const point &loc, const std::string &sym_op)
			: atom_impl(impl)
		{
			m_location = loc;
			m_symop = sym_op;
		}

		atom_impl(const atom_impl &i) = default;

		void prefetch();

		int compare(const atom_impl &b) const;

		// bool getAnisoU(float anisou[6]) const;

		int get_charge() const;

		void moveTo(const point &p);

		// const compound *compound() const;

		std::string get_property(std::string_view name) const;
		int get_property_int(std::string_view name) const;
		float get_property_float(std::string_view name) const;

		void set_property(const std::string_view name, const std::string &value);

		/// \brief Look up the row in atom_site using the key id == m_id
		row_handle row()
		{
			return m_cat[{{"id", m_id}}];
		}

		const row_handle row() const
		{
			return m_cat[{{"id", m_id}}];
		}

		/// \brief Look up the row with id == m_id in atom_site_anisotrop,
		/// an empty row_handle is returned when that category is not available
		row_handle row_aniso()
		{
			auto cat = m_db.get("atom_site_anisotrop");
			return cat ? cat->find1(key("id") == m_id) : row_handle{};
		}

		const row_handle row_aniso() const
		{
			auto cat = m_db.get("atom_site_anisotrop");
			return cat ? cat->find1(key("id") == m_id) : row_handle{};
		}

		const datablock &m_db;
		category &m_cat;
		std::string m_id;
		point m_location;
		// the symmetry operation, "1_555" means this is not a symmetry copy
		std::string m_symop = "1_555";
	};

  public:
	/// \brief Construct an uninitialized atom
	atom() {}

	/// \brief Construct an atom sharing the implementation \a impl
	atom(std::shared_ptr<atom_impl> impl)
		: m_impl(impl)
	{
	}

	/// \brief Copy constructor, the copy shares the implementation with \a rhs
	atom(const atom &rhs)
		: m_impl(rhs.m_impl)
	{
	}

	/// \brief Construct an atom for the row \a row in datablock \a db, using the id item of the row
	atom(datablock &db, row_handle &row)
		: atom(std::make_shared<atom_impl>(db, row["id"].as<std::string>()))
	{
	}

	// a special constructor to create symmetry copies
	atom(const atom &rhs, const point &symmmetry_location, const std::string &symmetry_operation)
		: atom(std::make_shared<atom_impl>(*rhs.m_impl, symmmetry_location, symmetry_operation))
	{
	}

	/// \brief True when this atom has an implementation
	explicit operator bool() const { return static_cast<bool>(m_impl); }

	// // return a copy of this atom, with data copied instead of referenced
	// atom clone() const
	// {
	// 	auto copy = std::make_shared<atom_impl>(*m_impl);
	// 	copy->mClone = true;
	// 	return atom(copy);
	// }

	atom &operator=(const atom &rhs) = default;

	// template <typename T>
	// T get_property(const std::string_view name) const;

	/// \brief Return the property \a name as a string
	/// \throws std::logic_error when this atom is uninitialized
	std::string get_property(std::string_view name) const
	{
		if (not m_impl)
			throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
		return m_impl->get_property(name);
	}

	/// \brief Return the property \a name as an int
	/// \throws std::logic_error when this atom is uninitialized
	int get_property_int(std::string_view name) const
	{
		if (not m_impl)
			throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
		return m_impl->get_property_int(name);
	}

	/// \brief Return the property \a name as a float
	/// \throws std::logic_error when this atom is uninitialized
	float get_property_float(std::string_view name) const
	{
		if (not m_impl)
			throw std::logic_error("Error trying to fetch a property from an uninitialized atom");
		return m_impl->get_property_float(name);
	}

	/// \brief Set the property \a name to \a value
	/// \throws std::logic_error when this atom is uninitialized
	void set_property(const std::string_view name, const std::string &value)
	{
		if (not m_impl)
			throw std::logic_error("Error trying to modify an uninitialized atom");
		m_impl->set_property(name, value);
	}

	/// \brief Set the property \a name to the arithmetic \a value, converted with std::to_string
	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
	void set_property(const std::string_view name, const T &value)
	{
		set_property(name, std::to_string(value));
	}

	const std::string &id() const { return impl().m_id; }

	cif::atom_type get_type() const { return atom_type_traits(get_property("type_symbol")).type(); }

	point get_location() const { return impl().m_location; }

	/// \brief Move this atom to location \a p
	/// \throws std::logic_error when this atom is uninitialized
	void set_location(point p)
	{
		if (not m_impl)
			throw std::logic_error("Error trying to modify an uninitialized atom");
		m_impl->moveTo(p);
	}

	/// \brief Translate the position of this atom by \a t
	void translate(point t)
	{
		set_location(get_location() + t);
	}

	/// \brief Rotate the position of this atom by \a q
	void rotate(quaternion q)
	{
		auto loc = get_location();
		loc.rotate(q);
		set_location(loc);
	}

	/// \brief Translate and rotate the position of this atom by \a t and \a q
	void translate_and_rotate(point t, quaternion q)
	{
		auto loc = get_location();
		loc += t;
		loc.rotate(q);
		set_location(loc);
	}

	/// \brief Translate, rotate and translate again the coordinates this atom by \a t1 , \a q and \a t2
	void translate_rotate_and_translate(point t1, quaternion q, point t2)
	{
		auto loc = get_location();
		loc += t1;
		loc.rotate(q);
		loc += t2;
		set_location(loc);
	}

	// for direct access to underlying data, be careful!
	const row_handle get_row() const { return impl().row(); }
	const row_handle get_row_aniso() const { return impl().row_aniso(); }

	/// \brief True when the symmetry operation differs from "1_555"
	bool is_symmetry_copy() const { return impl().m_symop != "1_555"; }
	std::string symmetry() const { return impl().m_symop; }

	// const compound &compound() const;

	/// \brief True when label_comp_id is one of HOH, H2O or WAT
	bool is_water() const
	{
		auto comp_id = get_label_comp_id();
		return comp_id == "HOH" or comp_id == "H2O" or comp_id == "WAT";
	}

	int get_charge() const { return impl().get_charge(); }

	// float uIso() const;
	// bool getAnisoU(float anisou[6]) const { return impl().getAnisoU(anisou); }

	float get_occupancy() const { return get_property_float("occupancy"); }

	// specifications
	std::string get_label_asym_id() const { return get_property("label_asym_id"); }
	int get_label_seq_id() const { return get_property_int("label_seq_id"); }
	std::string get_label_atom_id() const { return get_property("label_atom_id"); }
	std::string get_label_alt_id() const { return get_property("label_alt_id"); }
	std::string get_label_comp_id() const { return get_property("label_comp_id"); }
	std::string get_label_entity_id() const { return get_property("label_entity_id"); }

	std::string get_auth_asym_id() const { return get_property("auth_asym_id"); }
	std::string get_auth_seq_id() const { return get_property("auth_seq_id"); }
	std::string get_auth_atom_id() const { return get_property("auth_atom_id"); }
	std::string get_auth_alt_id() const { return get_property("auth_alt_id"); }
	std::string get_auth_comp_id() const { return get_property("auth_comp_id"); }
	std::string get_pdb_ins_code() const { return get_property("pdbx_PDB_ins_code"); }

	/// \brief True when label_alt_id is not empty
	bool is_alternate() const { return not get_label_alt_id().empty(); }

	// std::string labelID() const; // label_comp_id + '_' + label_asym_id + '_' + label_seq_id

	/// \brief Return label_comp_id + '_' + auth_asym_id + '_' + auth_seq_id + pdbx_PDB_ins_code
	std::string pdb_id() const
	{
		return get_label_comp_id() + '_' + get_auth_asym_id() + '_' + get_auth_seq_id() + get_pdb_ins_code();
	}

	/// \brief Atoms are equal when they share an implementation, or when both are
	/// initialized and refer to the same datablock object and the same ID
	bool operator==(const atom &rhs) const
	{
		if (m_impl == rhs.m_impl)
			return true;

		if (not(m_impl and rhs.m_impl))
			return false;

		return &m_impl->m_db == &rhs.m_impl->m_db and m_impl->m_id == rhs.m_impl->m_id;
	}

	bool operator!=(const atom &rhs) const
	{
		return not operator==(rhs);
	}

	// // access data in compound for this atom

	// convenience routine
	bool is_back_bone() const
	{
		auto atomID = get_label_atom_id();
		return atomID == "N" or atomID == "O" or atomID == "C" or atomID == "CA";
	}

	void swap(atom &b)
	{
		std::swap(m_impl, b.m_impl);
	}

	/// \brief Three way comparison with \a b, delegated to the implementation
	/// \throws std::runtime_error when either atom is uninitialized
	int compare(const atom &b) const
	{
		// use impl() for b as well, b.m_impl may be empty
		return impl().compare(b.impl());
	}

	bool operator<(const atom &rhs) const
	{
		return compare(rhs) < 0;
	}

	friend std::ostream &operator<<(std::ostream &os, const atom &atom);

	// /// \brief Synchronize data with underlying cif data
	// void sync()
	// {
	// 	if (m_impl)
	// 		m_impl->prefetch();
	// }

  private:
	friend class structure;

	/// \brief Checked access to the implementation
	/// \throws std::runtime_error when this atom is uninitialized
	const atom_impl &impl() const
	{
		if (not m_impl)
			throw std::runtime_error("Uninitialized atom, not found?");
		return *m_impl;
	}

	std::shared_ptr<atom_impl> m_impl;
};
// template <>
// inline std::string atom::get_property<std::string>(const std::string_view name) const
// {
// return get_property(name);
// }
// template <>
// inline int atom::get_property<int>(const std::string_view name) const
// {
// auto v = impl().get_property(name);
// return v.empty() ? 0 : stoi(v);
// }
// template <>
// inline float atom::get_property<float>(const std::string_view name) const
// {
// return stof(impl().get_property(name));
// }
/// \brief Exchange the atoms \a a and \a b, this forwards to the swap member of atom.
inline void swap(atom &a, atom &b)
{
	// exchanging is symmetric, so it does not matter on which atom the member is called
	b.swap(a);
}
/// \brief Return the distance between the locations of atoms \a a and \a b.
inline float distance(const atom &a, const atom &b)
{
	const auto location_a = a.get_location();
	const auto location_b = b.get_location();

	return distance(location_a, location_b);
}
/// \brief Return the squared distance between the locations of atoms \a a and \a b.
inline float distance_squared(const atom &a, const atom &b)
{
	const auto location_a = a.get_location();
	const auto location_b = b.get_location();

	return distance_squared(location_a, location_b);
}
// --------------------------------------------------------------------
/// \brief The kinds of entity distinguished in this model, as returned by
/// residue::entity_type and the get_entity_type_for_... members of structure.
///
/// NOTE(review): the spelling of the enumerators is mixed, polymer is lower
/// case while the others are capitalized. Left unchanged here since the
/// names are part of the public interface.
enum class EntityType
{
polymer,
NonPolymer,
Macrolide,
Water,
Branched
};
// --------------------------------------------------------------------
/// \brief A residue: a collection of atoms inside a structure, identified by a
/// compound ID, asym ID and seq ID plus the author supplied asym ID, seq ID
/// and the PDB insertion code.
///
/// A residue keeps a pointer to the structure it belongs to and a vector with
/// its atoms. Residues can be moved but not copied.
class residue
{
public:
friend class structure;
// constructor
residue(const structure &structure, const std::string &compoundID,
const std::string &asymID, int seqID,
const std::string &authAsymID, const std::string &authSeqID,
const std::string &pdbInsCode)
: m_structure(&structure)
, m_compound_id(compoundID)
, m_asym_id(asymID)
, m_seq_id(seqID)
, m_auth_asym_id(authAsymID)
, m_auth_seq_id(authSeqID)
, m_pdb_ins_code(pdbInsCode)
{
}
// copying is not allowed, moving is
residue(const residue &rhs) = delete;
residue &operator=(const residue &rhs) = delete;
residue(residue &&rhs) = default;
residue &operator=(residue &&rhs) = default;
// virtual since residue is used as a base class
virtual ~residue() = default;
std::string get_entity_id() const;
EntityType entity_type() const;
// NOTE(review): the auth_asym_id, auth_seq_id and pdb_ins_code getters return a
// copy whereas the asym_id and compound_id getters return a reference
const std::string &get_asym_id() const { return m_asym_id; }
int get_seq_id() const { return m_seq_id; }
const std::string get_auth_asym_id() const { return m_auth_asym_id; }
const std::string get_auth_seq_id() const { return m_auth_seq_id; }
std::string get_pdb_ins_code() const { return m_pdb_ins_code; }
const std::string &get_compound_id() const { return m_compound_id; }
/// \brief Replace the stored compound ID with \a id, nothing else is changed by this call
void set_compound_id(const std::string &id) { m_compound_id = id; }
const structure *get_structure() const { return m_structure; }
// const compound &compound() const;
/// \brief Direct access to the atoms stored in this residue
std::vector<atom> &atoms()
{
return m_atoms;
}
const std::vector<atom> &atoms() const
{
return m_atoms;
}
void add_atom(atom &atom);
/// \brief Unique atoms returns only the atoms without alternates and the first of each alternate atom id.
std::vector<atom> unique_atoms() const;
/// \brief The alt ID used for the unique atoms
std::string unique_alt_id() const;
atom get_atom_by_atom_id(const std::string &atomID) const;
// Is this residue a single entity?
bool is_entity() const;
/// \brief True when the compound ID is exactly "HOH".
/// NOTE(review): atom::is_water also accepts "H2O" and "WAT".
bool is_water() const { return m_compound_id == "HOH"; }
// bool empty() const { return m_structure == nullptr; }
bool has_alternate_atoms() const;
/// \brief Return the list of unique alt ID's present in this residue
std::set<std::string> get_alternate_ids() const;
/// \brief Return the list of unique atom ID's
std::set<std::string> get_atom_ids() const;
/// \brief Return the list of atoms having ID \a atomID
std::vector<atom> get_atoms_by_id(const std::string &atomID) const;
// some routines for 3d work
std::tuple<point, float> center_and_radius() const;
friend std::ostream &operator<<(std::ostream &os, const residue &res);
/// \brief Residues are equal when they are the same object or when structure,
/// seq ID, asym ID, compound ID and auth seq ID are all equal. The auth asym
/// ID and the PDB insertion code are not compared.
bool operator==(const residue &rhs) const
{
return this == &rhs or (m_structure == rhs.m_structure and
m_seq_id == rhs.m_seq_id and
m_asym_id == rhs.m_asym_id and
m_compound_id == rhs.m_compound_id and
m_auth_seq_id == rhs.m_auth_seq_id);
}
protected:
// default construction is only available to derived classes and friends
residue() {}
const structure *m_structure = nullptr;
std::string m_compound_id, m_asym_id;
int m_seq_id = 0;
std::string m_auth_asym_id, m_auth_seq_id, m_pdb_ins_code;
std::vector<atom> m_atoms;
};
// --------------------------------------------------------------------
// a monomer models a single residue in a protein chain
/// \brief A residue that is part of a polymer.
///
/// In addition to the residue data a monomer stores a pointer to the polymer
/// it belongs to and its index. Monomers can be moved but not copied.
class monomer : public residue
{
public:
// monomer();
monomer(const monomer &rhs) = delete;
monomer &operator=(const monomer &rhs) = delete;
monomer(monomer &&rhs);
monomer &operator=(monomer &&rhs);
/// \brief Construct a monomer for \a polymer, \a index is stored as the index of this monomer
monomer(const polymer &polymer, size_t index, int seqID, const std::string &authSeqID,
const std::string &pdbInsCode, const std::string &compoundID);
bool is_first_in_chain() const;
bool is_last_in_chain() const;
// convenience
bool has_alpha() const;
bool has_kappa() const;
// Assuming this is really an amino acid...
float phi() const;
float psi() const;
float alpha() const;
float kappa() const;
float tco() const;
float omega() const;
// torsion angles
size_t nr_of_chis() const;
float chi(size_t i) const;
bool is_cis() const;
/// \brief Returns true if the four atoms C, CA, N and O are present
bool is_complete() const;
/// \brief Returns true if any of the backbone atoms has an alternate
bool has_alternate_backbone_atoms() const;
// shortcuts to fetch an atom by its atom ID
atom CAlpha() const { return get_atom_by_atom_id("CA"); }
atom C() const { return get_atom_by_atom_id("C"); }
atom N() const { return get_atom_by_atom_id("N"); }
atom O() const { return get_atom_by_atom_id("O"); }
atom H() const { return get_atom_by_atom_id("H"); }
/// \brief True when \a rhs is another object and are_bonded(*this, rhs) holds,
/// using the default error margin
bool is_bonded_to(const monomer &rhs) const
{
return this != &rhs and are_bonded(*this, rhs);
}
static bool are_bonded(const monomer &a, const monomer &b, float errorMargin = 0.5f);
static bool is_cis(const monomer &a, const monomer &b);
static float omega(const monomer &a, const monomer &b);
// for LEU and VAL
float chiral_volume() const;
/// \brief Monomers are equal when they have the same polymer pointer and the same index
bool operator==(const monomer &rhs) const
{
return m_polymer == rhs.m_polymer and m_index == rhs.m_index;
}
private:
// the polymer this monomer belongs to, not owned
const polymer *m_polymer;
// the index passed to the constructor
size_t m_index;
};
// --------------------------------------------------------------------
/// \brief A polymer, implemented as a std::vector of monomers.
///
/// Next to the monomers a polymer stores a pointer to its structure and the
/// entity ID, asym ID and auth asym ID it was constructed with. Polymers can
/// not be copied.
class polymer : public std::vector<monomer>
{
public:
polymer(const structure &s, const std::string &entityID, const std::string &asymID, const std::string &auth_asym_id);
polymer(const polymer &) = delete;
polymer &operator=(const polymer &) = delete;
// monomer &getBySeqID(int seqID);
// const monomer &getBySeqID(int seqID) const;
const structure *get_structure() const { return m_structure; }
// the getters below return copies of the stored strings
std::string get_asym_id() const { return m_asym_id; }
std::string get_auth_asym_id() const { return m_auth_asym_id; } // The PDB chain ID, actually
std::string get_entity_id() const { return m_entity_id; }
// int Distance(const monomer &a, const monomer &b) const;
private:
// not owned
const structure *m_structure;
std::string m_entity_id;
std::string m_asym_id;
std::string m_auth_asym_id;
};
// --------------------------------------------------------------------
// sugar and branch, to describe glycosylation sites
class branch;
/// \brief A residue that is part of a branch.
///
/// A sugar stores a pointer to its branch and, optionally, the atom it is
/// linked to.
class sugar : public residue
{
public:
sugar(const branch &branch, const std::string &compoundID,
const std::string &asymID, int authSeqID);
sugar(sugar &&rhs);
sugar &operator=(sugar &&rhs);
/// \brief Return the auth_seq_id of this sugar converted to an int
///
/// \throws std::runtime_error when std::from_chars reports an error.
/// NOTE(review): only the error code is checked, characters left over after
/// the number are not rejected here.
int num() const {
int result;
auto r = std::from_chars(m_auth_seq_id.data(), m_auth_seq_id.data() + m_auth_seq_id.length(), result);
if (r.ec != std::errc())
throw std::runtime_error("The auth_seq_id should be a number for a sugar");
return result;
}
std::string name() const;
/// \brief Return the atom the C1 is linked to
atom get_link() const { return m_link; }
/// \brief Store \a link as the atom this sugar is linked to
void set_link(atom link) { m_link = link; }
/// \brief Return the auth_seq_id of the linked atom as a number, or zero
/// when no link atom has been set
size_t get_link_nr() const
{
size_t result = 0;
if (m_link)
result = m_link.get_property_int("auth_seq_id");
return result;
}
private:
// the branch this sugar is part of, not owned
const branch *m_branch;
// the linked atom, uninitialized when there is no link
atom m_link;
};
/// \brief A branch, implemented as a std::vector of sugars.
///
/// Next to the sugars a branch stores a pointer to its structure and the
/// asym ID it was constructed with.
class branch : public std::vector<sugar>
{
public:
branch(structure &structure, const std::string &asymID);
void link_atoms();
std::string name() const;
float weight() const;
std::string get_asym_id() const { return m_asym_id; }
// NOTE(review): these dereference the stored pointer without a check
structure &get_structure() { return *m_structure; }
const structure &get_structure() const { return *m_structure; }
sugar &getSugarByNum(int nr);
const sugar &getSugarByNum(int nr) const;
private:
friend sugar;
// overload of name() taking a sugar, accessible to sugar through the friend declaration
std::string name(const sugar &s) const;
// not owned
structure *m_structure;
std::string m_asym_id;
};
// // --------------------------------------------------------------------
// // file is a reference to the data stored in e.g. the cif file.
// // This object is not copyable.
// class File : public file
// {
// public:
// File() {}
// // File(const std::filesystem::path &path)
// // {
// // load(path);
// // }
// // File(const char *data, size_t length)
// // {
// // load(data, length);
// // }
// File(const File &) = delete;
// File &operator=(const File &) = delete;
// // void load(const std::filesystem::path &p) override;
// // void save(const std::filesystem::path &p) override;
// // using file::load;
// // using file::save;
// datablock &data() { return front(); }
// };
// --------------------------------------------------------------------
/// \brief Options that can be passed to the constructors of structure,
/// each enumerator occupies a single bit.
enum class StructureOpenOptions
{
	SkipHydrogen = 1 << 0
};

/// \brief Test whether the options \a a and \a b have at least one bit in common.
inline bool operator&(StructureOpenOptions a, StructureOpenOptions b)
{
	const int bits_a = static_cast<int>(a);
	const int bits_b = static_cast<int>(b);

	return (bits_a & bits_b) != 0;
}
// --------------------------------------------------------------------
class structure
{
public:
structure(file &p, size_t modelNr = 1, StructureOpenOptions options = {});
structure(datablock &db, size_t modelNr = 1, StructureOpenOptions options = {});
structure(structure &&s) = default;
// Create a read-only clone of the current structure (for multithreaded calculations that move atoms)
// NOTE: removed, simply create a new structure for each thread
structure(const structure &) = delete;
structure &operator=(const structure &) = delete;
// Structure &operator=(Structure &&s) = default;
~structure() = default;
size_t get_model_nr() const { return m_model_nr; }
const std::vector<atom> &atoms() const { return m_atoms; }
// std::vector<atom> &atoms() { return m_atoms; }
EntityType get_entity_type_for_entity_id(const std::string entityID) const;
EntityType get_entity_type_for_asym_id(const std::string asymID) const;
// std::vector<atom> waters() const;
const std::list<polymer> &polymers() const { return m_polymers; }
std::list<polymer> &polymers() { return m_polymers; }
polymer &get_polymer_by_asym_id(const std::string &asymID);
const polymer &get_polymer_by_asym_id(const std::string &asymID) const
{
return const_cast<structure *>(this)->get_polymer_by_asym_id(asymID);
}
const std::list<branch> &branches() const { return m_branches; }
std::list<branch> &branches() { return m_branches; }
branch &get_branch_by_asym_id(const std::string &asymID);
const branch &get_branch_by_asym_id(const std::string &asymID) const;
const std::vector<residue> &non_polymers() const { return m_non_polymers; }
atom get_atom_by_id(const std::string &id) const;
// atom getAtomByLocation(point pt, float maxDistance) const;
atom get_atom_by_label(const std::string &atomID, const std::string &asymID,
const std::string &compID, int seqID, const std::string &altID = "");
// /// \brief Return the atom closest to point \a p
atom get_atom_by_position(point p) const;
/// \brief Return the atom closest to point \a p with atom type \a type in a residue of type \a res_type
atom get_atom_by_position_and_type(point p, std::string_view type, std::string_view res_type) const;
/// \brief Get a non-poly residue for an asym with id \a asymID
residue &get_residue(const std::string &asymID)
{
return get_residue(asymID, 0, "");
}
/// \brief Get a non-poly residue for an asym with id \a asymID
const residue &get_residue(const std::string &asymID) const
{
return get_residue(asymID, 0, "");
}
/// \brief Get a residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID);
/// \brief Get a the single residue for an asym with id \a asymID seq id \a seqID and authSeqID \a authSeqID
const residue &get_residue(const std::string &asymID, int seqID, const std::string &authSeqID) const
{
return const_cast<structure *>(this)->get_residue(asymID, seqID, authSeqID);
}
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID);
/// \brief Get a residue for an asym with id \a asymID, compound id \a compID, seq id \a seqID and authSeqID \a authSeqID
const residue &get_residue(const std::string &asymID, const std::string &compID, int seqID, const std::string &authSeqID) const
{
return const_cast<structure *>(this)->get_residue(asymID, compID, seqID, authSeqID);
}
/// \brief Get a the residue for atom \a atom
residue &get_residue(const atom &atom)
{
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
}
/// \brief Get a the residue for atom \a atom
const residue &get_residue(const atom &atom) const
{
return get_residue(atom.get_label_asym_id(), atom.get_label_comp_id(), atom.get_label_seq_id(), atom.get_auth_seq_id());
}
// Actions
void remove_atom(atom &a)
{
remove_atom(a, true);
}
void swap_atoms(atom a1, atom a2); // swap the labels for these atoms
void move_atom(atom a, point p); // move atom to a new location
void change_residue(residue &res, const std::string &newcompound,
const std::vector<std::tuple<std::string, std::string>> &remappedAtoms);
/// \brief Remove a residue, can be monomer or nonpoly
///
/// \param asym_id The asym ID
/// \param seq_id The sequence ID
void remove_residue(const std::string &asym_id, int seq_id, const std::string &auth_seq_id)
{
remove_residue(get_residue(asym_id, seq_id, auth_seq_id));
}
/// \brief Create a new non-polymer entity, returns new ID
/// \param mon_id The mon_id for the new nonpoly, must be an existing and known compound from CCD
/// \return The ID of the created entity
std::string create_non_poly_entity(const std::string &mon_id);
/// \brief Create a new NonPolymer struct_asym with atoms constructed from \a atoms, returns asym_id.
/// This method assumes you are copying data from one cif file to another.
///
/// \param entity_id The entity ID of the new nonpoly
/// \param atoms The array of atom_site rows containing the data.
/// \return The newly create asym ID
std::string create_non_poly(const std::string &entity_id, const std::vector<atom> &atoms);
/// \brief Create a new NonPolymer struct_asym with atoms constructed from info in \a atom_info, returns asym_id.
/// This method creates new atom records filled with info from the info.
///
/// \param entity_id The entity ID of the new nonpoly
/// \param atoms The array of sets of item data containing the data for the atoms.
/// \return The newly create asym ID
std::string create_non_poly(const std::string &entity_id, std::vector<row_initializer> atoms);
/// \brief Create a new (sugar) branch with one first NAG containing atoms constructed from \a atoms
branch &create_branch(std::vector<row_initializer> atoms);
/// \brief Extend an existing (sugar) branch identified by \a asymID with one sugar containing atoms constructed from \a atom_info
///
/// \param asym_id The asym id of the branch to extend
/// \param atom_info Array containing the info for the atoms to construct for the new sugar
/// \param link_sugar The sugar to link to, note: this is the sugar number (1 based)
/// \param link_atom The atom id of the atom linked in the sugar
branch &extend_branch(const std::string &asym_id, std::vector<row_initializer> atom_info,
int link_sugar, const std::string &link_atom);
/// \brief Remove \a branch
void remove_branch(branch &branch);
/// \brief Remove residue \a res
///
/// \param res The residue to remove
void remove_residue(residue &res);
/// \brief Translate the coordinates of all atoms in the structure by \a t
void translate(point t);
/// \brief Rotate the coordinates of all atoms in the structure by \a q
void rotate(quaternion t);
/// \brief Translate and rotate the coordinates of all atoms in the structure by \a t and \a q
void translate_and_rotate(point t, quaternion q);
/// \brief Translate, rotate and translate again the coordinates of all atoms in the structure by \a t1 , \a q and \a t2
void translate_rotate_and_translate(point t1, quaternion q, point t2);
void cleanup_empty_categories();
/// \brief Direct access to underlying data
category &get_category(std::string_view name) const
{
return m_db[name];
}
datablock &get_datablock() const
{
return m_db;
}
void validate_atoms() const;
private:
friend polymer;
friend residue;
std::string insert_compound(const std::string &compoundID, bool is_entity);
std::string create_entity_for_branch(branch &branch);
void load_data();
void load_atoms_for_model(StructureOpenOptions options);
/// \brief Construct an atom from \a args and hand it to the emplace_atom(atom &&) overload
///
/// \param args The values used to brace-initialise the new atom; taken by value and forwarded on
/// \return Whatever the emplace_atom(atom &&) overload returns
template <typename... Args>
atom &emplace_atom(Args... args)
{
return emplace_atom(atom{ std::forward<Args>(args)... });
}
atom &emplace_atom(atom &&atom);
void remove_atom(atom &a, bool removeFromResidue);
void remove_sugar(sugar &sugar);
datablock &m_db;
size_t m_model_nr;
std::vector<atom> m_atoms;
std::vector<size_t> m_atom_index;
std::list<polymer> m_polymers;
std::list<branch> m_branches;
std::vector<residue> m_non_polymers;
};
} // namespace cif::mm

293
include/cif++/parser.hpp Normal file
View File

@@ -0,0 +1,293 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <map>
#include <cif++/row.hpp>
namespace cif
{
// --------------------------------------------------------------------
/// \brief Exception reporting a parse failure, with the line number embedded in the message
class parse_error : public std::runtime_error
{
  public:
	/// \param line_nr The line number at which parsing failed
	/// \param message Description of what went wrong
	///
	/// The resulting what() text is "parse error at line <line_nr>: <message>"
	parse_error(uint32_t line_nr, const std::string &message)
		: std::runtime_error(std::string("parse error at line ")
								 .append(std::to_string(line_nr))
								 .append(": ")
								 .append(message))
	{
	}
};
// --------------------------------------------------------------------
// TODO: Need to implement support for transformed long lines
/// \brief Abstract base class for CIF parsers.
///
/// It offers character classification helpers, declares the tokenizer and the
/// parse entry points, and reports what it encounters through the pure virtual
/// produce_* methods that a derived class has to implement.
class sac_parser
{
  public:
	/// Result type of index_datablocks(), a map from string to std::size_t.
	/// NOTE(review): presumably datablock name -> position in the stream, confirm in the implementation
	using datablock_index = std::map<std::string, std::size_t>;

	/// \param is   The stream to parse
	/// \param init NOTE(review): meaning not visible in this header, see the implementation
	sac_parser(std::istream &is, bool init = true);

	virtual ~sac_parser() = default;

	/// Bit flags stored per character in kCharTraitsTable
	enum CharTraitsMask : uint8_t
	{
		kOrdinaryMask = 1 << 0,
		kNonBlankMask = 1 << 1,
		kTextLeadMask = 1 << 2,
		kAnyPrintMask = 1 << 3
	};

	/// \brief True if \a ch is whitespace according to std::isspace or is the character '#'
	static bool is_white(int ch)
	{
		return std::isspace(ch) or ch == '#';
	}

	/// \brief True if \a ch lies in 0x20..0x7f and has the kOrdinaryMask bit set in the traits table
	static constexpr bool is_ordinary(int ch)
	{
		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
	}

	/// \brief True if \a ch lies in 0x21..0x7f and has the kNonBlankMask bit set in the traits table
	static constexpr bool is_non_blank(int ch)
	{
		return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
	}

	/// \brief True if \a ch lies in 0x20..0x7f and has the kTextLeadMask bit set in the traits table
	static constexpr bool is_text_lead(int ch)
	{
		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
	}

	/// \brief True if \a ch is a tab, or lies in 0x20..0x7f and has the kAnyPrintMask bit set
	static constexpr bool is_any_print(int ch)
	{
		return ch == '\t' or
		       (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
	}

	/// \brief Check whether \a text can be used as a value without quoting it
	///
	/// The first character must be 'ordinary', all following characters must be
	/// 'non blank' and the text as a whole must not match the reserved word pattern.
	///
	/// \param text The text to check
	/// \return true if \a text qualifies as an unquoted string; an empty \a text never does
	static bool is_unquoted_string(std::string_view text)
	{
		// There is no first character to inspect in an empty text, so do not dereference begin()
		if (text.empty())
			return false;

		auto s = text.begin();

		bool result = is_ordinary(*s++);
		while (result and s != text.end())
		{
			result = is_non_blank(*s);
			++s;
		}

		// but be careful it does not contain e.g. stop_
		if (result)
		{
			static const std::regex reservedRx(R"((^(?:data|save)|.*(?:loop|stop|global))_.+)", std::regex_constants::icase);
			result = not std::regex_match(text.begin(), text.end(), reservedRx);
		}

		return result;
	}

  protected:
	/// Character traits, a combination of CharTraitsMask bits, indexed by (ch - 0x20).
	/// Only the first 96 entries (0x20..0x7f) are spelled out, the remainder is zero initialised.
	static constexpr uint8_t kCharTraitsTable[128] = {
		//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 10, 15, 15, 15, 15, //	3
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	4
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 14, 15, 14, //	5
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, //	6
		15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 0,  //	7
	};

	/// The kinds of token the tokenizer can return
	enum class CIFToken
	{
		Unknown,

		Eof,

		DATA,
		LOOP,
		GLOBAL,
		SAVE,
		STOP,
		Tag,
		Value
	};

	/// \brief Return the name of \a token as text, e.g. for use in messages
	static constexpr const char *get_token_name(CIFToken token)
	{
		switch (token)
		{
			case CIFToken::Unknown: return "Unknown";
			case CIFToken::Eof: return "Eof";
			case CIFToken::DATA: return "DATA";
			case CIFToken::LOOP: return "LOOP";
			case CIFToken::GLOBAL: return "GLOBAL";
			case CIFToken::SAVE: return "SAVE";
			case CIFToken::STOP: return "STOP";
			case CIFToken::Tag: return "Tag";
			case CIFToken::Value: return "Value";
			default: return "Invalid token parameter";
		}
	}

	/// The kinds of value a Value token can hold
	enum class CIFValue
	{
		Int,
		Float,
		Numeric,
		String,
		TextField,
		Inapplicable,
		Unknown
	};

	/// \brief Return the name of the value type \a type as text, e.g. for use in messages
	static constexpr const char *get_value_name(CIFValue type)
	{
		switch (type)
		{
			case CIFValue::Int: return "Int";
			case CIFValue::Float: return "Float";
			case CIFValue::Numeric: return "Numeric";
			case CIFValue::String: return "String";
			case CIFValue::TextField: return "TextField";
			case CIFValue::Inapplicable: return "Inapplicable";
			case CIFValue::Unknown: return "Unknown";
			default: return "Invalid type parameter";
		}
	}

	// get_next_char takes a char from the buffer, or if it is empty
	// from the istream. This function also does carriage/linefeed
	// translation.
	int get_next_char();

	void retract();

	int restart(int start);

	CIFToken get_next_token();

	void match(CIFToken token);

  public:
	bool parse_single_datablock(const std::string &datablock);

	datablock_index index_datablocks();

	bool parse_single_datablock(const std::string &datablock, const datablock_index &index);

	void parse_file();

  protected:
	void parse_global();

	void parse_datablock();

	virtual void parse_save_frame();

	/// \brief Report a fatal problem: prints \a msg to std::cerr when cif::VERBOSE > 0
	/// and then always throws a parse_error carrying the current line number
	void error(const std::string &msg)
	{
		if (cif::VERBOSE > 0)
			std::cerr << "Error parsing mmCIF: " << msg << std::endl;

		throw parse_error(m_line_nr, msg);
	}

	/// \brief Report a non fatal problem: prints \a msg along with the current line number
	/// to std::cerr, but only when cif::VERBOSE > 0
	void warning(const std::string &msg)
	{
		if (cif::VERBOSE > 0)
			std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
	}

	// production methods, these are pure virtual here

	virtual void produce_datablock(const std::string &name) = 0;
	virtual void produce_category(const std::string &name) = 0;
	virtual void produce_row() = 0;
	virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;

  protected:
	/// States for the tokenizer.
	/// NOTE(review): the explicit numeric values are presumably relied upon by the implementation, do not renumber
	enum State
	{
		Start,
		White,
		Esc,
		Comment,
		QuestionMark,
		Dot,
		QuotedString,
		QuotedStringQuote,
		UnquotedString,
		Tag,
		TextField,
		Float = 100,
		Int = 110,
		Value = 300,
		DATA,
		SAVE
	};

	std::streambuf &m_source;

	// Parser state
	bool m_validate;
	uint32_t m_line_nr;
	bool m_bol;
	CIFToken m_lookahead;
	std::string m_token_value;
	CIFValue mTokenType;
	std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
};
// --------------------------------------------------------------------
/// \brief A sac_parser that implements the four produce_* methods and holds a
/// reference to the cif::file it was constructed with.
/// NOTE(review): presumably the produce_* overrides store the parsed data in that file, confirm in the implementation
class parser : public sac_parser
{
public:
/// \param is   The stream to parse, passed on to sac_parser
/// \param file The file object this parser keeps a reference to
parser(std::istream &is, file &file)
: sac_parser(is)
, m_file(file)
{
}
void produce_datablock(const std::string &name) override;
void produce_category(const std::string &name) override;
void produce_row() override;
void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
protected:
file &m_file;
// NOTE(review): presumably the datablock, category and row currently being filled; null/empty until produced
datablock *m_datablock = nullptr;
category *m_category = nullptr;
row_handle m_row;
};
} // namespace cif

View File

@@ -0,0 +1,42 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++.hpp>
namespace cif::pdb
{
/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
void write_header_lines(std::ostream &os, const datablock &data);
std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
} // namespace cif::pdb

60
include/cif++/pdb/io.hpp Normal file
View File

@@ -0,0 +1,60 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++.hpp>
namespace cif::pdb
{
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(const std::filesystem::path &file);
/// \brief Read a file in either mmCIF or PDB format, compressed or not,
/// depending on the content.
file read(std::istream &is);
/// \brief Write out a file in PDB format
void write(std::ostream &os, const datablock &db);
/// \brief Write out a file in PDB format
///
/// Forwards to the datablock overload, passing the first datablock of \a f (\c f.front()).
/// NOTE(review): presumably \a f must contain at least one datablock, confirm
inline void write(std::ostream &os, const file &f)
{
write(os, f.front());
}
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
void write(const std::filesystem::path &file, const datablock &db);
/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
///
/// Forwards to the datablock overload, passing the first datablock of \a f (\c f.front()).
/// NOTE(review): presumably \a f must contain at least one datablock, confirm
inline void write(const std::filesystem::path &p, const file &f)
{
write(p, f.front());
}
}

View File

@@ -0,0 +1,65 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++.hpp>
namespace cif::pdb
{
// --------------------------------------------------------------------
/// \brief One record read from a PDB file: a record name, a line number and the value text.
///
/// NOTE(review): the one-element \c mValue array combined with the
/// \c operator new(size, vLen) overload suggests the value text is stored in
/// extra memory allocated directly behind the struct. Confirm in the
/// implementation before changing the member layout.
struct PDBRecord
{
PDBRecord *mNext; // NOTE(review): presumably the next record in a singly linked list, confirm
uint32_t mLineNr; // line number passed to the constructor
char mName[11];   // record name, fixed size buffer
size_t mVlen;     // NOTE(review): presumably the length of the text in mValue, confirm
char mValue[1];   // first byte of the value text, see the note on the struct
PDBRecord(uint32_t lineNr, const std::string &name, const std::string &value);
~PDBRecord();
void *operator new(size_t);
void *operator new(size_t size, size_t vLen);
void operator delete(void *p);
void operator delete(void *p, size_t vLen);
/// \brief Test this record against the record name \a name
bool is(const char *name) const;
// Value accessors by column. NOTE(review): column numbering (0 or 1 based, inclusive
// or exclusive last column) is not visible in this header, check the implementation
char vC(size_t column);
std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max());
int vI(int columnFirst, int columnLast);
std::string vF(size_t columnFirst, size_t columnLast);
};
// --------------------------------------------------------------------
void ReadPDBFile(std::istream &pdbFile, file &cifFile);
} // namespace cif::pdb

View File

@@ -0,0 +1,75 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/pdb/pdb2cif.hpp>
// --------------------------------------------------------------------
namespace cif::pdb
{
struct TemplateLine;
/// \brief Base class for parsers of PDB REMARK 3 records.
///
/// The only public entry point is the static parse() function; instances can
/// only be created by derived classes since the constructor is protected.
/// NOTE(review): presumably each derived class handles the output of one
/// refinement program, described by an array of TemplateLine, confirm in the implementation
class Remark3Parser
{
public:
virtual ~Remark3Parser() {}
/// \brief Parse the REMARK 3 data starting at record \a r, using \a expMethod and \a db
/// NOTE(review): the meaning of the returned bool is not visible here, presumably 'success'
static bool parse(const std::string &expMethod, PDBRecord *r, cif::datablock &db);
virtual std::string program();
virtual std::string version();
protected:
Remark3Parser(const std::string &name, const std::string &expMethod, PDBRecord *r, cif::datablock &db,
const TemplateLine templatelines[], uint32_t templateLineCount, std::regex programVersion);
/// NOTE(review): the returned float is presumably a score for how well the template matched, confirm
virtual float parse();
std::string nextLine();
bool match(const char *expr, int nextState);
void storeCapture(const char *category, std::initializer_list<const char *> items, bool createNew = false);
void storeRefineLsRestr(const char *type, std::initializer_list<const char *> values);
void updateRefineLsRestr(const char *type, std::initializer_list<const char *> values);
/// Hook for derived classes, does nothing by default
virtual void fixup() {}
std::string mName;
std::string mExpMethod;
PDBRecord *mRec;
// NOTE(review): held by value although the constructor takes a cif::datablock reference,
// so this is presumably a private copy, confirm this is intended
cif::datablock mDb;
std::string mLine;
std::smatch mM;
uint32_t mState;
const TemplateLine *mTemplate;
uint32_t mTemplateCount;
std::regex mProgramVersion;
};
} // namespace cif::pdb

55
include/cif++/pdb/tls.hpp Normal file
View File

@@ -0,0 +1,55 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <string>
#include <tuple>
#include <vector>
#include <cif++.hpp>
namespace cif
{
extern const int
kResidueNrWildcard,
kNoSeqNum;
struct tls_selection;
struct tls_residue;
/// \brief Abstract interface for a TLS selection.
///
/// Derived classes must implement collect_residues(); get_ranges() is a
/// non-virtual member declared here and implemented elsewhere.
struct tls_selection
{
virtual ~tls_selection() {}
/// \param db          The datablock to use
/// \param residues    The residues to operate on. NOTE(review): presumably updated in place to mark the selected ones, confirm
/// \param indentLevel NOTE(review): presumably only used to indent diagnostic output, confirm
virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
/// \return A list of (string, int, int) tuples.
/// NOTE(review): presumably (chain id, first seq nr, last seq nr) with \a pdbNamespace selecting PDB versus mmCIF numbering, confirm
std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
};
// Low level: get the selections
std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
} // namespace cif

736
include/cif++/point.hpp Normal file
View File

@@ -0,0 +1,736 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cmath>
#include <complex>
#include <functional>
#include <valarray>
#if __has_include(<clipper/core/coords.h>)
#define HAVE_LIBCLIPPER 1
#include <clipper/core/coords.h>
#endif
namespace cif
{
// --------------------------------------------------------------------
/// \brief The constant pi. Declared constexpr so it can take part in constant
/// expressions, and inline so all translation units share a single definition.
inline constexpr double
	kPI = 3.141592653589793238462643383279502884;
// --------------------------------------------------------------------
// A stripped down quaternion implementation, based on boost::math::quaternion
// We use quaternions to do rotations in 3d space
/// \brief A quaternion a + b*i + c*j + d*k with components of type \a T.
///
/// \tparam T The scalar type used to store the four components
template <typename T>
class quaternion_type
{
  public:
	using value_type = T;

	/// \brief Construct from four scalars, each defaulting to a value initialised \a T
	constexpr explicit quaternion_type(value_type const &value_a = value_type(), value_type const &value_b = value_type(), value_type const &value_c = value_type(), value_type const &value_d = value_type())
		: a(value_a)
		, b(value_b)
		, c(value_c)
		, d(value_d)
	{
	}

	/// \brief Construct from two complex numbers: \a z0 provides a and b, \a z1 provides c and d
	constexpr explicit quaternion_type(std::complex<value_type> const &z0, std::complex<value_type> const &z1 = std::complex<value_type>())
		: a(z0.real())
		, b(z0.imag())
		, c(z1.real())
		, d(z1.imag())
	{
	}

	constexpr quaternion_type(quaternion_type const &) = default;
	constexpr quaternion_type(quaternion_type &&) = default;

	/// \brief Convert from a quaternion with another component type
	///
	/// Uses the public accessors since the data members of another
	/// specialisation are not accessible from here.
	template <typename X>
	constexpr explicit quaternion_type(quaternion_type<X> const &rhs)
		: a(static_cast<value_type>(rhs.get_a()))
		, b(static_cast<value_type>(rhs.get_b()))
		, c(static_cast<value_type>(rhs.get_c()))
		, d(static_cast<value_type>(rhs.get_d()))
	{
	}

	// accessors
	//
	// Note:    Like complex number, quaternions do have a meaningful notion of "real part",
	//            but unlike them there is no meaningful notion of "imaginary part".
	//            Instead there is an "unreal part" which itself is a quaternion, and usually
	//            nothing simpler (as opposed to the complex number case).
	//            However, for practicality, there are accessors for the other components
	//            (these are necessary for the templated copy constructor, for instance).

	/// \brief The real part, component a
	constexpr value_type real() const
	{
		return a;
	}

	/// \brief The unreal part: this quaternion with component a set to zero
	constexpr quaternion_type unreal() const
	{
		// the constructor is explicit, so the type has to be named here
		return quaternion_type{ value_type(0), b, c, d };
	}

	/// \brief Exchange the components of this quaternion with those of \a o
	constexpr void swap(quaternion_type &o)
	{
		std::swap(a, o.a);
		std::swap(b, o.b);
		std::swap(c, o.c);
		std::swap(d, o.d);
	}

	// assignment operators

	/// \brief Assign from a quaternion with another component type, converting each component
	template <typename X>
	constexpr quaternion_type &operator=(quaternion_type<X> const &rhs)
	{
		a = static_cast<value_type>(rhs.get_a());
		b = static_cast<value_type>(rhs.get_b());
		c = static_cast<value_type>(rhs.get_c());
		d = static_cast<value_type>(rhs.get_d());

		return *this;
	}

	constexpr quaternion_type &operator=(quaternion_type const &rhs)
	{
		a = rhs.a;
		b = rhs.b;
		c = rhs.c;
		d = rhs.d;

		return *this;
	}

	/// \brief Assign a scalar: a becomes \a rhs, the other components become zero
	constexpr quaternion_type &operator=(value_type const &rhs)
	{
		a = rhs;

		b = c = d = static_cast<value_type>(0);

		return *this;
	}

	/// \brief Assign a complex number: a and b are taken from \a rhs, c and d become zero
	constexpr quaternion_type &operator=(std::complex<value_type> const &rhs)
	{
		a = rhs.real();
		b = rhs.imag();

		c = d = static_cast<value_type>(0);

		return *this;
	}

	// other assignment-related operators
	//
	// NOTE:    Quaternion multiplication is *NOT* commutative;
	//            symbolically, "q *= rhs;" means "q = q * rhs;"
	//            and "q /= rhs;" means "q = q * inverse_of(rhs);"

	constexpr quaternion_type &operator+=(value_type const &rhs)
	{
		a += rhs;
		return *this;
	}

	constexpr quaternion_type &operator+=(std::complex<value_type> const &rhs)
	{
		a += std::real(rhs);
		b += std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator+=(quaternion_type<X> const &rhs)
	{
		a += static_cast<value_type>(rhs.get_a());
		b += static_cast<value_type>(rhs.get_b());
		c += static_cast<value_type>(rhs.get_c());
		d += static_cast<value_type>(rhs.get_d());
		return *this;
	}

	constexpr quaternion_type &operator-=(value_type const &rhs)
	{
		a -= rhs;
		return *this;
	}

	constexpr quaternion_type &operator-=(std::complex<value_type> const &rhs)
	{
		a -= std::real(rhs);
		b -= std::imag(rhs);
		return *this;
	}

	template <class X>
	constexpr quaternion_type &operator-=(quaternion_type<X> const &rhs)
	{
		a -= static_cast<value_type>(rhs.get_a());
		b -= static_cast<value_type>(rhs.get_b());
		c -= static_cast<value_type>(rhs.get_c());
		d -= static_cast<value_type>(rhs.get_d());
		return *this;
	}

	constexpr quaternion_type &operator*=(value_type const &rhs)
	{
		a *= rhs;
		b *= rhs;
		c *= rhs;
		d *= rhs;
		return *this;
	}

	constexpr quaternion_type &operator*=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();
		quaternion_type result(a * ar - b * br, a * br + b * ar, c * ar + d * br, -c * br + d * ar);
		swap(result);
		return *this;
	}

	/// \brief The product \a lhs * \a rhs, in that order
	constexpr friend quaternion_type operator*(const quaternion_type &lhs, const quaternion_type &rhs)
	{
		auto result = lhs;
		result *= rhs;
		return result;
	}

	template <typename X>
	constexpr quaternion_type &operator*=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		quaternion_type result(a * ar - b * br - c * cr - d * dr, a * br + b * ar + c * dr - d * cr, a * cr - b * dr + c * ar + d * br, a * dr + b * cr - c * br + d * ar);
		swap(result);
		return *this;
	}

	constexpr quaternion_type &operator/=(value_type const &rhs)
	{
		a /= rhs;
		b /= rhs;
		c /= rhs;
		d /= rhs;
		return *this;
	}

	constexpr quaternion_type &operator/=(std::complex<value_type> const &rhs)
	{
		value_type ar = rhs.real();
		value_type br = rhs.imag();
		value_type denominator = ar * ar + br * br;
		quaternion_type result((+a * ar + b * br) / denominator, (-a * br + b * ar) / denominator, (+c * ar - d * br) / denominator, (+c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	template <typename X>
	constexpr quaternion_type &operator/=(quaternion_type<X> const &rhs)
	{
		value_type ar = static_cast<value_type>(rhs.get_a());
		value_type br = static_cast<value_type>(rhs.get_b());
		value_type cr = static_cast<value_type>(rhs.get_c());
		value_type dr = static_cast<value_type>(rhs.get_d());

		value_type denominator = ar * ar + br * br + cr * cr + dr * dr;
		quaternion_type result((+a * ar + b * br + c * cr + d * dr) / denominator, (-a * br + b * ar - c * dr + d * cr) / denominator, (-a * cr + b * dr + c * ar - d * br) / denominator, (-a * dr - b * cr + c * br + d * ar) / denominator);
		swap(result);
		return *this;
	}

	/// \brief Return \a q scaled to unit length
	///
	/// When the length of \a q is not larger than 0.001 the quaternion
	/// (1, 0, 0, 0) is returned instead.
	constexpr friend quaternion_type normalize(quaternion_type q)
	{
		// plain arithmetic, there is no need to allocate a container for four values
		value_type length = std::sqrt(q.a * q.a + q.b * q.b + q.c * q.c + q.d * q.d);

		if (length > 0.001)
			q /= length;
		else
			q = quaternion_type(1, 0, 0, 0);

		return q;
	}

	/// \brief The conjugate of \a q: same a, negated b, c and d
	constexpr friend quaternion_type conj(quaternion_type q)
	{
		return quaternion_type{ +q.a, -q.b, -q.c, -q.d };
	}

	constexpr value_type get_a() const { return a; }
	constexpr value_type get_b() const { return b; }
	constexpr value_type get_c() const { return c; }
	constexpr value_type get_d() const { return d; }

  private:
	value_type a, b, c, d;
};
/// \brief Build a quaternion from the spherical coordinates \a rho, \a theta, \a phi1 and \a phi2
///
/// The components are (cos(theta) cos(phi1) cos(phi2), sin(theta) cos(phi1) cos(phi2),
/// sin(phi1) cos(phi2), sin(phi2)), all multiplied by \a rho.
template <typename T>
inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1, T const &phi2)
{
	const T c1 = std::cos(phi1);
	const T c2 = std::cos(phi2);

	quaternion_type<T> q(
		std::cos(theta) * c1 * c2,
		std::sin(theta) * c1 * c2,
		std::sin(phi1) * c2,
		std::sin(phi2));

	q *= rho;

	return q;
}
using quaternion = quaternion_type<float>;
// --------------------------------------------------------------------
// point, a location with x, y and z coordinates as floating point.
// This type converts to a tuple of references to its coordinates, so
// you can do things like:
//
// float x, y, z;
// tie(x, y, z) = atom.loc();
template <typename F>
struct point_type
{
using value_type = F;
value_type m_x, m_y, m_z;
constexpr point_type()
: m_x(0)
, m_y(0)
, m_z(0)
{
}
constexpr point_type(value_type x, value_type y, value_type z)
: m_x(x)
, m_y(y)
, m_z(z)
{
}
template <typename PF>
constexpr point_type(const point_type<PF> &pt)
: m_x(static_cast<F>(pt.m_x))
, m_y(static_cast<F>(pt.m_y))
, m_z(static_cast<F>(pt.m_z))
{
}
constexpr point_type(const std::tuple<value_type, value_type, value_type> &pt)
: point_type(std::get<0>(pt), std::get<1>(pt), std::get<2>(pt))
{
}
#if HAVE_LIBCLIPPER
constexpr point_type(const clipper::Coord_orth &pt)
: m_x(pt[0])
, m_y(pt[1])
, m_z(pt[2])
{
}
constexpr point_type &operator=(const clipper::Coord_orth &rhs)
{
m_x = rhs[0];
m_y = rhs[1];
m_z = rhs[2];
return *this;
}
#endif
template <typename PF>
constexpr point_type &operator=(const point_type<PF> &rhs)
{
m_x = static_cast<F>(rhs.m_x);
m_y = static_cast<F>(rhs.m_y);
m_z = static_cast<F>(rhs.m_z);
return *this;
}
constexpr value_type &get_x() { return m_x; }
constexpr value_type get_x() const { return m_x; }
constexpr void set_x(value_type x) { m_x = x; }
constexpr value_type &get_y() { return m_y; }
constexpr value_type get_y() const { return m_y; }
constexpr void set_y(value_type y) { m_y = y; }
constexpr value_type &get_z() { return m_z; }
constexpr value_type get_z() const { return m_z; }
constexpr void set_z(value_type z) { m_z = z; }
constexpr point_type &operator+=(const point_type &rhs)
{
m_x += rhs.m_x;
m_y += rhs.m_y;
m_z += rhs.m_z;
return *this;
}
constexpr point_type &operator+=(value_type d)
{
m_x += d;
m_y += d;
m_z += d;
return *this;
}
constexpr point_type &operator-=(const point_type &rhs)
{
m_x -= rhs.m_x;
m_y -= rhs.m_y;
m_z -= rhs.m_z;
return *this;
}
constexpr point_type &operator-=(value_type d)
{
m_x -= d;
m_y -= d;
m_z -= d;
return *this;
}
constexpr point_type &operator*=(value_type rhs)
{
m_x *= rhs;
m_y *= rhs;
m_z *= rhs;
return *this;
}
constexpr point_type &operator/=(value_type rhs)
{
m_x /= rhs;
m_y /= rhs;
m_z /= rhs;
return *this;
}
constexpr value_type normalize()
{
auto length = m_x * m_x + m_y * m_y + m_z * m_z;
if (length > 0)
{
length = std::sqrt(length);
operator/=(length);
}
return length;
}
constexpr void rotate(const quaternion &q)
{
quaternion_type<value_type> p(0, m_x, m_y, m_z);
p = q * p * conj(q);
m_x = p.get_b();
m_y = p.get_c();
m_z = p.get_d();
}
#if HAVE_LIBCLIPPER
operator clipper::Coord_orth() const
{
return clipper::Coord_orth(m_x, m_y, m_z);
}
#endif
constexpr operator std::tuple<const value_type &, const value_type &, const value_type &>() const
{
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
}
constexpr operator std::tuple<value_type &, value_type &, value_type &>()
{
return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
}
constexpr bool operator==(const point_type &rhs) const
{
return m_x == rhs.m_x and m_y == rhs.m_y and m_z == rhs.m_z;
}
// consider point as a vector... perhaps I should rename point?
constexpr value_type length_sq() const
{
return m_x * m_x + m_y * m_y + m_z * m_z;
}
constexpr value_type length() const
{
return std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z);
}
};
using point = point_type<float>;
/// \brief Write \a pt to \a os formatted as (x,y,z)
///
/// Not declared constexpr: writing to a stream can never be part of a constant expression.
template <typename F>
inline std::ostream &operator<<(std::ostream &os, const point_type<F> &pt)
{
	os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
	return os;
}
/// \brief The coordinate-wise sum of \a lhs and \a rhs
template <typename F>
inline constexpr point_type<F> operator+(const point_type<F> &lhs, const point_type<F> &rhs)
{
	point_type<F> sum(lhs);
	sum += rhs;
	return sum;
}
/// \brief The coordinate-wise difference of \a lhs and \a rhs
template <typename F>
inline constexpr point_type<F> operator-(const point_type<F> &lhs, const point_type<F> &rhs)
{
	point_type<F> difference(lhs);
	difference -= rhs;
	return difference;
}
/// \brief The point \a pt with all three coordinates negated
template <typename F>
inline constexpr point_type<F> operator-(const point_type<F> &pt)
{
	const F nx = -pt.m_x;
	const F ny = -pt.m_y;
	const F nz = -pt.m_z;
	return point_type<F>(nx, ny, nz);
}
/// \brief The point \a pt with each coordinate multiplied by \a f
template <typename F>
inline constexpr point_type<F> operator*(const point_type<F> &pt, F f)
{
	point_type<F> scaled(pt);
	scaled *= f;
	return scaled;
}
/// \brief The point \a pt with each coordinate multiplied by \a f, scalar on the left hand side
template <typename F>
inline constexpr point_type<F> operator*(F f, const point_type<F> &pt)
{
	point_type<F> scaled(pt);
	scaled *= f;
	return scaled;
}
/// \brief The point \a pt with each coordinate divided by \a f
template <typename F>
inline constexpr point_type<F> operator/(const point_type<F> &pt, F f)
{
	point_type<F> quotient(pt);
	quotient /= f;
	return quotient;
}
// --------------------------------------------------------------------
// several standard 3d operations
/// \brief Squared euclidean distance between \a a and \a b.
template <typename F>
inline constexpr auto distance_squared(const point_type<F> &a, const point_type<F> &b)
{
    const auto dx = a.m_x - b.m_x;
    const auto dy = a.m_y - b.m_y;
    const auto dz = a.m_z - b.m_z;
    return dx * dx + dy * dy + dz * dz;
}
/// \brief Euclidean distance between \a a and \a b.
template <typename F>
inline constexpr auto distance(const point_type<F> &a, const point_type<F> &b)
{
    const auto dx = a.m_x - b.m_x;
    const auto dy = a.m_y - b.m_y;
    const auto dz = a.m_z - b.m_z;
    return std::sqrt(dx * dx + dy * dy + dz * dz);
}
/// \brief Dot (inner) product of \a a and \a b, both interpreted as vectors:
/// the sum of the products of corresponding components.
template <typename F>
inline constexpr auto dot_product(const point_type<F> &a, const point_type<F> &b)
{
return a.m_x * b.m_x + a.m_y * b.m_y + a.m_z * b.m_z;
}
/// \brief Cross product \a a x \a b, both interpreted as vectors.
template <typename F>
inline constexpr point_type<F> cross_product(const point_type<F> &a, const point_type<F> &b)
{
    const auto cx = a.m_y * b.m_z - b.m_y * a.m_z;
    const auto cy = a.m_z * b.m_x - b.m_z * a.m_x;
    const auto cz = a.m_x * b.m_y - b.m_x * a.m_y;
    return point_type<F>(cx, cy, cz);
}
/// \brief The angle at \a p2 between the vectors p2->p1 and p2->p3.
///
/// The result is the arc cosine of the normalised dot product, multiplied
/// by 180 / kPI.
///
/// The cosine is clamped to [-1, 1] before calling acos: for (nearly)
/// collinear points rounding can push the quotient slightly outside that
/// interval, which would otherwise make acos return NaN. If either vector has
/// zero length the quotient itself is NaN and the result stays NaN.
///
/// \param p1 End point of the first arm
/// \param p2 The vertex of the angle
/// \param p3 End point of the second arm
template <typename F>
constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
{
    point_type<F> v1 = p1 - p2;
    point_type<F> v2 = p3 - p2;

    auto cosine = dot_product(v1, v2) / (v1.length() * v2.length());

    // guard against rounding errors; NaN compares false and passes through unchanged
    if (cosine > 1)
        cosine = 1;
    else if (cosine < -1)
        cosine = -1;

    return std::acos(cosine) * 180 / kPI;
}
/// \brief The dihedral (torsion) angle defined by the four points \a p1 .. \a p4,
/// measured around the axis through \a p2 and \a p3.
///
/// The atan2 result is multiplied by 180 / kPI. The value 360 is returned as
/// a marker when the angle cannot be computed (see the checks below).
template <typename F>
constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
point_type<F> v12 = p1 - p2; // vector from p2 to p1
point_type<F> v43 = p4 - p3; // vector from p3 to p4
point_type<F> z = p2 - p3; // vector from p3 to p2
// p and x are perpendicular to z; y is perpendicular to both z and x
point_type<F> p = cross_product(z, v12);
point_type<F> x = cross_product(z, v43);
point_type<F> y = cross_product(z, x);
// squared lengths of x and y, zero when the points are degenerate
auto u = dot_product(x, x);
auto v = dot_product(y, y);
// 360 is the 'undefined' result
F result = 360;
if (u > 0 and v > 0)
{
// u and v are reused: now the projections of p on the normalised x and y
u = dot_product(p, x) / std::sqrt(u);
v = dot_product(p, y) / std::sqrt(v);
// atan2 is only called when at least one projection is non-zero
if (u != 0 or v != 0)
result = std::atan2(v, u) * static_cast<F>(180 / kPI);
}
return result;
}
template <typename F>
constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
{
point_type<F> v12 = p1 - p2;
point_type<F> v34 = p3 - p4;
auto x = dot_product(v12, v12) * dot_product(v34, v34);
return x > 0 ? dot_product(v12, v34) / std::sqrt(x) : 0;
}
/// \brief Distance from point \a p to the line through \a l1 and \a l2.
///
/// Computed as the length of the cross product of (p - l1) and (p - l2)
/// divided by the length of (l2 - l1).
template <typename F>
constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<F> &l2, const point_type<F> &p)
{
    const auto direction = l2 - l1;
    const auto from_l1 = p - l1;
    const auto from_l2 = p - l2;

    const auto area = cross_product(from_l1, from_l2).length();
    const auto base = direction.length();

    return area / base;
}
// --------------------------------------------------------------------
// For e.g. simulated annealing, returns a new point that is moved in
// a random direction with a distance randomly chosen from a normal
// distribution with a stddev of offset.
point nudge(point p, float offset);
// --------------------------------------------------------------------
quaternion construct_from_angle_axis(float angle, point axis);
std::tuple<double, point> quaternion_to_angle_axis(quaternion q);
point centroid(const std::vector<point> &Points);
point center_points(std::vector<point> &Points);
/// \brief Returns how the two sets of points \a a and \a b can be aligned
///
/// \param a The first set of points
/// \param b The second set of points
/// \result The quaternion which should be applied to the points in \a a to
/// obtain the best superposition.
quaternion align_points(const std::vector<point> &a, const std::vector<point> &b);
/// \brief The RMSd for the points in \a a and \a b
double RMSd(const std::vector<point> &a, const std::vector<point> &b);
// --------------------------------------------------------------------
// Helper class to generate evenly divided points on a sphere
// we use a fibonacci sphere to calculate even distribution of the dots
/// \brief Generates P = 2 * N + 1 points on the unit sphere.
///
/// The constructor fills the array with one point for every integer i in
/// [-N, N], so the array must hold exactly 2 * N + 1 elements. (The previous
/// value `2 * N * 1` made the array one element too small and the constructor
/// wrote past its end.)
template <int N>
class spherical_dots
{
  public:
    /// The number of generated points, one for each i in [-N, N]
    constexpr static int P = 2 * N + 1;

    using array_type = typename std::array<point, P>;
    using iterator = typename array_type::const_iterator;

    /// \brief Access to a shared, lazily constructed instance
    static spherical_dots &instance()
    {
        static spherical_dots sInstance;
        return sInstance;
    }

    /// \brief The number of points
    size_t size() const { return m_points.size(); }

    /// \brief A copy of the point at index \a inIx (not range checked)
    const point operator[](uint32_t inIx) const { return m_points[inIx]; }

    iterator begin() const { return m_points.begin(); }
    iterator end() const { return m_points.end(); }

    /// \brief 4 * kPI / P, the share of the unit sphere surface per point
    double weight() const { return m_weight; }

    spherical_dots()
    {
        const double
            kGoldenRatio = (1 + std::sqrt(5.0)) / 2;

        m_weight = (4 * kPI) / P;

        auto p = m_points.begin();

        // exactly P iterations, matching the size of m_points
        for (int32_t i = -N; i <= N; ++i)
        {
            double lat = std::asin((2.0 * i) / P);
            double lon = std::fmod(i, kGoldenRatio) * 2 * kPI / kGoldenRatio;

            p->m_x = std::sin(lon) * std::cos(lat);
            p->m_y = std::cos(lon) * std::cos(lat);
            p->m_z = std::sin(lat);

            ++p;
        }
    }

  private:
    array_type m_points;
    double m_weight;
};
} // namespace cif

305
include/cif++/row.hpp Normal file
View File

@@ -0,0 +1,305 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <cif++/item.hpp>
namespace cif
{
namespace detail
{
// some helper classes to help create tuple result types
/// \brief Intermediate result of a row get() for N columns.
///
/// Holds a reference to the row_handle and the indices of the requested
/// columns. The values are only converted when this object is converted to a
/// std::tuple with the same number of elements.
///
/// NOTE(review): m_row is a reference, so this object should not outlive the
/// row_handle it was created from.
template <typename... C>
struct get_row_result
{
// the number of requested columns
static constexpr size_t N = sizeof...(C);
get_row_result(const row_handle &r, std::array<size_t, N> &&columns)
: m_row(r)
, m_columns(std::move(columns))
{
}
// access the item in the ix-th requested column
const item_handle operator[](size_t ix) const
{
return m_row[m_columns[ix]];
}
// conversion to a tuple, only enabled when the number of tuple
// elements equals the number of requested columns
template <typename... Ts, std::enable_if_t<N == sizeof...(Ts), int> = 0>
operator std::tuple<Ts...>() const
{
return get<Ts...>(std::index_sequence_for<Ts...>{});
}
// helper for the conversion: converts the item in column Is to type Ts
template <typename... Ts, std::size_t... Is>
std::tuple<Ts...> get(std::index_sequence<Is...>) const
{
return std::tuple<Ts...>{ m_row[m_columns[Is]].template as<Ts>()... };
}
const row_handle &m_row;
std::array<size_t, N> m_columns;
};
// We want to be able to tie some variables to a get_row_result, for this we use tie_wrap.
/// \brief Wrapper around a tuple (of references when created by cif::tie)
/// that can be assigned from a row result object.
template <typename... Ts>
struct tie_wrap
{
tie_wrap(Ts... args)
: m_value(args...)
{
}
// Note: the parameter is a const rvalue reference, so this only accepts temporaries
template <typename RR>
void operator=(const RR &&rr)
{
// get_row_result will do the conversion, but only if the types
// are compatible. That means the number of parameters to the get()
// of the row should be equal to the number of items in the tuple
// you are trying to tie.
using RType = std::tuple<typename std::remove_reference<Ts>::type...>;
m_value = static_cast<RType>(rr);
}
std::tuple<Ts...> m_value;
};
} // namespace detail
template <typename... Ts>
auto tie(Ts &...v)
{
return detail::tie_wrap<Ts &...>(std::forward<Ts &>(v)...);
}
// --------------------------------------------------------------------
/// \brief the row class, this one is not directly accessible from the outside
class row : public std::vector<item_value>
{
public:
row() = default;
item_value* get(size_t ix)
{
return ix < size() ? &at(ix) : nullptr;
}
const item_value* get(size_t ix) const
{
return ix < size() ? &at(ix) : nullptr;
}
private:
friend class category;
friend class category_index;
template <typename, typename...>
friend class iterator_impl;
void append(size_t ix, item_value &&iv)
{
if (ix >= size())
resize(ix + 1);
at(ix) = std::move(iv);
}
void remove(size_t ix)
{
if (ix < size())
at(ix) = item_value{};
}
row *m_next = nullptr;
};
// --------------------------------------------------------------------
/// \brief row_handle is the way to access data stored in rows
///
/// A row_handle is a pair of non-owning pointers: one to a category and one
/// to a row. A default constructed row_handle has both set to nullptr and is
/// considered empty.
class row_handle
{
public:
friend class item_handle;
friend class category;
friend class category_index;
friend class row_initializer;
row_handle() = default;
row_handle(const row_handle &) = default;
row_handle(row_handle &&) = default;
row_handle &operator=(const row_handle &) = default;
row_handle &operator=(row_handle &&) = default;
// Note: constness of both arguments is cast away, the handle stores non-const pointers
row_handle(const category &cat, const row &r)
: m_category(const_cast<category *>(&cat))
, m_row(const_cast<row *>(&r))
{
}
// Dereferences m_category without a check, do not call on an empty handle
const category &get_category() const
{
return *m_category;
}
// A handle is empty when either the category or the row pointer is null
bool empty() const
{
return m_category == nullptr or m_row == nullptr;
}
explicit operator bool() const
{
return not empty();
}
// Access by column index; an empty handle yields item_handle::s_null_item
item_handle operator[](uint32_t column_ix)
{
return empty() ? item_handle::s_null_item : item_handle(column_ix, *this);
}
const item_handle operator[](uint32_t column_ix) const
{
return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this));
}
// Access by column name. The non-const version calls add_column,
// the const version calls get_column_ix.
item_handle operator[](std::string_view column_name)
{
return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this);
}
const item_handle operator[](std::string_view column_name) const
{
return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this));
}
// Returns a get_row_result for the named columns, which can be converted to a std::tuple
template <typename... C>
auto get(C... columns) const
{
return detail::get_row_result<C...>(*this, { get_column_ix(columns)... });
}
// Same, but with explicit result types; enabled when the number of types
// equals the number of columns
template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C), int> = 0>
std::tuple<Ts...> get(C... columns) const
{
return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... });
}
// The value of a single column converted to type T
template <typename T>
T get(const char *column)
{
return operator[](get_column_ix(column)).template as<T>();
}
// Assign all items in values, always passing true for updateLinked
void assign(const std::vector<item> &values)
{
for (auto &value : values)
assign(value, true);
}
// Assign by column name, the column index is obtained with add_column
void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
{
assign(add_column(name), value, updateLinked, validate);
}
void assign(size_t column, std::string_view value, bool updateLinked, bool validate = true);
// Two handles are equal when they point to the same category and the same row
bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }
private:
uint16_t get_column_ix(std::string_view name) const;
std::string_view get_column_name(uint16_t ix) const;
uint16_t add_column(std::string_view name);
row *get_row()
{
return m_row;
}
const row *get_row() const
{
return m_row;
}
void assign(const item &i, bool updateLinked)
{
assign(i.name(), i.value(), updateLinked);
}
void swap(size_t column, row_handle &r);
// non-owning pointers, both nullptr for an empty handle
category *m_category = nullptr;
row *m_row = nullptr;
};
// --------------------------------------------------------------------
/// \brief A list of items (a std::vector<item>), constructible from an
/// initializer list, an iterator range over items or a row_handle.
class row_initializer : public std::vector<item>
{
public:
friend class category;
row_initializer() = default;
row_initializer(const row_initializer &) = default;
row_initializer(row_initializer &&) = default;
row_initializer &operator=(const row_initializer &) = default;
row_initializer &operator=(row_initializer &&) = default;
row_initializer(std::initializer_list<item> items)
: std::vector<item>(items)
{
}
// Only enabled for iterators whose value_type is item
template <typename ItemIter, std::enable_if_t<std::is_same_v<typename ItemIter::value_type, item>, int> = 0>
row_initializer(ItemIter b, ItemIter e)
: std::vector<item>(b, e)
{
}
row_initializer(row_handle rh);
void set_value(std::string_view name, std::string_view value);
// Convenience overload, forwards the name and value of i
void set_value(const item &i)
{
set_value(i.name(), i.value());
}
void set_value_if_empty(std::string_view name, std::string_view value);
// Convenience overload, forwards the name and value of i
void set_value_if_empty(const item &i)
{
set_value_if_empty(i.name(), i.value());
}
};
} // namespace cif

150
include/cif++/symmetry.hpp Normal file
View File

@@ -0,0 +1,150 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <array>
#include <cstdint>
#include <string>
namespace cif
{
// --------------------------------------------------------------------
/// \brief Selects the kind of space group name passed to get_space_group_number.
/// NOTE(review): the enumerators presumably correspond to the name, xHM and
/// Hall fields of struct space_group; confirm in the implementation.
enum class space_group_name
{
full,
xHM,
Hall
};
/// \brief One entry in the kSpaceGroups table: three names for a space group
/// and a number.
struct space_group
{
const char *name; // presumably the full name, see space_group_name::full
const char *xHM;  // presumably the extended Hermann-Mauguin symbol (TODO confirm)
const char *Hall; // presumably the Hall symbol (TODO confirm)
int nr;           // the space group number
};
extern const space_group kSpaceGroups[];
extern const std::size_t kNrOfSpaceGroups;
// --------------------------------------------------------------------
/// \brief Fifteen small integers packed into the lower 36 bits of a uint64_t.
///
/// The first nine values are stored in two bits each, the last six values in
/// three bits each. Values are masked when packed, so data() returns the
/// masked, non-negative values.
///
/// kPackMask is a static constant: as a per-object const data member it would
/// double the size of each object and delete the copy assignment operator.
struct symop_data
{
    /// \brief Pack the fifteen values in \a data
    constexpr symop_data(const std::array<int, 15> &data)
        : m_packed((data[0] & 0x03ULL) << 34 bitor
                   (data[1] & 0x03ULL) << 32 bitor
                   (data[2] & 0x03ULL) << 30 bitor
                   (data[3] & 0x03ULL) << 28 bitor
                   (data[4] & 0x03ULL) << 26 bitor
                   (data[5] & 0x03ULL) << 24 bitor
                   (data[6] & 0x03ULL) << 22 bitor
                   (data[7] & 0x03ULL) << 20 bitor
                   (data[8] & 0x03ULL) << 18 bitor
                   (data[9] & 0x07ULL) << 15 bitor
                   (data[10] & 0x07ULL) << 12 bitor
                   (data[11] & 0x07ULL) << 9 bitor
                   (data[12] & 0x07ULL) << 6 bitor
                   (data[13] & 0x07ULL) << 3 bitor
                   (data[14] & 0x07ULL) << 0)
    {
    }

    bool operator==(const symop_data &rhs) const
    {
        return m_packed == rhs.m_packed;
    }

    /// \brief Ordering on the packed value
    bool operator<(const symop_data &rhs) const
    {
        return m_packed < rhs.m_packed;
    }

    /// \brief Unpack the fifteen values again
    std::array<int, 15> data() const
    {
        // mask first, then narrow: the masked value always fits in an int
        return {
            static_cast<int>((m_packed >> 34) bitand 0x03),
            static_cast<int>((m_packed >> 32) bitand 0x03),
            static_cast<int>((m_packed >> 30) bitand 0x03),
            static_cast<int>((m_packed >> 28) bitand 0x03),
            static_cast<int>((m_packed >> 26) bitand 0x03),
            static_cast<int>((m_packed >> 24) bitand 0x03),
            static_cast<int>((m_packed >> 22) bitand 0x03),
            static_cast<int>((m_packed >> 20) bitand 0x03),
            static_cast<int>((m_packed >> 18) bitand 0x03),
            static_cast<int>((m_packed >> 15) bitand 0x07),
            static_cast<int>((m_packed >> 12) bitand 0x07),
            static_cast<int>((m_packed >> 9) bitand 0x07),
            static_cast<int>((m_packed >> 6) bitand 0x07),
            static_cast<int>((m_packed >> 3) bitand 0x07),
            static_cast<int>((m_packed >> 0) bitand 0x07),
        };
    }

  private:
    friend struct symop_datablock;

    // the lower 36 bits: 9 * 2 + 6 * 3
    static constexpr uint64_t kPackMask = (~0ULL >> (64 - 36));

    // construct from an already packed value, bits above the lower 36 are dropped
    symop_data(uint64_t v)
        : m_packed(v bitand kPackMask)
    {
    }

    uint64_t m_packed;
};
/// \brief A space group number, a rotational number and a packed symop_data
/// combined in a single 64 bit value.
///
/// Layout: bits 48..63 space group, bits 40..47 rotational number, the lower
/// bits contain the packed symop_data.
struct symop_datablock
{
    constexpr symop_datablock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
        : m_v(((spacegroup & 0xffffULL) << 48) |
              ((rotational_number & 0xffULL) << 40) |
              symop_data(rt_data).m_packed)
    {
    }

    /// \brief The upper 16 bits
    uint16_t spacegroup() const
    {
        return static_cast<uint16_t>(m_v >> 48);
    }

    /// \brief The packed symop, the constructor used masks off the upper bits
    symop_data symop() const
    {
        return symop_data(m_v);
    }

    /// \brief Bits 40..47
    uint8_t rotational_number() const
    {
        return static_cast<uint8_t>((m_v >> 40) & 0xff);
    }

  private:
    uint64_t m_v;
};
static_assert(sizeof(symop_datablock) == sizeof(uint64_t), "Size of symop_data is wrong");
extern const symop_datablock kSymopNrTable[];
extern const std::size_t kSymopNrTableSize;
// --------------------------------------------------------------------
int get_space_group_number(std::string spacegroup); // alternative for clipper's parsing code, using space_group_name::full
int get_space_group_number(std::string spacegroup, space_group_name type); // alternative for clipper's parsing code
} // namespace cif

458
include/cif++/text.hpp Normal file
View File

@@ -0,0 +1,458 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <charconv>
#include <cmath>
#include <limits>
#include <set>
#include <sstream>
#include <tuple>
#include <type_traits>
#include <vector>
#if __has_include(<experimental/type_traits>)
#include <experimental/type_traits>
#endif
namespace cif
{
// --------------------------------------------------------------------
// some basic utilities: Since we're using ASCII input only, we define for optimisation
// our own case conversion routines.
bool iequals(std::string_view a, std::string_view b);
int icompare(std::string_view a, std::string_view b);
bool iequals(const char *a, const char *b);
int icompare(const char *a, const char *b);
void to_lower(std::string &s);
std::string to_lower_copy(std::string_view s);
void to_upper(std::string &s);
// std::string toUpperCopy(const std::string &s);
/// \brief Concatenate the elements in the range [b, e), written with
/// operator<< and separated by \a sep.
///
/// \param b Start of the range
/// \param e End of the range
/// \param sep The separator, written between each pair of elements
/// \result The joined string, empty for an empty range
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
    std::ostringstream out;
    bool first = true;

    for (auto it = b; it != e; ++it)
    {
        // the separator goes before every element but the first
        if (not first)
            out << sep;

        out << *it;
        first = false;
    }

    return out.str();
}
/// \brief Concatenate all elements of the container \a arr, separated by \a sep.
/// Forwards to the iterator based join using arr.begin() and arr.end().
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
    auto first = arr.begin();
    auto last = arr.end();
    return join(first, last, sep);
}
/// \brief Split \a s at each character that occurs in \a separators.
///
/// The parts are constructed from a character pointer and a length, so no
/// assumption is made about std::string_view::iterator being a raw pointer.
///
/// \param s              The string to split. When StringType is
///                       std::string_view the result refers to the
///                       characters of \a s.
/// \param separators     Each character in this string is a separator
/// \param suppress_empty When true, empty parts are not added to the result
/// \result The parts. When \a suppress_empty is false there is always one
///         part more than the number of separator characters found in \a s.
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
    std::vector<StringType> result;

    std::string_view::size_type start = 0;

    for (;;)
    {
        const auto stop = s.find_first_of(separators, start);
        const auto length = (stop == std::string_view::npos ? s.size() : stop) - start;

        if (length > 0 or not suppress_empty)
            result.emplace_back(s.data() + start, length);

        if (stop == std::string_view::npos)
            break;

        // continue just after the separator
        start = stop + 1;
    }

    return result;
}
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
// starts_with and ends_with, using the standard library implementation when available.
// Both take a std::string_view, so no copy of the string is made for a call.
#if defined(__cpp_lib_starts_ends_with)
/// \brief Returns true if \a s starts with \a with
inline bool starts_with(std::string_view s, std::string_view with)
{
    return s.starts_with(with);
}
/// \brief Returns true if \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
    return s.ends_with(with);
}
#else
/// \brief Returns true if \a s starts with \a with
inline bool starts_with(std::string_view s, std::string_view with)
{
    // compare clips the length to the size of s, so a short s simply compares unequal
    return s.compare(0, with.length(), with) == 0;
}
/// \brief Returns true if \a s ends with \a with
inline bool ends_with(std::string_view s, std::string_view with)
{
    return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
}
#endif
// contains, using the standard library implementation when available
#if defined(__cpp_lib_string_contains)
/// \brief Returns true if \a q occurs somewhere in \a s
inline bool contains(std::string_view s, std::string_view q)
{
    return s.contains(q);
}
#else
/// \brief Returns true if \a q occurs somewhere in \a s
inline bool contains(std::string_view s, std::string_view q)
{
    const auto position = s.find(q);
    return position != std::string_view::npos;
}
#endif
bool icontains(std::string_view s, std::string_view q);
void trim_left(std::string &s);
void trim_right(std::string &s);
void trim(std::string &s);
std::string trim_left_copy(std::string_view s);
std::string trim_right_copy(std::string_view s);
std::string trim_copy(std::string_view s);
// To make life easier, we also define iless and iset, both based on icompare
/// \brief Comparison functor: true when icompare reports that \a a sorts before \a b
struct iless
{
bool operator()(const std::string &a, const std::string &b) const
{
return icompare(a, b) < 0;
}
};
typedef std::set<std::string, iless> iset;
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
extern const uint8_t kCharToLowerMap[256];
/// \brief Lower case version of \a ch, looked up in kCharToLowerMap.
/// Only the lowest eight bits of \a ch are used as index.
inline char tolower(int ch)
{
    const auto index = static_cast<uint8_t>(ch);
    return static_cast<char>(kCharToLowerMap[index]);
}
// --------------------------------------------------------------------
std::tuple<std::string, std::string> split_tag_name(std::string_view tag);
// --------------------------------------------------------------------
// generate a cif name, mainly used to generate asym_id's
std::string cif_id_for_number(int number);
// --------------------------------------------------------------------
// custom wordwrapping routine
std::vector<std::string> word_wrap(const std::string &text, size_t width);
// --------------------------------------------------------------------
/// std::from_chars for floating point types.
/// These are optional, there's a selected_charconv class below that selects
/// the best option to use based on support by the standard library.
/// I.e. in case of GNU < 12 (or something) the cif implementation will
/// be used, all other cases will use the standard library version.
///
/// Accepted input: an optional sign ('-' or '+'), digits with an optional
/// decimal point, optionally followed by 'e' or 'E', an optional sign and
/// the exponent digits. Parsing stops at the first character that cannot be
/// part of the number.
///
/// \param first Start of the text to parse
/// \param last  End of the text to parse
/// \param value Receives the parsed value, only modified on success
/// \result On success ec is std::errc() and ptr points to the first character
///         that was not consumed. If the text does not contain a number
///         (no digits, or an 'e' that is not followed by a sign or a digit)
///         ec is std::errc::invalid_argument and ptr equals \a first. If the
///         value does not fit in FloatType, ec is
///         std::errc::result_out_of_range.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::from_chars_result from_chars(const char *first, const char *last, FloatType &value)
{
    std::from_chars_result result{ first, {} };

    enum State
    {
        IntegerSign,
        Integer,
        Fraction,
        ExponentSign,
        Exponent
    } state = IntegerSign;

    // The largest mantissa that can still take one more digit without overflow
    constexpr unsigned long long kMaxMantissa = (std::numeric_limits<unsigned long long>::max() - 9) / 10;

    // Way beyond the range of any floating point type, avoids overflow of exponent
    constexpr int kMaxExponent = 100000;

    int sign = 1;
    unsigned long long vi = 0; // all accepted digits, as an integer
    long double f = 1;         // divided by ten for each accepted fraction digit
    int scale = 0;             // number of integer digits that did not fit in vi
    int exponent_sign = 1;
    int exponent = 0;
    bool have_digits = false; // true when the mantissa has at least one digit
    bool done = false;

    while (not done and result.ec == std::errc())
    {
        // a zero character is used to signal the end of the input
        const char ch = result.ptr != last ? *result.ptr : 0;
        const bool is_digit = ch >= '0' and ch <= '9';

        switch (state)
        {
            case IntegerSign:
                if (ch == '-')
                {
                    sign = -1;
                    state = Integer;
                }
                else if (ch == '+')
                    state = Integer;
                else if (is_digit)
                {
                    vi = ch - '0';
                    have_digits = true;
                    state = Integer;
                }
                else if (ch == '.')
                    state = Fraction;
                else
                    result.ec = std::errc::invalid_argument;
                break;

            case Integer:
                if (is_digit)
                {
                    have_digits = true;
                    if (vi <= kMaxMantissa)
                        vi = 10 * vi + (ch - '0');
                    else
                        ++scale; // digit is dropped, but it still counts for the magnitude
                }
                else if (ch == 'e' or ch == 'E')
                    state = ExponentSign;
                else if (ch == '.')
                    state = Fraction;
                else
                    done = true;
                break;

            case Fraction:
                if (is_digit)
                {
                    have_digits = true;
                    // fraction digits that no longer fit are below the precision of vi, skip them
                    if (vi <= kMaxMantissa)
                    {
                        vi = 10 * vi + (ch - '0');
                        f /= 10;
                    }
                }
                else if (ch == 'e' or ch == 'E')
                    state = ExponentSign;
                else
                    done = true;
                break;

            case ExponentSign:
                if (ch == '-')
                {
                    exponent_sign = -1;
                    state = Exponent;
                }
                else if (ch == '+')
                    state = Exponent;
                else if (is_digit)
                {
                    exponent = ch - '0';
                    state = Exponent;
                }
                else
                    result.ec = std::errc::invalid_argument;
                break;

            case Exponent:
                if (is_digit)
                {
                    if (exponent < kMaxExponent)
                        exponent = 10 * exponent + (ch - '0');
                }
                else
                    done = true;
                break;
        }

        // only step over characters that were really consumed
        if (not done and result.ec == std::errc())
            ++result.ptr;
    }

    // a sign and/or a decimal point alone is not a number
    if (result.ec == std::errc() and not have_digits)
        result.ec = std::errc::invalid_argument;

    if (result.ec == std::errc())
    {
        long double v = f * vi * sign;

        if (scale != 0)
            v *= std::pow(10.0L, scale);

        if (exponent != 0)
            v *= std::pow(10, exponent * exponent_sign);

        if (std::isnan(v))
            result.ec = std::errc::invalid_argument;
        else if (std::abs(v) > std::numeric_limits<FloatType>::max())
            result.ec = std::errc::result_out_of_range;
        else
            value = static_cast<FloatType>(v);
    }

    // same as std::from_chars: when nothing matched, ptr equals first
    if (result.ec == std::errc::invalid_argument)
        result.ptr = first;

    return result;
}
/// \brief The output format for cif::to_chars. The enumerators select the
/// printf conversions %e (scientific), %f (fixed) and %g (general).
enum class chars_format
{
scientific = 1,
fixed = 2,
// hex,
general = fixed | scientific
};
/// \brief Write \a value as text into the buffer [first, last) using snprintf.
///
/// Since snprintf is used, the buffer must have room for the text plus a
/// terminating zero character. For long double the 'L' length modifier is
/// used; the 'l' modifier has no effect on floating point conversions, so
/// passing a long double with it is undefined behaviour.
///
/// \param first Start of the output buffer
/// \param last  End of the output buffer
/// \param value The value to write
/// \param fmt   The format to use
/// \result On success ec is std::errc() and ptr points just past the last
///         character written (where snprintf placed the zero terminator).
///         On failure ec is std::errc::value_too_large and ptr is \a first.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
{
    // an empty or inverted range can never hold the result
    if (last <= first)
        return { first, std::errc::value_too_large };

    const auto size = static_cast<std::size_t>(last - first);

    // stays negative (= failure) for an unknown format
    int r = -1;

    switch (fmt)
    {
        case chars_format::scientific:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%Le", value);
            else
                r = snprintf(first, size, "%e", value);
            break;

        case chars_format::fixed:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%Lf", value);
            else
                r = snprintf(first, size, "%f", value);
            break;

        case chars_format::general:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%Lg", value);
            else
                r = snprintf(first, size, "%g", value);
            break;
    }

    // snprintf returns the length the full text would have had
    if (r < 0 or static_cast<std::size_t>(r) >= size)
        return { first, std::errc::value_too_large };

    return { first + r, std::errc() };
}
/// \brief Write \a value as text into the buffer [first, last) using snprintf,
/// with an explicit precision.
///
/// Since snprintf is used, the buffer must have room for the text plus a
/// terminating zero character. For long double the 'L' length modifier is
/// used; the 'l' modifier has no effect on floating point conversions, so
/// passing a long double with it is undefined behaviour.
///
/// \param first     Start of the output buffer
/// \param last      End of the output buffer
/// \param value     The value to write
/// \param fmt       The format to use
/// \param precision The precision, passed to snprintf as the '.*' argument
/// \result On success ec is std::errc() and ptr points just past the last
///         character written (where snprintf placed the zero terminator).
///         On failure ec is std::errc::value_too_large and ptr is \a first.
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
{
    // an empty or inverted range can never hold the result
    if (last <= first)
        return { first, std::errc::value_too_large };

    const auto size = static_cast<std::size_t>(last - first);

    // stays negative (= failure) for an unknown format
    int r = -1;

    switch (fmt)
    {
        case chars_format::scientific:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%.*Le", precision, value);
            else
                r = snprintf(first, size, "%.*e", precision, value);
            break;

        case chars_format::fixed:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%.*Lf", precision, value);
            else
                r = snprintf(first, size, "%.*f", precision, value);
            break;

        case chars_format::general:
            if constexpr (std::is_same_v<FloatType, long double>)
                r = snprintf(first, size, "%.*Lg", precision, value);
            else
                r = snprintf(first, size, "%.*g", precision, value);
            break;
    }

    // snprintf returns the length the full text would have had
    if (r < 0 or static_cast<std::size_t>(r) >= size)
        return { first, std::errc::value_too_large };

    return { first + r, std::errc() };
}
/// \brief Conversion functions for type T implemented by the cif library
/// itself: both members forward to cif::from_chars and cif::to_chars.
template <typename T>
struct my_charconv
{
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
{
return cif::from_chars(a, b, d);
}
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
{
return cif::to_chars(first, last, value, fmt);
}
};
/// \brief Conversion functions for type T implemented by the standard library.
///
/// to_chars receives a cif::chars_format. That is a different type than
/// std::chars_format and there is no conversion between the two, so the
/// value is mapped explicitly before calling std::to_chars.
template <typename T>
struct std_charconv
{
    static std::from_chars_result from_chars(const char *a, const char *b, T &d)
    {
        return std::from_chars(a, b, d);
    }

    static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
    {
        if constexpr (std::is_floating_point_v<T>)
        {
            std::chars_format std_fmt = std::chars_format::general;

            switch (fmt)
            {
                case chars_format::scientific:
                    std_fmt = std::chars_format::scientific;
                    break;
                case chars_format::fixed:
                    std_fmt = std::chars_format::fixed;
                    break;
                case chars_format::general:
                    break;
            }

            return std::to_chars(first, last, value, std_fmt);
        }
        else
            return std::to_chars(first, last, value); // the format only applies to floating point values
    }
};
template <typename T>
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
template <typename T>
using selected_charconv = typename std::conditional_t<std::experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
} // namespace cif

183
include/cif++/utilities.hpp Normal file
View File

@@ -0,0 +1,183 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <filesystem>
#ifndef STDOUT_FILENO
#define STDOUT_FILENO 1
#endif
#if _MSC_VER
#include <io.h>
#define isatty _isatty
#else
#include <unistd.h>
#endif
#if _MSC_VER
#pragma warning(disable : 4996) // unsafe function or variable (strcpy e.g.)
#pragma warning(disable : 4068) // unknown pragma
#pragma warning(disable : 4100) // unreferenced formal parameter
#pragma warning(disable : 4101) // unreferenced local variable
#define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
#endif
namespace cif
{
extern int VERBOSE;
// the git 'build' number
std::string get_version_nr();
// std::string get_version_date();
// --------------------------------------------------------------------
// Code helping with terminal i/o
uint32_t get_terminal_width();
// --------------------------------------------------------------------
// Path of the current executable
std::string get_executable_path();
// --------------------------------------------------------------------
// some manipulators to write coloured text to terminals
/// \brief Colours for coloured terminal output. The numeric value is added to
/// 30 (foreground) or 40 (background) when the escape sequence is written.
enum StringColour
{
scBLACK = 0,
scRED,
scGREEN,
scYELLOW,
scBLUE,
scMAGENTA,
scCYAN,
scWHITE,
// note: the value 8 is skipped
scNONE = 9
};
/// \brief A string together with the colours and weight to use when writing
/// it to a stream, see the operator<< overloads taking a ColouredString.
///
/// The string is not copied: String must be a pointer or a reference type,
/// this is enforced by the static_assert. The object is therefore only valid
/// as long as the string it refers to.
template <typename String, typename CharT>
struct ColouredString
{
static_assert(std::is_reference<String>::value or std::is_pointer<String>::value, "String type must be pointer or reference");
ColouredString(String s, StringColour fore, StringColour back, bool bold = true)
: m_s(s)
, m_fore(fore)
, m_back(back)
, m_bold(bold)
{
}
ColouredString &operator=(const ColouredString &) = delete;
String m_s;                  // pointer or reference to the text
StringColour m_fore, m_back; // foreground and background colour
bool m_bold;                 // selects "1" instead of "22" in the escape sequence
};
/// \brief Write a coloured zero terminated string to \a os.
///
/// The escape sequences are only written when isatty(STDOUT_FILENO) is true,
/// regardless of the stream \a os. Otherwise only the plain text is written.
template <typename CharT, typename Traits>
std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<const CharT *, CharT> &s)
{
    if (not isatty(STDOUT_FILENO))
        return os << s.m_s;

    // build the complete text first and write it to os in one go
    std::basic_ostringstream<CharT, Traits> buffer;

    buffer << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
           << s.m_s
           << "\033[0m";

    return os << buffer.str();
}
/// \brief Write a coloured string of any String type to \a os.
///
/// The escape sequences are only written when isatty(STDOUT_FILENO) is true,
/// regardless of the stream \a os. Otherwise only the plain text is written.
template <typename CharT, typename Traits, typename String>
std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<String, CharT> &s)
{
    if (not isatty(STDOUT_FILENO))
        return os << s.m_s;

    // build the complete text first and write it to os in one go
    std::basic_ostringstream<CharT, Traits> buffer;

    buffer << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
           << s.m_s
           << "\033[0m";

    return os << buffer.str();
}
/// \brief Wrap the zero terminated string \a s in a ColouredString, by
/// default bold white text on a red background. Only the pointer is stored.
template <typename CharT>
inline auto coloured(const CharT *s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
return ColouredString<const CharT *, CharT>(s, fore, back, bold);
}
/// \brief Wrap the string \a s in a ColouredString, by default bold white
/// text on a red background.
///
/// ColouredString requires a pointer or reference type (see its
/// static_assert), so the string is stored as a const reference. The result
/// should not outlive \a s.
template <typename CharT, typename Traits, typename Alloc>
inline auto coloured(const std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
    return ColouredString<const std::basic_string<CharT, Traits, Alloc> &, CharT>(s, fore, back, bold);
}
template <typename CharT, typename Traits, typename Alloc>
inline auto coloured(std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
{
return ColouredString<std::basic_string<CharT, Traits, Alloc>, CharT>(s, fore, back, bold);
}
// --------------------------------------------------------------------
// A progress bar
/// Reports the progress of a long-running action (the surrounding comment calls it a progress bar).
/// Instances cannot be copied; all state is kept behind the opaque m_impl pointer.
class Progress
{
public:
/// @param inMax    presumably the total amount of work to be done — TODO confirm against the implementation
/// @param inAction text naming the action being tracked
Progress(int64_t inMax, const std::string &inAction);
virtual ~Progress();
void consumed(int64_t inConsumed); // consumed is relative
void progress(int64_t inProgress); // progress is absolute
/// Hands a message to the progress display; how it is shown is decided by the implementation (not visible here).
void message(const std::string &inMessage);
private:
// Copying is disabled.
Progress(const Progress &) = delete;
Progress &operator=(const Progress &) = delete;
// Opaque implementation. NOTE(review): raw pointer, presumably allocated and freed by the ctor/dtor — confirm.
struct ProgressImpl *m_impl;
};
// --------------------------------------------------------------------
// Resources
/// Opens the named resource as a stream. Callers in this code base test the
/// result for null to detect a resource that could not be located.
std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
/// Registers dataFile as the source for the resource called name.
/// NOTE(review): presumably consulted by load_resource — confirm in the implementation.
void add_file_resource(const std::string &name, std::filesystem::path dataFile);
/// Registers dataDir as an additional directory holding data files.
/// NOTE(review): presumably searched by load_resource — confirm in the implementation.
void add_data_directory(std::filesystem::path dataDir);
} // namespace cif

242
include/cif++/validate.hpp Normal file
View File

@@ -0,0 +1,242 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <filesystem>
#include <list>
#include <mutex>
#include <utility>
#include <cif++/text.hpp>
namespace cif
{
struct category_validator;
// --------------------------------------------------------------------
/// Exception type used to report validation failures.
///
/// The message is stored in m_msg and returned verbatim by what().
class validation_error : public std::exception
{
  public:
    /// @param msg  the complete error message
    validation_error(const std::string &msg);

    /// @param cat   name of the category the error relates to
    /// @param item  name of the item the error relates to
    /// @param msg   description of the problem
    validation_error(const std::string &cat, const std::string &item,
        const std::string &msg);

    /// Returns the stored message; the pointer is valid as long as this
    /// object lives and m_msg is not modified.
    const char *what() const noexcept override { return m_msg.c_str(); }

    std::string m_msg;
};
// --------------------------------------------------------------------
/// Primitive type of an item as declared by a dictionary.
enum class DDL_PrimitiveType
{
Char,
// Columns of this type are matched case-insensitively by key_equals_condition_impl::prepare.
UChar,
Numb
};
/// Converts the textual name of a primitive type into a DDL_PrimitiveType.
/// NOTE(review): accepted spellings and the error behaviour are defined in the implementation, not visible here.
DDL_PrimitiveType map_to_primitive_type(std::string_view s);
struct regex_impl;

/// Validator for a single data type: a name, its primitive type and a
/// compiled regular expression (kept behind the opaque regex_impl pointer).
///
/// Move-only. The object owns m_rx; the destructor is defined elsewhere and is
/// presumably what releases it — the move operations below are written so that
/// exactly one object is ever responsible for a given regex_impl.
struct type_validator
{
    std::string m_name;
    DDL_PrimitiveType m_primitive_type;
    regex_impl *m_rx;

    type_validator() = delete;

    /// @param name  name of the type
    /// @param type  primitive type the values belong to
    /// @param rx    regular expression values of this type should match
    type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx);

    type_validator(const type_validator &) = delete;

    /// Takes over the regex of @a rhs, leaving @a rhs without one.
    type_validator(type_validator &&rhs) noexcept
        : m_name(std::move(rhs.m_name))
        , m_primitive_type(rhs.m_primitive_type)
        , m_rx(std::exchange(rhs.m_rx, nullptr))
    {
    }

    type_validator &operator=(const type_validator &) = delete;

    /// Move assignment.
    ///
    /// The regex pointers are swapped instead of overwritten: overwriting would
    /// drop the regex previously held by *this without anyone releasing it.
    /// After the swap @a rhs holds the old regex and disposes of it when it is
    /// destroyed. Self-assignment is a no-op.
    type_validator &operator=(type_validator &&rhs) noexcept
    {
        if (this != &rhs)
        {
            m_name = std::move(rhs.m_name);
            m_primitive_type = rhs.m_primitive_type;
            std::swap(m_rx, rhs.m_rx);
        }
        return *this;
    }

    ~type_validator();

    /// Orders validators by name, ignoring case.
    bool operator<(const type_validator &rhs) const
    {
        return icompare(m_name, rhs.m_name) < 0;
    }

    /// Three-way comparison of two values of this type.
    /// NOTE(review): the comparison rules per primitive type live in the implementation.
    int compare(std::string_view a, std::string_view b) const;
};
/// Validator for a single item (column) of a category.
///
/// All scalar members carry default initializers so that a default-constructed
/// or partially aggregate-initialized object never holds indeterminate values;
/// a null m_type means that no type validator is attached.
struct item_validator
{
    std::string m_tag;
    bool m_mandatory = false;
    const type_validator *m_type = nullptr;
    cif::iset m_enums;
    std::string m_default;
    bool m_default_is_null = false;
    category_validator *m_category = nullptr;

    // ItemLinked is used for non-key links
    struct item_link
    {
        item_validator *m_parent = nullptr;
        std::string m_parent_item;
        std::string m_child_item;
    };

    std::vector<item_link> mLinked;

    /// Orders validators by tag, ignoring case.
    bool operator<(const item_validator &rhs) const
    {
        return icompare(m_tag, rhs.m_tag) < 0;
    }

    /// Two validators are equal when their tags are equal, ignoring case.
    bool operator==(const item_validator &rhs) const
    {
        return iequals(m_tag, rhs.m_tag);
    }

    /// Validates @a value against this item.
    /// NOTE(review): failures are presumably reported by throwing validation_error — confirm in the implementation.
    void operator()(std::string_view value) const;
};
/// Validator for a category: its name, key fields, groups, mandatory fields
/// and the validators for its items.
struct category_validator
{
std::string m_name;
std::vector<std::string> m_keys;
cif::iset m_groups;
cif::iset m_mandatory_fields;
// Ordered by item_validator::operator<, i.e. by tag ignoring case.
std::set<item_validator> m_item_validators;
/// Orders category validators by name, ignoring case.
bool operator<(const category_validator &rhs) const
{
return icompare(m_name, rhs.m_name) < 0;
}
/// Adds the validator for one item to this category.
void addItemValidator(item_validator &&v);
/// Looks up the validator for the item called tag. Callers in this code base
/// compare the result against nullptr, so a missing item is reported as null.
const item_validator *get_validator_for_item(std::string_view tag) const;
/// Read-only access to all item validators of this category.
const std::set<item_validator> &item_validators() const
{
return m_item_validators;
}
};
/// Describes a link between a parent and a child category: the categories
/// involved and the key items on each side, plus the id and label of the link group.
/// NOTE(review): m_parent_keys and m_child_keys are presumably parallel lists — confirm.
struct link_validator
{
// NOTE(review): not initialized by default; aggregate-initialize or assign before use.
int m_link_group_id;
std::string m_parent_category;
std::vector<std::string> m_parent_keys;
std::string m_child_category;
std::vector<std::string> m_child_keys;
std::string m_link_group_label;
};
// --------------------------------------------------------------------
/// Collection of type, category and link validators that together describe one
/// dictionary. Move-only. dictionary_parser is a friend and so has access to
/// the private parts.
class validator
{
public:
/// @param name  name of the validator (see name() / set_name())
validator(std::string_view name)
: m_name(name)
{
}
~validator() = default;
validator(const validator &rhs) = delete;
validator &operator=(const validator &rhs) = delete;
validator(validator &&rhs) = default;
validator &operator=(validator &&rhs) = default;
friend class dictionary_parser;
/// Adds a type validator to this dictionary.
void add_type_validator(type_validator &&v);
/// Looks up the type validator for type_code.
/// NOTE(review): presumably returns nullptr when unknown — confirm.
const type_validator *get_validator_for_type(std::string_view type_code) const;
/// Adds a category validator to this dictionary.
void add_category_validator(category_validator &&v);
/// Looks up the validator for the named category.
/// NOTE(review): presumably returns nullptr when unknown — confirm.
const category_validator *get_validator_for_category(std::string_view category) const;
/// Adds a link validator to this dictionary.
void add_link_validator(link_validator &&v);
/// Link validators in which category is the parent.
std::vector<const link_validator *> get_links_for_parent(std::string_view category) const;
/// Link validators in which category is the child.
std::vector<const link_validator *> get_links_for_child(std::string_view category) const;
/// Reports a validation problem.
/// NOTE(review): what fatal changes (throw vs. log) is decided in the implementation — confirm.
void report_error(const std::string &msg, bool fatal) const;
const std::string &name() const { return m_name; }
void set_name(const std::string &name) { m_name = name; }
// Getter and setter for the version share one name and are told apart by their argument.
const std::string &version() const { return m_version; }
void version(const std::string &version) { m_version = version; }
private:
// name is fully qualified here:
item_validator *get_validator_for_item(std::string_view name) const;
std::string m_name;
std::string m_version;
bool m_strict = false;
std::set<type_validator> m_type_validators;
std::set<category_validator> m_category_validators;
std::vector<link_validator> m_link_validators;
};
// --------------------------------------------------------------------
/// Process-wide holder of validator objects, accessed through instance().
class validator_factory
{
public:
/// Returns the single factory object; it is a function-local static and is
/// therefore created on first use.
static validator_factory &instance()
{
static validator_factory s_instance;
return s_instance;
}
/// Returns the validator for the named dictionary.
/// NOTE(review): presumably constructs and caches it on first request, guarded by m_mutex — confirm.
const validator &operator[](std::string_view dictionary_name);
private:
/// Builds a validator called name from the dictionary text read from is.
void construct_validator(std::string_view name, std::istream &is);
// --------------------------------------------------------------------
// Construction is private; use instance().
validator_factory() = default;
std::mutex m_mutex;
// A list keeps element addresses stable while validators are added, which
// matters because operator[] hands out references.
std::list<validator> m_validators;
};
} // namespace cif

13
libcifpp.pc.in Normal file
View File

@@ -0,0 +1,13 @@
prefix=@prefix@
exec_prefix=@exec_prefix@
libdir=@libdir@
includedir=@includedir@
datalibdir=@datarootdir@/libcifpp
Name: libcifpp
Description: C++ library for the manipulation of mmCIF files.
Version: @PACKAGE_VERSION@
Requires.private: zlib, liblzma
Libs: -L${libdir} -lcifpp
Cflags: -I${includedir} -pthread

1
regex Submodule

Submodule regex added at e5979ae1af

36
rsrc/isomers.txt Normal file
View File

@@ -0,0 +1,36 @@
13R:13S
1AB:IMR
558:559
6PG:LG6
A:AMP
ABA:DBB
ALO:DTH
BDR:BXX
C:C5P
CBI:MAL:MAB
COA:COZ
COM:COM
CTL:TTL
CTR:MLR
DIL:IIL
DNE:NLE
DTL:MRY
DX5:LXP:R5P
G:G25
GDC:GDD
GDM:GMY
GDU:GUD:UFM:UPG
HY0:HYG
I:IMP
LLT:THM
LPK:LTG
M13:MDM
PDE:PNE
QDN:QI9
R5A:R5B
RUB:XBP
RWF:SWF
TBE:TBI
U:U5P
U2F:UP1:UPF
UD1:UD2

3098
rsrc/mmcif_ddl.dic Normal file

File diff suppressed because it is too large Load Diff

150867
rsrc/mmcif_ma.dic Normal file

File diff suppressed because it is too large Load Diff

165360
rsrc/mmcif_pdbx.dic Normal file

File diff suppressed because it is too large Load Diff

1162
src/atom_type.cpp Normal file

File diff suppressed because it is too large Load Diff

2129
src/category.cpp Normal file

File diff suppressed because it is too large Load Diff

748
src/compound.cpp Normal file
View File

@@ -0,0 +1,748 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020-2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <map>
#include <mutex>
#include <numeric>
#include <shared_mutex>
#include <filesystem>
#include <fstream>
#include <cif++/compound.hpp>
namespace fs = std::filesystem;
namespace cif
{
// --------------------------------------------------------------------
/// Returns the four (or two) letter label used for @a bondType.
/// @throws std::invalid_argument when the value is not one of the enumerators.
std::string to_string(bond_type bondType)
{
    const char *label = nullptr;

    switch (bondType)
    {
        case bond_type::sing: label = "sing"; break;
        case bond_type::doub: label = "doub"; break;
        case bond_type::trip: label = "trip"; break;
        case bond_type::quad: label = "quad"; break;
        case bond_type::arom: label = "arom"; break;
        case bond_type::poly: label = "poly"; break;
        case bond_type::delo: label = "delo"; break;
        case bond_type::pi: label = "pi"; break;
    }

    if (label == nullptr)
        throw std::invalid_argument("Invalid bondType");

    return label;
}
/// Parses a bond type label, ignoring case.
/// @throws std::invalid_argument when @a bondType is not a known label.
bond_type from_string(const std::string &bondType)
{
    // Label/value pairs, tried in this order.
    static const struct
    {
        const char *label;
        bond_type type;
    } kLabels[] = {
        { "sing", bond_type::sing },
        { "doub", bond_type::doub },
        { "trip", bond_type::trip },
        { "quad", bond_type::quad },
        { "arom", bond_type::arom },
        { "poly", bond_type::poly },
        { "delo", bond_type::delo },
        { "pi", bond_type::pi }
    };

    for (const auto &entry : kLabels)
    {
        if (cif::iequals(bondType, entry.label))
            return entry.type;
    }

    throw std::invalid_argument("Invalid bondType: " + bondType);
}
// --------------------------------------------------------------------
// compound helper classes
/// Strict weak ordering for compound_atom: by id first, then by type symbol.
struct compound_atom_less
{
    bool operator()(const compound_atom &a, const compound_atom &b) const
    {
        const int byId = a.id.compare(b.id);
        if (byId != 0)
            return byId < 0;

        // ids are equal, the type symbol decides
        return (a.type_symbol - b.type_symbol) < 0;
    }
};
/// Strict weak ordering for compound_bond: by first atom id, then second
/// atom id, then bond type.
struct compound_bond_less
{
    bool operator()(const compound_bond &a, const compound_bond &b) const
    {
        const int byFirst = a.atom_id[0].compare(b.atom_id[0]);
        if (byFirst != 0)
            return byFirst < 0;

        const int bySecond = a.atom_id[1].compare(b.atom_id[1]);
        if (bySecond != 0)
            return bySecond < 0;

        // same pair of atoms, the bond type decides
        return (static_cast<int>(a.type) - static_cast<int>(b.type)) < 0;
    }
};
// --------------------------------------------------------------------
// compound
/// Builds a compound from a datablock holding the chem_comp, chem_comp_atom
/// and chem_comp_bond categories. The group is always set to "non-polymer".
/// @throws std::runtime_error when chem_comp does not contain exactly one row.
compound::compound(cif::datablock &db)
{
    // General information, taken from the single chem_comp row
    auto &compInfo = db["chem_comp"];

    if (compInfo.size() != 1)
        throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

    cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
        compInfo.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

    // The name should not contain newline characters since that triggers validation errors later on
    cif::replace_all(m_name, "\n", "");

    m_group = "non-polymer";

    // Atoms
    for (auto atomRow : db["chem_comp_atom"])
    {
        compound_atom newAtom;
        std::string symbol;

        cif::tie(newAtom.id, symbol, newAtom.charge, newAtom.aromatic, newAtom.leaving_atom, newAtom.stereo_config, newAtom.x, newAtom.y, newAtom.z) =
            atomRow.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
                "model_Cartn_x", "model_Cartn_y", "model_Cartn_z");

        newAtom.type_symbol = atom_type_traits(symbol).type();
        m_atoms.push_back(std::move(newAtom));
    }

    // Bonds
    for (auto bondRow : db["chem_comp_bond"])
    {
        compound_bond newBond;
        std::string order;

        cif::tie(newBond.atom_id[0], newBond.atom_id[1], order, newBond.aromatic, newBond.stereo_config) =
            bondRow.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");

        newBond.type = from_string(order);
        m_bonds.push_back(std::move(newBond));
    }
}
/// Builds a compound from a datablock using the column names x/y/z, type and
/// aromatic (the layout read from CCP4 style restraint files elsewhere in this
/// file). Id, name, type and group are supplied by the caller; formal charge
/// and formula weight are accumulated from the atoms.
compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
    : m_id(id)
    , m_name(name)
    , m_type(type)
    , m_group(group)
{
    // Atoms; charge and weight are summed while reading
    for (auto atomRow : db["chem_comp_atom"])
    {
        compound_atom newAtom;
        std::string symbol;

        cif::tie(newAtom.id, symbol, newAtom.charge, newAtom.x, newAtom.y, newAtom.z) =
            atomRow.get("atom_id", "type_symbol", "charge", "x", "y", "z");

        newAtom.type_symbol = atom_type_traits(symbol).type();

        m_formal_charge += newAtom.charge;
        m_formula_weight += atom_type_traits(newAtom.type_symbol).weight();

        m_atoms.push_back(std::move(newAtom));
    }

    // Bonds
    for (auto bondRow : db["chem_comp_bond"])
    {
        compound_bond newBond;
        std::string kind;

        cif::tie(newBond.atom_id[0], newBond.atom_id[1], kind, newBond.aromatic) =
            bondRow.get("atom_id_1", "atom_id_2", "type", "aromatic");

        using cif::iequals;

        const bool delocalised = iequals(kind, "deloc") or iequals(kind, "aromat") or iequals(kind, "aromatic");

        if (iequals(kind, "single"))
            newBond.type = bond_type::sing;
        else if (iequals(kind, "double"))
            newBond.type = bond_type::doub;
        else if (iequals(kind, "triple"))
            newBond.type = bond_type::trip;
        else if (delocalised)
            newBond.type = bond_type::delo;
        else
        {
            // Unknown kinds are treated as single bonds, with a warning when verbose
            if (cif::VERBOSE > 0)
                std::cerr << "Unimplemented chem_comp_bond.type " << kind << " in " << id << std::endl;
            newBond.type = bond_type::sing;
        }

        m_bonds.push_back(std::move(newBond));
    }
}
/// Returns a copy of the atom whose id equals @a atom_id.
///
/// The match is reported directly from the search loop. Deciding "found" by
/// comparing the id of a default-constructed result afterwards would wrongly
/// succeed for an empty @a atom_id.
///
/// @throws std::out_of_range when this compound has no such atom.
compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
{
    for (const auto &atom : m_atoms)
    {
        if (atom.id == atom_id)
            return atom;
    }

    throw std::out_of_range("No atom " + atom_id + " in compound " + m_id);
}
bool compound::atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const
{
auto i = find_if(m_bonds.begin(), m_bonds.end(),
[&](const compound_bond &b)
{
return (b.atom_id[0] == atomId_1 and b.atom_id[1] == atomId_2) or (b.atom_id[0] == atomId_2 and b.atom_id[1] == atomId_1);
});
return i != m_bonds.end();
}
// --------------------------------------------------------------------
// known amino acids and bases
// Three-letter amino acid residue names mapped to their one-letter codes.
// Besides the twenty standard residues this includes GLX (Z) and ASX (B).
// The keys of this map seed the set of known peptides in compound_factory_impl.
const std::map<std::string, char> compound_factory::kAAMap{
{ "ALA", 'A' },
{ "ARG", 'R' },
{ "ASN", 'N' },
{ "ASP", 'D' },
{ "CYS", 'C' },
{ "GLN", 'Q' },
{ "GLU", 'E' },
{ "GLY", 'G' },
{ "HIS", 'H' },
{ "ILE", 'I' },
{ "LEU", 'L' },
{ "LYS", 'K' },
{ "MET", 'M' },
{ "PHE", 'F' },
{ "PRO", 'P' },
{ "SER", 'S' },
{ "THR", 'T' },
{ "TRP", 'W' },
{ "TYR", 'Y' },
{ "VAL", 'V' },
{ "GLX", 'Z' },
{ "ASX", 'B' }
};
// Nucleotide residue names mapped to their one-letter codes; both the
// one-letter names (A, C, G, T, U) and the D-prefixed names (DA, DC, DG, DT)
// are present. The keys seed the set of known bases in compound_factory_impl.
const std::map<std::string, char> compound_factory::kBaseMap{
{ "A", 'A' },
{ "C", 'C' },
{ "G", 'G' },
{ "T", 'T' },
{ "U", 'U' },
{ "DA", 'A' },
{ "DC", 'C' },
{ "DG", 'G' },
{ "DT", 'T' }
};
// --------------------------------------------------------------------
// a factory class to generate compounds
/// One link in a chain of compound sources.
///
/// Each link owns the compounds it has loaded (m_compounds, released in the
/// destructor) and refers to the next link through m_next. Lookups start at
/// the link get() is called on and walk down the chain.
class compound_factory_impl : public std::enable_shared_from_this<compound_factory_impl>
{
  public:
    /// Creates a link without preloaded compounds.
    compound_factory_impl(std::shared_ptr<compound_factory_impl> next);

    /// Creates a link preloaded with all compounds found in @a file.
    compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);

    virtual ~compound_factory_impl()
    {
        for (auto c : m_compounds)
            delete c;
    }

    /// Returns the compound with the given id (compared in upper case), or
    /// nullptr when no link in the chain knows it.
    ///
    /// The whole operation runs under an exclusive lock. A lookup may end up
    /// modifying shared state — create() appends to m_compounds and a failed
    /// lookup is recorded in m_missing — so a shared (reader) lock does not
    /// protect it. A dedicated mutex is used rather than mMutex because
    /// create() overrides may lock mMutex themselves.
    compound *get(std::string id)
    {
        cif::to_upper(id);

        std::lock_guard lock(m_get_mutex);

        // walk the list, see if any of us has the compound already
        for (auto impl = shared_from_this(); impl; impl = impl->m_next)
        {
            for (auto cmp : impl->m_compounds)
            {
                if (cmp->id() == id)
                    return cmp;
            }
        }

        // a previous attempt already failed, do not try again
        if (m_missing.count(id) != 0)
            return nullptr;

        // not loaded yet, ask each link in turn to create it
        for (auto impl = shared_from_this(); impl; impl = impl->m_next)
        {
            if (compound *created = impl->create(id))
                return created;
        }

        m_missing.insert(id);
        return nullptr;
    }

    /// The next link in the chain, may be empty.
    std::shared_ptr<compound_factory_impl> next() const
    {
        return m_next;
    }

    /// True when this link or any following link lists @a resName as a peptide.
    bool is_known_peptide(const std::string &resName)
    {
        return m_known_peptides.count(resName) or
               (m_next and m_next->is_known_peptide(resName));
    }

    /// True when this link or any following link lists @a resName as a base.
    bool is_known_base(const std::string &resName)
    {
        return m_known_bases.count(resName) or
               (m_next and m_next->is_known_base(resName));
    }

  protected:
    /// Tries to load the compound @a id on demand; returns nullptr when that
    /// is not possible. Overrides are expected to add the result to m_compounds.
    virtual compound *create(const std::string &id)
    {
        // For the base class we assume every compound is preloaded
        return nullptr;
    }

    std::shared_timed_mutex mMutex;

    std::vector<compound *> m_compounds; // owned, deleted in the destructor
    std::set<std::string> m_known_peptides;
    std::set<std::string> m_known_bases;
    std::set<std::string> m_missing; // ids for which no link could deliver a compound
    std::shared_ptr<compound_factory_impl> m_next;

  private:
    std::mutex m_get_mutex; // serialises get(), see the comment there
};
// --------------------------------------------------------------------
/// Creates a link that has no compounds yet; the known peptides and bases are
/// seeded with the keys of compound_factory::kAAMap and kBaseMap.
compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
    : m_next(next)
{
    for (const auto &aminoAcid : compound_factory::kAAMap)
        m_known_peptides.insert(aminoAcid.first);

    for (const auto &base : compound_factory::kBaseMap)
        m_known_bases.insert(base.first);
}
/// Creates a link preloaded with every compound found in @a file.
///
/// Two layouts are recognised: a CCP4 restraints file (it contains a comp_list
/// datablock) and a CCD components file (anything else). A components file is
/// validated against mmcif_pdbx.dic first; validation problems are reported on
/// std::cerr but do not stop loading.
///
/// Construction is delegated to the constructor taking only @a next. This
/// seeds the known peptides and bases exactly like every other link, so the
/// standard residues are recognised even when this is the only link in the
/// chain. It also means the destructor runs — and releases the compounds
/// created so far — when loading throws halfway.
compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
    : compound_factory_impl(next)
{
    cif::file cifFile(file);

    if (cifFile.contains("comp_list")) // So this is a CCP4 restraints file, special handling
    {
        auto &compList = cifFile["comp_list"];
        auto &chemComp = compList["chem_comp"];

        for (const auto &[id, name, group] : chemComp.rows<std::string, std::string, std::string>("id", "name", "group"))
        {
            std::string type;

            // known groups are (counted from ccp4 monomer dictionary)

            //	D-pyranose
            //	DNA
            //	L-PEPTIDE LINKING
            //	L-SACCHARIDE
            //	L-peptide
            //	L-pyranose
            //	M-peptide
            //	NON-POLYMER
            //	P-peptide
            //	RNA
            //	furanose
            //	non-polymer
            //	non_polymer
            //	peptide
            //	pyranose
            //	saccharide

            if (cif::iequals(id, "gly"))
                type = "peptide linking";
            else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
                type = "L-peptide linking";
            else if (cif::iequals(group, "DNA"))
                type = "DNA linking";
            else if (cif::iequals(group, "RNA"))
                type = "RNA linking";
            else
                type = "non-polymer";

            // the atoms and bonds live in a datablock named after the compound
            auto &db = cifFile["comp_" + id];

            m_compounds.push_back(new compound(db, id, name, type, group));
        }
    }
    else
    {
        // A CCD components file, validate it first
        try
        {
            cifFile.load_dictionary("mmcif_pdbx.dic");

            if (not cifFile.is_valid())
            {
                std::cerr << "The components file " << file << " is not valid" << std::endl;
                if (cif::VERBOSE < 1)
                    std::cerr << "(use --verbose to see why)" << std::endl;
            }
        }
        catch (const std::exception &e)
        {
            std::cerr << "When trying to load the components file " << file << " there was an exception:" << std::endl
                      << e.what() << std::endl;
        }

        // every datablock is one compound
        for (auto &db : cifFile)
            m_compounds.push_back(new compound(db));
    }
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the cached components.cif file from CCD
/// Link that creates compounds on demand from a CCD components file.
/// When mCompoundsFile is empty, create() reads the "components.cif" resource instead.
class CCD_compound_factory_impl : public compound_factory_impl
{
public:
/// Uses the given components file.
/// Note the parameter order: next comes first here, unlike compound_factory_impl(file, next).
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next, const fs::path &file)
: compound_factory_impl(next)
, mCompoundsFile(file)
{
}
/// Uses the "components.cif" resource (mCompoundsFile stays empty).
CCD_compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
: compound_factory_impl(next)
{
}
compound *create(const std::string &id) override;
// Index of the datablocks in the components file, built by the first call to create().
cif::parser::datablock_index mIndex;
fs::path mCompoundsFile;
};
/// Loads the compound id from the components file (or the "components.cif"
/// resource when no file was given) and adds it to m_compounds.
///
/// The first call builds an index of all datablocks, which consumes the input
/// stream; the stream is therefore opened a second time before the requested
/// datablock is parsed.
///
/// Returns nullptr when the resource is missing on the first open or when the
/// compound is not in the file. Throws std::runtime_error when the resource
/// cannot be reopened after indexing.
compound *CCD_compound_factory_impl::create(const std::string &id)
{
compound *result = nullptr;
std::unique_ptr<std::istream> ccd;
if (mCompoundsFile.empty())
{
ccd = cif::load_resource("components.cif");
if (not ccd)
{
std::cerr << "Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data." << std::endl;
return nullptr;
}
}
else
// NOTE(review): the stream is not checked for having been opened successfully.
ccd.reset(new std::ifstream(mCompoundsFile));
cif::file file;
if (mIndex.empty())
{
if (cif::VERBOSE > 1)
{
std::cout << "Creating component index "
<< "...";
std::cout.flush();
}
cif::parser parser(*ccd, file);
mIndex = parser.index_datablocks();
if (cif::VERBOSE > 1)
std::cout << " done" << std::endl;
// reload the resource, perhaps this should be improved...
if (mCompoundsFile.empty())
{
ccd = cif::load_resource("components.cif");
if (not ccd)
throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.");
}
else
ccd.reset(new std::ifstream(mCompoundsFile));
}
if (cif::VERBOSE > 1)
{
std::cout << "Loading component " << id << "...";
std::cout.flush();
}
cif::parser parser(*ccd, file);
parser.parse_single_datablock(id, mIndex);
if (cif::VERBOSE > 1)
std::cout << " done" << std::endl;
if (not file.empty())
{
auto &db = file.front();
// only accept the datablock when it really is the one asked for
if (db.name() == id)
{
result = new compound(db);
// NOTE(review): a shared lock does not exclude other holders of a shared
// lock, so on its own it does not make this push_back safe against
// concurrent readers or writers of m_compounds.
std::shared_lock lock(mMutex);
m_compounds.push_back(result);
}
}
if (result == nullptr and cif::VERBOSE > 0)
std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;
return result;
}
// --------------------------------------------------------------------
// Version for the default compounds, based on the data found in CCP4's monomers lib
/// Link that creates compounds on demand from a CCP4 monomer library.
class CCP4_compound_factory_impl : public compound_factory_impl
{
public:
/// @param clibd_mon  root directory of the monomer library
/// @param next       next link in the chain, may be empty
CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next = nullptr);
compound *create(const std::string &id) override;
private:
// The list file list/mon_lib_list.cif from the library, loaded by the constructor.
cif::file m_file;
// Root directory of the monomer library.
fs::path m_CLIBD_MON;
};
/// Loads list/mon_lib_list.cif from the monomer library and registers every
/// listed compound whose group is a peptide group as a known peptide, and
/// every DNA or RNA compound as a known base.
CCP4_compound_factory_impl::CCP4_compound_factory_impl(const fs::path &clibd_mon, std::shared_ptr<compound_factory_impl> next)
    : compound_factory_impl(next)
    , m_file((clibd_mon / "list" / "mon_lib_list.cif").string())
    , m_CLIBD_MON(clibd_mon)
{
    // matches peptide, l-peptide, m-peptide and p-peptide, in any case
    const std::regex rxPeptide("(?:[lmp]-)?peptide", std::regex::icase);

    auto &listed = m_file["comp_list"]["chem_comp"];

    for (const auto &[group, code] : listed.rows<std::string, std::string>("group", "three_letter_code"))
    {
        if (std::regex_match(group, rxPeptide))
        {
            m_known_peptides.insert(code);
            continue;
        }

        if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
            m_known_bases.insert(code);
    }
}
/// Loads the compound @a id from the CCP4 monomer library and adds it to
/// m_compounds.
///
/// The compound must be listed exactly once (by three letter code) in the
/// library's list file. Its restraints are read from
/// <library>/<first letter of id, lower case>/<id>.cif. For the ids COM, CON
/// and PRN the file <id>_<id>.cif is tried when the regular file is absent.
///
/// @return the new compound, or nullptr when it is not listed or has no file.
compound *CCP4_compound_factory_impl::create(const std::string &id)
{
    auto &cat = m_file["comp_list"]["chem_comp"];

    auto rs = cat.find(cif::key("three_letter_code") == id);
    if (rs.size() != 1)
        return nullptr;

    auto row = rs.front();

    std::string name, group;
    cif::tie(name, group) = row.get("name", "group");

    const fs::path dir = m_CLIBD_MON / cif::to_lower_copy(id.substr(0, 1));

    fs::path resFile = dir / (id + ".cif");

    // Each id is compared explicitly: a bare string literal used as a
    // condition is always true.
    if (not fs::exists(resFile) and (id == "COM" or id == "CON" or id == "PRN")) // seriously...
        resFile = dir / (id + '_' + id + ".cif");

    if (not fs::exists(resFile))
        return nullptr;

    cif::file cf(resFile.string());

    // locate the datablock
    auto &db = cf["comp_" + id];

    std::string type;

    // known groups are (counted from ccp4 monomer dictionary)

    //	D-pyranose
    //	DNA
    //	L-PEPTIDE LINKING
    //	L-SACCHARIDE
    //	L-peptide
    //	L-pyranose
    //	M-peptide
    //	NON-POLYMER
    //	P-peptide
    //	RNA
    //	furanose
    //	non-polymer
    //	non_polymer
    //	peptide
    //	pyranose
    //	saccharide

    if (cif::iequals(id, "gly"))
        type = "peptide linking";
    else if (cif::iequals(group, "l-peptide") or cif::iequals(group, "L-peptide linking") or cif::iequals(group, "peptide") or cif::iequals(group, "p-peptide"))
        type = "L-peptide linking";
    else if (cif::iequals(group, "DNA"))
        type = "DNA linking";
    else if (cif::iequals(group, "RNA"))
        type = "RNA linking";
    else
        type = "non-polymer";

    m_compounds.push_back(new compound(db, id, name, type, group));
    return m_compounds.back();
}
// --------------------------------------------------------------------
// The process-wide factory, created on first use by instance().
std::unique_ptr<compound_factory> compound_factory::s_instance;
// The per-thread factory, used instead of s_instance when s_use_thread_local_instance is set.
thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
// Selects which of the two instances instance() and clear() operate on;
// has static storage duration and is therefore false until init() is called.
bool compound_factory::s_use_thread_local_instance;
/// Chooses between the process-wide instance (false) and a separate instance
/// per thread (true) for all following calls to instance() and clear().
void compound_factory::init(bool useThreadLocalInstanceOnly)
{
s_use_thread_local_instance = useThreadLocalInstanceOnly;
}
/// Sets up the default chain of compound sources.
///
/// A CCD based source is installed when the components.cif resource can be
/// found. A CCP4 monomer library source is installed in front of it when the
/// environment variable CLIBD_MON names a directory. The CCP4 source receives
/// the chain built so far as its next link, so compounds it cannot deliver are
/// still looked up in the CCD source instead of that source being discarded.
compound_factory::compound_factory()
    : m_impl(nullptr)
{
    // only used to find out whether the resource is available
    auto ccd = cif::load_resource("components.cif");
    if (ccd)
        m_impl.reset(new CCD_compound_factory_impl(m_impl));
    else if (cif::VERBOSE > 0)
        std::cerr << "CCD components.cif file was not found" << std::endl;

    const char *clibd_mon = getenv("CLIBD_MON");
    if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
        m_impl.reset(new CCP4_compound_factory_impl(clibd_mon, m_impl));
    else if (cif::VERBOSE > 0)
        std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
}
/// Nothing to do explicitly, the members clean up after themselves. Kept
/// out of line in this translation unit.
compound_factory::~compound_factory() = default;
/// Returns the factory to use: the per-thread one when thread local instances
/// were requested through init(), the process-wide one otherwise. The factory
/// is created on first use.
compound_factory &compound_factory::instance()
{
    auto &slot = s_use_thread_local_instance ? tl_instance : s_instance;

    if (not slot)
        slot.reset(new compound_factory());

    return *slot;
}
/// Destroys the factory selected through init(); the next call to instance()
/// creates a fresh one.
void compound_factory::clear()
{
    auto &slot = s_use_thread_local_instance ? tl_instance : s_instance;
    slot.reset();
}
/// Puts a CCD based source reading from @a inDictFile in front of the current
/// chain of sources.
/// @throws std::runtime_error when the file does not exist, or (nested) when
///         setting up the source fails.
void compound_factory::set_default_dictionary(const fs::path &inDictFile)
{
    const bool present = fs::exists(inDictFile);
    if (not present)
        throw std::runtime_error("file not found: " + inDictFile.string());

    try
    {
        m_impl = std::make_shared<CCD_compound_factory_impl>(m_impl, inDictFile);
    }
    catch (const std::exception &)
    {
        std::throw_with_nested(std::runtime_error("Error loading dictionary " + inDictFile.string()));
    }
}
/// Loads all compounds from @a inDictFile and puts them in front of the
/// current chain of sources.
/// @throws std::runtime_error when the file does not exist, or (nested) when
///         loading it fails.
void compound_factory::push_dictionary(const fs::path &inDictFile)
{
    const bool present = fs::exists(inDictFile);
    if (not present)
        throw std::runtime_error("file not found: " + inDictFile.string());

    try
    {
        m_impl = std::make_shared<compound_factory_impl>(inDictFile, m_impl);
    }
    catch (const std::exception &)
    {
        std::throw_with_nested(std::runtime_error("Error loading dictionary " + inDictFile.string()));
    }
}
/// Removes the front source from the chain; does nothing when the chain is empty.
void compound_factory::pop_dictionary()
{
    if (not m_impl)
        return;

    m_impl = m_impl->next();
}
/// Returns the compound with the given id, or nullptr when it is unknown or
/// when there are no sources at all.
const compound *compound_factory::create(std::string id)
{
    if (not m_impl)
        return nullptr;

    return m_impl->get(id);
}
/// Tells whether @a resName is a known peptide; without any sources only the
/// names in kAAMap are known.
bool compound_factory::is_known_peptide(const std::string &resName) const
{
    if (m_impl)
        return m_impl->is_known_peptide(resName);

    return kAAMap.count(resName) > 0;
}
/// Tells whether @a resName is a known base; without any sources only the
/// names in kBaseMap are known.
bool compound_factory::is_known_base(const std::string &resName) const
{
    if (m_impl)
        return m_impl->is_known_base(resName);

    return kBaseMap.count(resName) > 0;
}
} // namespace cif

138
src/condition.cpp Normal file
View File

@@ -0,0 +1,138 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/category.hpp>
#include <cif++/condition.hpp>
namespace cif
{
/// Free-function wrapper that returns cat.key_fields().
iset get_category_fields(const category &cat)
{
return cat.key_fields();
}
/// Free-function wrapper that returns cat.get_column_ix(col), the index of the column called col.
uint16_t get_column_ix(const category &cat, std::string_view col)
{
return cat.get_column_ix(col);
}
/// Tells whether the column @a col of @a cat is declared with primitive type
/// UChar. The answer is false when the category has no validator, the item is
/// unknown to it, or the item has no type.
bool is_column_type_uchar(const category &cat, std::string_view col)
{
    auto catValidator = cat.get_cat_validator();
    if (not catValidator)
        return false;

    auto itemValidator = catValidator->get_validator_for_item(col);
    if (itemValidator == nullptr or itemValidator->m_type == nullptr)
        return false;

    return itemValidator->m_type->m_primitive_type == DDL_PrimitiveType::UChar;
}
namespace detail
{
/// Binds this condition to the category @a c: the column index of the item is
/// resolved and matching is made case-insensitive for UChar columns. When the
/// category has a validator and the item is its one and only key field, the
/// matching row is looked up right away and kept in m_single_hit.
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
    m_item_ix = get_column_ix(c, m_item_tag);
    m_icase = is_column_type_uchar(c, m_item_tag);

    if (c.get_cat_validator() != nullptr)
    {
        // is the item the single key of the category?
        if (c.key_field_indices().contains(m_item_ix) and c.key_field_indices().size() == 1)
            m_single_hit = c[{ { m_item_tag, m_value } }];
    }

    return this;
}
/// Prepares all sub conditions for the category c and then flattens nested
/// AND conditions into this one.
condition_impl *and_condition_impl::prepare(const category &c)
{
// prepare() may return a replacement object, so store whatever it returns
for (auto &sub : mSub)
sub = sub->prepare(c);
// Repeat until no direct sub condition is itself an AND condition
for (;;)
{
auto si = find_if(mSub.begin(), mSub.end(), [](condition_impl *sub) { return dynamic_cast<and_condition_impl *>(sub) != nullptr; });
if (si == mSub.end())
break;
and_condition_impl *sub_and = static_cast<and_condition_impl *>(*si);
// take the nested AND out and append its sub conditions to our own list
mSub.erase(si);
mSub.insert(mSub.end(), sub_and->mSub.begin(), sub_and->mSub.end());
// the sub conditions were moved to this object; the nested list is emptied
// before the nested AND is deleted — presumably because its destructor
// would otherwise delete them (confirm in the class definition)
sub_and->mSub.clear();
delete sub_and;
}
return this;
}
/// Prepares both operands for the category c. When one operand is a
/// key-equals condition and the other a key-is-empty condition, the pair is
/// replaced by a single key_equals_or_empty_condition_impl.
///
/// In that case this object deletes itself and the replacement is returned;
/// callers must continue with the returned pointer only.
condition_impl *or_condition_impl::prepare(const category &c)
{
condition_impl *result = this;
// prepare() may return a replacement object, so store whatever it returns
mA = mA->prepare(c);
mB = mB->prepare(c);
// first try the order (equals, empty) ...
key_equals_condition_impl *equals = dynamic_cast<key_equals_condition_impl*>(mA);
key_is_empty_condition_impl *empty = dynamic_cast<key_is_empty_condition_impl*>(mB);
// ... and when neither cast succeeded, the order (empty, equals)
if (equals == nullptr and empty == nullptr)
{
equals = dynamic_cast<key_equals_condition_impl*>(mB);
empty = dynamic_cast<key_is_empty_condition_impl*>(mA);
}
if (equals != nullptr and empty != nullptr)
{
result = new detail::key_equals_or_empty_condition_impl(equals, empty);
result = result->prepare(c);
// NOTE(review): no member may be touched after this line. This is only safe if
// the replacement does not keep pointers to equals/empty, assuming the
// destructor deletes mA and mB — confirm.
delete this;
}
return result;
}
} // namespace detail
/// Prepares this condition for use with the category c and marks it as prepared.
void condition::prepare(const category &c)
{
// the implementation may replace itself with another object, keep what it returns
if (m_impl)
m_impl = m_impl->prepare(c);
// set even when there is no implementation
m_prepared = true;
}
} // namespace cif

326
src/datablock.cpp Normal file
View File

@@ -0,0 +1,326 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/datablock.hpp>
namespace cif
{
void datablock::set_validator(const validator *v)
{
m_validator = v;
try
{
for (auto &cat : *this)
cat.set_validator(v, *this);
}
catch(const std::exception& e)
{
throw_with_nested(std::runtime_error("Error while setting validator in datablock " + m_name));
}
}
// Returns the validator pointer stored by set_validator(). The pointer may
// be nullptr: is_valid() explicitly checks for that case.
const validator *datablock::get_validator() const
{
return m_validator;
}
// Validates every category in this datablock. All categories are always
// visited, even after the first failure. Returns true only when each of
// them reports itself valid.
// Throws std::runtime_error when no validator has been set.
bool datablock::is_valid() const
{
    if (m_validator == nullptr)
        throw std::runtime_error("Validator not specified");

    bool all_valid = true;

    for (const auto &cat : *this)
    {
        if (not cat.is_valid())
            all_valid = false;
    }

    return all_valid;
}
// Asks every category in this datablock to validate its links. All
// categories are visited regardless of earlier failures; the result is true
// only when none of them failed.
bool datablock::validate_links() const
{
    bool all_valid = true;

    for (const auto &cat : *this)
    {
        if (not cat.validate_links())
            all_valid = false;
    }

    return all_valid;
}
// --------------------------------------------------------------------
// Returns the category called name (compared case-insensitively). When no
// such category exists a new one is appended, given the datablock's
// validator if there is one, and returned.
category &datablock::operator[](std::string_view name)
{
    for (auto &existing : *this)
    {
        if (iequals(existing.name(), name))
            return existing;
    }

    auto &created = emplace_back(name);

    if (m_validator != nullptr)
        created.set_validator(m_validator, *this);

    return back();
}
// Read-only lookup of the category called name (compared
// case-insensitively). Nothing is created here: when the category is absent
// a reference to a shared, default-constructed category is returned instead.
const category &datablock::operator[](std::string_view name) const
{
    static const category s_empty;

    for (const auto &cat : *this)
    {
        if (iequals(cat.name(), name))
            return cat;
    }

    return s_empty;
}
// Returns a pointer to the first category whose name equals name (compared
// case-insensitively), or nullptr when there is no such category.
category *datablock::get(std::string_view name)
{
    for (auto &cat : *this)
    {
        if (iequals(cat.name(), name))
            return &cat;
    }

    return nullptr;
}
// Const overload of get(): forwards to the non-const get() (which only
// searches) and returns its result as a pointer-to-const, nullptr included.
const category *datablock::get(std::string_view name) const
{
return const_cast<datablock *>(this)->get(name);
}
std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)
{
bool is_new = true;
auto i = begin();
while (i != end())
{
if (iequals(name, i->name()))
{
is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break;
}
++i;
}
if (is_new)
{
auto &c = emplace_front(name);
c.set_validator(m_validator, *this);
}
return std::make_tuple(begin(), is_new);
}
// Collects the tag order of all categories in this datablock. The tags of
// the categories named "entry" and "audit_conform" come first (in that
// order, when present), followed by those of the remaining categories in
// datablock order.
std::vector<std::string> datablock::get_tag_order() const
{
    std::vector<std::string> result;

    auto append_tags_of = [&result](const category &cat)
    {
        auto cto = cat.get_tag_order();
        result.insert(result.end(), cto.begin(), cto.end());
    };

    // entry and audit_conform go on top
    for (const char *leading : { "entry", "audit_conform" })
    {
        auto ci = std::find_if(begin(), end(), [leading](const category &cat)
            { return cat.name() == leading; });

        if (ci != end())
            append_tags_of(*ci);
    }

    for (auto &cat : *this)
    {
        const bool already_listed = cat.name() == "entry" or cat.name() == "audit_conform";
        if (not already_listed)
            append_tags_of(cat);
    }

    return result;
}
void datablock::write(std::ostream &os) const
{
os << "data_" << m_name << std::endl
<< "# " << std::endl;
// mmcif support, sort of. First write the 'entry' Category
// and if it exists, _AND_ we have a Validator, write out the
// audit_conform record.
for (auto &cat : *this)
{
if (cat.name() != "entry")
continue;
cat.write(os);
break;
}
// If the dictionary declares an audit_conform category, put it in,
// but only if it does not exist already!
if (get("audit_conform"))
get("audit_conform")->write(os);
else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
{
category auditConform("audit_conform");
auditConform.emplace({
{"dict_name", m_validator->name()},
{"dict_version", m_validator->version()}});
auditConform.write(os);
}
for (auto &cat : *this)
{
if (cat.name() != "entry" and cat.name() != "audit_conform")
cat.write(os);
}
}
// Writes this datablock to os, ordering categories and their items by
// tag_order.
//
// tag_order holds full tag names; each is split into a category name and an
// item name with split_tag_name. Categories are written in the order in
// which they first appear in tag_order, and each receives the list of its
// item names from tag_order. Categories not mentioned in tag_order are
// written afterwards in datablock order.
//
// Category names are compared case-insensitively throughout. Previously the
// item collection used a case-sensitive compare, so a tag spelling its
// category in a different case was silently left out of the item list.
void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order)
{
    os << "data_" << m_name << std::endl
       << "# " << std::endl;

    // Unique category names, in order of first appearance
    std::vector<std::string> cat_order;
    for (auto &o : tag_order)
    {
        std::string cat_name, item_name;
        std::tie(cat_name, item_name) = split_tag_name(o);

        if (find_if(cat_order.rbegin(), cat_order.rend(), [&cat_name](const std::string &s) -> bool
                { return iequals(cat_name, s); }) == cat_order.rend())
            cat_order.push_back(cat_name);
    }

    for (auto &c : cat_order)
    {
        auto cat = get(c);
        if (cat == nullptr)
            continue;

        // The items of this category, in the requested order
        std::vector<std::string> items;
        for (auto &o : tag_order)
        {
            std::string cat_name, item_name;
            std::tie(cat_name, item_name) = split_tag_name(o);

            if (iequals(cat_name, c))
                items.push_back(item_name);
        }

        cat->write(os, items);
    }

    // for any category we missed in cat_order
    for (auto &cat : *this)
    {
        if (find_if(cat_order.begin(), cat_order.end(), [&](const std::string &s) -> bool
                { return iequals(cat.name(), s); }) != cat_order.end())
            continue;

        cat.write(os);
    }
}
// Two datablocks are equal when they hold the same set of non-empty
// categories (names compared case-insensitively) and each pair of
// equally-named categories compares equal. Empty categories are ignored.
//
// Names are lower-cased before sorting so that the sorted lists of both
// sides line up even when the two datablocks spell a category name in
// different case; sorting the raw names and comparing them
// case-insensitively could report a mismatch for equal sets.
bool datablock::operator==(const datablock &rhs) const
{
    auto non_empty_category_names = [](const datablock &db)
    {
        std::vector<std::string> names;

        for (auto &cat : db)
        {
            if (cat.empty())
                continue;

            names.push_back(cat.name());
            to_lower(names.back());
        }

        std::sort(names.begin(), names.end());
        return names;
    };

    const auto catA = non_empty_category_names(*this);
    const auto catB = non_empty_category_names(rhs);

    // First check: both sides must contain the same categories
    if (catA != catB)
        return false;

    // Second check: compare the category contents. get() looks names up
    // case-insensitively, so the lower-cased names find the originals.
    for (auto &name : catA)
    {
        if (not (*get(name) == *rhs.get(name)))
            return false;
    }

    return true;
}
} // namespace cif

490
src/dictionary_parser.cpp Normal file
View File

@@ -0,0 +1,490 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/condition.hpp>
#include <cif++/dictionary_parser.hpp>
#include <cif++/file.hpp>
#include <cif++/parser.hpp>
namespace cif
{
using namespace literals;
// Replaces every non-overlapping occurrence of pat in s with rep, scanning
// left to right. Text inserted by a replacement is never searched again, so
// the function terminates even when rep itself contains pat. An empty pat
// matches nothing and leaves s unchanged.
inline void replace_all(std::string &s, std::string_view pat, std::string_view rep)
{
    if (pat.empty())
        return;

    // Resume the search just past the inserted replacement text
    for (std::string::size_type i = s.find(pat); i != std::string::npos; i = s.find(pat, i + rep.size()))
        s.replace(i, pat.size(), rep.data(), rep.size());
}
class dictionary_parser : public parser
{
public:
// Constructs a dictionary parser reading from is. The stream and file are
// passed on to the parser base class; validator is kept by reference and
// receives the validators built by load_dictionary().
dictionary_parser(validator &validator, std::istream &is, file &f)
: parser(is, f)
, m_validator(validator)
{
}
void load_dictionary()
{
std::unique_ptr<datablock> dict;
auto savedDatablock = m_datablock;
try
{
while (m_lookahead != CIFToken::Eof)
{
switch (m_lookahead)
{
case CIFToken::GLOBAL:
parse_global();
break;
default:
{
dict.reset(new datablock(m_token_value)); // dummy datablock, for constructing the validator only
m_datablock = dict.get();
match(CIFToken::DATA);
parse_datablock();
break;
}
}
}
}
catch (const std::exception &ex)
{
error(ex.what());
}
// store all validators
for (auto &ic : mCategoryValidators)
m_validator.add_category_validator(std::move(ic));
mCategoryValidators.clear();
for (auto &iv : mItemValidators)
{
auto cv = m_validator.get_validator_for_category(iv.first);
if (cv == nullptr)
error("Undefined category '" + iv.first);
for (auto &v : iv.second)
const_cast<category_validator *>(cv)->addItemValidator(std::move(v));
}
// check all item validators for having a typeValidator
if (dict)
link_items();
// store meta information
datablock::iterator info;
bool is_new;
std::tie(info, is_new) = m_datablock->emplace("dictionary");
if (not is_new and not info->empty())
{
auto r = info->front();
m_validator.set_name(r["title"].as<std::string>());
m_validator.version(r["version"].as<std::string>());
}
m_datablock = savedDatablock;
mItemValidators.clear();
}
private:
void parse_save_frame() override
{
if (not m_collected_item_types)
m_collected_item_types = collect_item_types();
std::string saveFrameName = m_token_value;
if (saveFrameName.empty())
error("Invalid save frame, should contain more than just 'save_' here");
bool isCategorySaveFrame = m_token_value[0] != '_';
datablock dict(m_token_value);
datablock::iterator cat = dict.end();
match(CIFToken::SAVE);
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
{
if (m_lookahead == CIFToken::LOOP)
{
cat = dict.end(); // should start a new category
match(CIFToken::LOOP);
std::vector<std::string> tags;
while (m_lookahead == CIFToken::Tag)
{
std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value);
if (cat == dict.end())
std::tie(cat, std::ignore) = dict.emplace(catName);
else if (not iequals(cat->name(), catName))
error("inconsistent categories in loop_");
tags.push_back(item_name);
match(CIFToken::Tag);
}
while (m_lookahead == CIFToken::Value)
{
cat->emplace({});
auto row = cat->back();
for (auto tag : tags)
{
row[tag] = m_token_value;
match(CIFToken::Value);
}
}
cat = dict.end();
}
else
{
std::string catName, item_name;
std::tie(catName, item_name) = split_tag_name(m_token_value);
if (cat == dict.end() or not iequals(cat->name(), catName))
std::tie(cat, std::ignore) = dict.emplace(catName);
match(CIFToken::Tag);
if (cat->empty())
cat->emplace({});
cat->back()[item_name] = m_token_value;
match(CIFToken::Value);
}
}
match(CIFToken::SAVE);
if (isCategorySaveFrame)
{
std::string category = dict["category"].front().get<std::string>("id");
std::vector<std::string> keys;
for (auto k : dict["category_key"])
keys.push_back(std::get<1>(split_tag_name(k["name"].as<std::string>())));
iset groups;
for (auto g : dict["category_group"])
groups.insert(g["id"].as<std::string>());
mCategoryValidators.push_back(category_validator{ category, keys, groups });
}
else
{
// if the type code is missing, this must be a pointer, just skip it
std::string typeCode = dict["item_type"].front().get<std::string>("code");
const type_validator *tv = nullptr;
if (not(typeCode.empty() or typeCode == "?"))
tv = m_validator.get_validator_for_type(typeCode);
iset ess;
for (auto e : dict["item_enumeration"])
ess.insert(e["value"].as<std::string>());
std::string defaultValue = dict["item_default"].front().get<std::string>("value");
bool defaultIsNull = false;
if (defaultValue.empty())
{
// TODO: Is this correct???
for (auto r : dict["_item_default"])
{
defaultIsNull = r["value"].is_null();
break;
}
}
// collect the dict from our dataBlock and construct validators
for (auto i : dict["item"])
{
std::string tagName, category, mandatory;
cif::tie(tagName, category, mandatory) = i.get("name", "category_id", "mandatory_code");
std::string cat_name, item_name;
std::tie(cat_name, item_name) = split_tag_name(tagName);
if (cat_name.empty() or item_name.empty())
error("Invalid tag name in _item.name " + tagName);
if (not iequals(category, cat_name) and not(category.empty() or category == "?"))
error("specified category id does match the implicit category name for tag '" + tagName + '\'');
else
category = cat_name;
auto &ivs = mItemValidators[category];
auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
if (vi == ivs.end())
ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull });
else
{
// need to update the itemValidator?
if (vi->m_mandatory != (iequals(mandatory, "yes")))
{
if (VERBOSE > 2)
{
std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
if (iequals(tagName, saveFrameName))
std::cerr << "choosing " << mandatory << std::endl;
else
std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << std::endl;
}
if (iequals(tagName, saveFrameName))
vi->m_mandatory = (iequals(mandatory, "yes"));
}
if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
{
if (VERBOSE > 1)
std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
}
// vi->mMandatory = (iequals(mandatory, "yes"));
if (vi->m_type == nullptr)
vi->m_type = tv;
vi->m_enums.insert(ess.begin(), ess.end());
// anything else yet?
// ...
}
}
// collect the dict from our dataBlock and construct validators
for (auto i : dict["item_linked"])
{
mLinkedItems.emplace(i.get<std::string,std::string>("child_name", "parent_name"));
}
}
}
void link_items()
{
if (not m_datablock)
error("no datablock");
auto &dict = *m_datablock;
// links are identified by a parent category, a child category and a group ID
using key_type = std::tuple<std::string, std::string, int>;
std::map<key_type, size_t> linkIndex;
// Each link group consists of a set of keys
std::vector<std::tuple<std::vector<std::string>, std::vector<std::string>>> linkKeys;
auto addLink = [&](size_t ix, const std::string &pk, const std::string &ck)
{
auto &&[pkeys, ckeys] = linkKeys.at(ix);
bool found = false;
for (size_t i = 0; i < pkeys.size(); ++i)
{
if (pkeys[i] == pk and ckeys[i] == ck)
{
found = true;
break;
}
}
if (not found)
{
pkeys.push_back(pk);
ckeys.push_back(ck);
}
};
auto &linkedGroupList = dict["pdbx_item_linked_group_list"];
for (auto gl : linkedGroupList)
{
std::string child, parent;
int link_group_id;
cif::tie(child, parent, link_group_id) = gl.get("child_name", "parent_name", "link_group_id");
auto civ = m_validator.get_validator_for_item(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = m_validator.get_validator_for_item(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->m_category->m_name, civ->m_category->m_name, link_group_id };
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag);
}
// Only process inline linked items if the linked group list is absent
if (linkedGroupList.empty())
{
// for links recorded in categories but not in pdbx_item_linked_group_list
for (auto li : mLinkedItems)
{
std::string child, parent;
std::tie(child, parent) = li;
auto civ = m_validator.get_validator_for_item(child);
if (civ == nullptr)
error("in pdbx_item_linked_group_list, item '" + child + "' is not specified");
auto piv = m_validator.get_validator_for_item(parent);
if (piv == nullptr)
error("in pdbx_item_linked_group_list, item '" + parent + "' is not specified");
key_type key{ piv->m_category->m_name, civ->m_category->m_name, 0 };
if (not linkIndex.count(key))
{
linkIndex[key] = linkKeys.size();
linkKeys.push_back({});
}
size_t ix = linkIndex.at(key);
addLink(ix, piv->m_tag, civ->m_tag);
}
}
auto &linkedGroup = dict["pdbx_item_linked_group"];
// now store the links in the validator
for (auto &kv : linkIndex)
{
link_validator link = {};
std::tie(link.m_parent_category, link.m_child_category, link.m_link_group_id) = kv.first;
std::tie(link.m_parent_keys, link.m_child_keys) = linkKeys[kv.second];
// look up the label
for (auto r : linkedGroup.find("category_id"_key == link.m_child_category and "link_group_id"_key == link.m_link_group_id))
{
link.m_link_group_label = r["label"].as<std::string>();
break;
}
m_validator.add_link_validator(std::move(link));
}
// now make sure the itemType is specified for all itemValidators
for (auto &cv : m_validator.m_category_validators)
{
for (auto &iv : cv.m_item_validators)
{
if (iv.m_type == nullptr and cif::VERBOSE >= 0)
std::cerr << "Missing item_type for " << iv.m_tag << std::endl;
}
}
}
// Creates a type validator in m_validator for every row of the
// item_type_list category of the current datablock. Returns true when at
// least one type was added. A std::exception raised while constructing or
// adding a type validator is rethrown nested in a parse_error.
bool collect_item_types()
{
    if (not m_datablock)
        error("no datablock");

    bool added_any = false;

    for (auto type_row : (*m_datablock)["item_type_list"])
    {
        std::string code, primitiveCode, construct;
        cif::tie(code, primitiveCode, construct) = type_row.get("code", "primitive_code", "construct");

        // The construct is stored with escaped newlines and tabs, and with
        // backslash-newline line continuations; undo those
        replace_all(construct, "\\n", "\n");
        replace_all(construct, "\\t", "\t");
        replace_all(construct, "\\\n", "");

        try
        {
            type_validator v = {
                code, map_to_primitive_type(primitiveCode), construct
            };

            m_validator.add_type_validator(std::move(v));
        }
        catch (const std::exception &)
        {
            std::throw_with_nested(parse_error(/*t.lineNr()*/ 0, "error in regular expression"));
        }

        // Do not replace an already defined type validator, this won't work with pdbx_v40
        // as it has a name that is too strict for its own names :-)
        // if (mFileImpl.mTypeValidators.count(v))
        // mFileImpl.mTypeValidators.erase(v);

        if (VERBOSE >= 5)
            std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;

        added_any = true;
    }

    return added_any;
}
validator &m_validator;
bool m_collected_item_types = false;
std::vector<category_validator> mCategoryValidators;
std::map<std::string, std::vector<item_validator>> mItemValidators;
std::set<std::tuple<std::string, std::string>> mLinkedItems;
};
// --------------------------------------------------------------------
// Builds a validator called name from the dictionary text read from is.
// A dictionary_parser, working on a local scratch file object, fills the
// validator, which is then returned by value.
validator parse_dictionary(std::string_view name, std::istream &is)
{
validator result(name);
file f;
dictionary_parser p(result, is, f);
p.load_dictionary();
return result;
}
} // namespace cif

228
src/file.cpp Normal file
View File

@@ -0,0 +1,228 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/file.hpp>
#include <cif++/gzio.hpp>
namespace cif
{
// --------------------------------------------------------------------
void file::set_validator(const validator *v)
{
m_validator = v;
for (auto &db : *this)
db.set_validator(v);
}
// Validates every datablock in this file and, when all of them are valid,
// the links as well. All datablocks are visited, even after a failure.
// Throws std::runtime_error when no validator has been set; this const
// overload cannot load a dictionary itself.
bool file::is_valid() const
{
    // The exception used to be constructed but never thrown
    if (m_validator == nullptr)
        throw std::runtime_error("No validator loaded explicitly, cannot continue");

    bool result = true;
    for (auto &d : *this)
        result = d.is_valid() and result;

    if (result)
        result = validate_links();

    return result;
}
// Validates every datablock in this file and, when all of them are valid,
// the links as well. When no validator has been set yet, load_dictionary()
// is called first. A file without datablocks is never valid.
bool file::is_valid()
{
    if (m_validator == nullptr)
    {
        if (VERBOSE > 0)
            std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;

        load_dictionary();
    }

    bool all_valid = not empty();

    // visit every datablock, also after the first failure
    for (auto &d : *this)
    {
        if (not d.is_valid())
            all_valid = false;
    }

    return all_valid and validate_links();
}
// Validates the links of every datablock in this file; all datablocks are
// visited, even after a failure. Returns true only when none of them
// failed. Throws std::runtime_error when no validator has been set.
bool file::validate_links() const
{
    // The exception used to be constructed but never thrown
    if (m_validator == nullptr)
        throw std::runtime_error("No validator loaded explicitly, cannot continue");

    bool result = true;
    for (auto &db : *this)
        result = db.validate_links() and result;

    return result;
}
// Tries to load the dictionary named in the audit_conform category of the
// first datablock. Nothing happens when the file is empty, when that
// category is missing or empty, or when it holds no dictionary name. A
// failure to load the dictionary is not fatal: it is only reported on
// std::cerr when VERBOSE is set.
void file::load_dictionary()
{
    if (empty())
        return;

    auto *audit_conform = front().get("audit_conform");
    if (audit_conform == nullptr or audit_conform->empty())
        return;

    std::string name = audit_conform->front().get<std::string>("dict_name");

    if (name == "mmcif_pdbx_v50")
        name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...

    if (name.empty())
        return;

    try
    {
        load_dictionary(name);
    }
    catch (const std::exception &ex)
    {
        if (VERBOSE)
            std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << std::endl;
    }

    // if (not m_validator)
    // load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve?
}
// Makes the validator that validator_factory provides for name the
// validator of this file (and, through set_validator, of its datablocks).
void file::load_dictionary(std::string_view name)
{
set_validator(&validator_factory::instance()[name]);
}
// Returns true when this file has a datablock called name. Names are
// compared case-insensitively, the same way operator[] and emplace() look
// up datablocks, so contains(n) agrees with what those functions find.
bool file::contains(std::string_view name) const
{
    return std::find_if(begin(), end(), [name](const datablock &db)
        { return iequals(db.name(), name); }) != end();
}
// Returns the datablock called name (compared case-insensitively). When
// there is none, a new datablock with that name is appended and returned.
datablock &file::operator[](std::string_view name)
{
    for (auto &db : *this)
    {
        if (iequals(db.name(), name))
            return db;
    }

    emplace_back(name);
    return back();
}
// Read-only lookup of the datablock called name (compared
// case-insensitively). Nothing is created: when the datablock is absent a
// reference to a shared, default-constructed datablock is returned.
const datablock &file::operator[](std::string_view name) const
{
    static const datablock s_empty;

    for (const auto &db : *this)
    {
        if (iequals(db.name(), name))
            return db;
    }

    return s_empty;
}
std::tuple<file::iterator, bool> file::emplace(std::string_view name)
{
bool is_new = true;
auto i = begin();
while (i != end())
{
if (iequals(name, i->name()))
{
is_new = false;
if (i != begin())
{
auto n = std::next(i);
splice(begin(), *this, i, n);
}
break;
}
++i;
}
if (is_new)
{
auto &db = emplace_front(name);
db.set_validator(m_validator);
}
return std::make_tuple(begin(), is_new);
}
// Loads the file at path p through a gzio::ifstream. Every std::exception
// raised on the way, including the one for a file that cannot be opened, is
// rethrown nested inside a std::runtime_error that names the file.
void file::load(const std::filesystem::path &p)
{
    try
    {
        gzio::ifstream in(p);

        if (in.is_open())
            load(in);
        else
            throw std::runtime_error("Could not open file " + p.string());
    }
    catch (const std::exception &)
    {
        throw_with_nested(std::runtime_error("Error reading file " + p.string()));
    }
}
// Parses the content of is into this file. The validator is removed while
// parsing. Afterwards the validator that was set before is put back, or,
// if there was none, load_dictionary() is called.
void file::load(std::istream &is)
{
    const auto previous_validator = m_validator;
    set_validator(nullptr);

    parser p(is, *this);
    p.parse_file();

    if (previous_validator == nullptr)
        load_dictionary();
    else
        set_validator(previous_validator);
}
// Writes this file to path p through a gzio::ofstream.
// NOTE(review): unlike load(), no check is made here that the stream could
// actually be opened -- confirm whether a failure to open should be reported.
void file::save(const std::filesystem::path &p) const
{
gzio::ofstream outFile(p);
save(outFile);
}
// Writes all datablocks of this file to os, in order. The file is written
// as is: the validity check below is disabled.
void file::save(std::ostream &os) const
{
// if (not is_valid())
// std::cout << "File is not valid!" << std::endl;
for (auto &db : *this)
db.write(os);
}
} // namespace cif

68
src/item.cpp Normal file
View File

@@ -0,0 +1,68 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <cif++/row.hpp>
namespace cif
{
// Definition of the static null item of item_handle; it is created with the
// default constructor.
// NOTE(review): that constructor refers to s_null_row_handle, which is
// defined after this object -- fine if m_row_handle is a reference, confirm.
const item_handle item_handle::s_null_item;
// The row handle that default-constructed item_handles refer to.
row_handle s_null_row_handle;
// Default constructor: the column index is set to the largest uint16_t
// value and the row handle is initialised from s_null_row_handle.
item_handle::item_handle()
: m_column(std::numeric_limits<uint16_t>::max())
, m_row_handle(s_null_row_handle)
{
}
// Returns the text of the value stored in this item's column of its row.
// An empty string_view is returned when the row handle is empty or when the
// row holds no value for the column.
std::string_view item_handle::text() const
{
    if (m_row_handle.empty())
        return {};

    auto iv = m_row_handle.m_row->get(m_column);
    if (iv == nullptr)
        return {};

    return iv->text();
}
// Assigns the value of v to this item's column through the row handle. The
// row handle must not be empty (checked with assert only).
// NOTE(review): the meaning of the final 'true' argument of assign() is not
// visible here -- confirm against row_handle::assign.
void item_handle::assign_value(const item &v)
{
assert(not m_row_handle.empty());
m_row_handle.assign(m_column, v.value(), true);
}
// Swaps the value in this item's column with the one in the same column of
// b's row, by delegating to row_handle::swap. Both handles must refer to
// the same column (checked with assert only).
void item_handle::swap(item_handle &b)
{
assert(m_column == b.m_column);
// assert(&m_row_handle.m_category == &b.m_row_handle.m_category);
m_row_handle.swap(m_column, b.m_row_handle);
}
}

2577
src/model.cpp Normal file

File diff suppressed because it is too large Load Diff

826
src/parser.cpp Normal file
View File

@@ -0,0 +1,826 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <iostream>
#include <map>
#include <regex>
#include <stack>
#include <cif++/utilities.hpp>
#include <cif++/forward_decl.hpp>
#include <cif++/parser.hpp>
#include <cif++/file.hpp>
namespace cif
{
// Verbosity level, defined in another translation unit. In this file higher
// values enable more diagnostic output on std::cerr (e.g. >= 6 traces every
// character read by get_next_char).
extern int VERBOSE;
}
namespace cif
{
// --------------------------------------------------------------------
// Constructs a parser reading from the stream buffer of is. When init is
// true the first token is read right away, so that m_lookahead is valid.
//
// Throws std::runtime_error when is has no stream buffer. The check is part
// of the member initialiser because m_source is initialised from
// *is.rdbuf(); testing only in the constructor body would come after the
// null pointer had already been dereferenced.
sac_parser::sac_parser(std::istream &is, bool init)
    : m_source(is.rdbuf() != nullptr
                   ? *is.rdbuf()
                   : throw std::runtime_error("Attempt to read from uninitialised stream"))
{
    m_validate = true;
    m_line_nr = 1;
    m_bol = true;

    if (init)
        m_lookahead = get_next_token();
}
// get_next_char returns the next character: the one most recently pushed
// back into m_buffer if there is any, otherwise the next one from the
// source. A CR, optionally followed by LF, is delivered as a single LF.
// The character is also appended to m_token_value (end of file is stored
// as a 0 byte) and the line counter is advanced on every LF.
int sac_parser::get_next_char()
{
    const int kEndOfFile = std::char_traits<char>::eof();

    int ch = kEndOfFile;

    if (not m_buffer.empty())
    {
        ch = m_buffer.back();
        m_buffer.pop_back();
    }
    else
        ch = m_source.sbumpc();

    // very simple CR/LF translation into LF
    if (ch == '\r')
    {
        const int following = m_source.sbumpc();

        // anything but the LF of a CR/LF pair has to be delivered later
        if (following != '\n')
            m_buffer.push_back(following);

        ch = '\n';
    }

    if (ch == kEndOfFile)
        m_token_value.push_back(0);
    else
        m_token_value.push_back(std::char_traits<char>::to_char_type(ch));

    if (ch == '\n')
        ++m_line_nr;

    if (VERBOSE >= 6)
    {
        std::cerr << "get_next_char => ";

        const bool printable = not(iscntrl(ch) or not isprint(ch));
        if (printable)
            std::cerr << char(ch) << std::endl;
        else
            std::cerr << int(ch) << std::endl;
    }

    return ch;
}
// Pushes the last character of the current token back into m_buffer, so
// that get_next_char delivers it again, and removes it from m_token_value.
// A 0 byte stands for end of file (see get_next_char). Taking back a
// newline also takes back the line count.
void sac_parser::retract()
{
    assert(not m_token_value.empty());

    const char last = m_token_value.back();

    if (last == '\n')
        --m_line_nr;

    if (last == 0)
        m_buffer.push_back(std::char_traits<char>::eof());
    else
        m_buffer.push_back(std::char_traits<char>::to_int_type(last));

    m_token_value.pop_back();
}
// Gives up on the token recognised so far: every character consumed for it
// is pushed back, and the state to try next is returned. The order is
// Start -> Float -> Int -> Value; any other start state is reported through
// error(). m_bol is cleared.
int sac_parser::restart(int start)
{
    while (not m_token_value.empty())
        retract();

    int next_state = 0;

    if (start == State::Start)
        next_state = State::Float;
    else if (start == State::Float)
        next_state = State::Int;
    else if (start == State::Int)
        next_state = State::Value;
    else
        error("Invalid state in SacParser");

    m_bol = false;

    return next_state;
}
sac_parser::CIFToken sac_parser::get_next_token()
{
const auto kEOF = std::char_traits<char>::eof();
CIFToken result = CIFToken::Unknown;
int quoteChar = 0;
int state = State::Start, start = State::Start;
m_bol = false;
m_token_value.clear();
mTokenType = CIFValue::Unknown;
while (result == CIFToken::Unknown)
{
auto ch = get_next_char();
switch (state)
{
case State::Start:
if (ch == kEOF)
result = CIFToken::Eof;
else if (ch == '\n')
{
m_bol = true;
state = State::White;
}
else if (ch == ' ' or ch == '\t')
state = State::White;
else if (ch == '#')
state = State::Comment;
else if (ch == '_')
state = State::Tag;
else if (ch == ';' and m_bol)
state = State::TextField;
else if (ch == '\'' or ch == '"')
{
quoteChar = ch;
state = State::QuotedString;
}
else
state = start = restart(start);
break;
case State::White:
if (ch == kEOF)
result = CIFToken::Eof;
else if (not isspace(ch))
{
state = State::Start;
retract();
m_token_value.clear();
}
else
m_bol = (ch == '\n');
break;
case State::Comment:
if (ch == '\n')
{
state = State::Start;
m_bol = true;
m_token_value.clear();
}
else if (ch == kEOF)
result = CIFToken::Eof;
else if (not is_any_print(ch))
error("invalid character in comment");
break;
case State::TextField:
if (ch == '\n')
state = State::TextField + 1;
else if (ch == kEOF)
error("unterminated textfield");
// else if (ch == '\\')
// state = State::Esc;
else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break;
// case State::Esc:
// if (ch == '\n')
// break;
case State::TextField + 1:
if (is_text_lead(ch) or ch == ' ' or ch == '\t')
state = State::TextField;
else if (ch == ';')
{
assert(m_token_value.length() >= 2);
m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
mTokenType = CIFValue::TextField;
result = CIFToken::Value;
}
else if (ch == kEOF)
error("unterminated textfield");
else if (ch != '\n')
error("invalid character in text field");
break;
case State::QuotedString:
if (ch == kEOF)
error("unterminated quoted string");
else if (ch == quoteChar)
state = State::QuotedStringQuote;
else if (not is_any_print(ch) and cif::VERBOSE > 2)
warning("invalid character in quoted string: '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
break;
case State::QuotedStringQuote:
if (is_white(ch))
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::String;
if (m_token_value.length() < 2)
error("Invalid quoted string token");
m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
}
else if (ch == quoteChar)
;
else if (is_any_print(ch))
state = State::QuotedString;
else if (ch == kEOF)
error("unterminated quoted string");
else
error("invalid character in quoted string");
break;
case State::Tag:
if (not is_non_blank(ch))
{
retract();
result = CIFToken::Tag;
}
break;
case State::Float:
if (ch == '+' or ch == '-')
{
state = State::Float + 1;
}
else if (isdigit(ch))
state = State::Float + 1;
else
state = start = restart(start);
break;
case State::Float + 1:
// if (ch == '(') // numeric???
// mState = State::NumericSuffix;
// else
if (ch == '.')
state = State::Float + 2;
else if (tolower(ch) == 'e')
state = State::Float + 3;
else if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Int;
}
else
state = start = restart(start);
break;
// parsed '.'
case State::Float + 2:
if (tolower(ch) == 'e')
state = State::Float + 3;
else if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Float;
}
else
state = start = restart(start);
break;
// parsed 'e'
case State::Float + 3:
if (ch == '-' or ch == '+')
state = State::Float + 4;
else if (isdigit(ch))
state = State::Float + 5;
else
state = start = restart(start);
break;
case State::Float + 4:
if (isdigit(ch))
state = State::Float + 5;
else
state = start = restart(start);
break;
case State::Float + 5:
if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Float;
}
else
state = start = restart(start);
break;
case State::Int:
if (isdigit(ch) or ch == '+' or ch == '-')
state = State::Int + 1;
else
state = start = restart(start);
break;
case State::Int + 1:
if (is_white(ch) or ch == kEOF)
{
retract();
result = CIFToken::Value;
mTokenType = CIFValue::Int;
}
else
state = start = restart(start);
break;
case State::Value:
if (ch == '_')
{
std::string s = to_lower_copy(m_token_value);
if (s == "global_")
result = CIFToken::GLOBAL;
else if (s == "stop_")
result = CIFToken::STOP;
else if (s == "loop_")
result = CIFToken::LOOP;
else if (s == "data_")
{
state = State::DATA;
continue;
}
else if (s == "save_")
{
state = State::SAVE;
continue;
}
}
if (result == CIFToken::Unknown and not is_non_blank(ch))
{
retract();
result = CIFToken::Value;
if (m_token_value == ".")
mTokenType = CIFValue::Inapplicable;
else if (m_token_value == "?")
{
mTokenType = CIFValue::Unknown;
m_token_value.clear();
}
}
break;
case State::DATA:
case State::SAVE:
if (not is_non_blank(ch))
{
retract();
if (state == State::DATA)
result = CIFToken::DATA;
else
result = CIFToken::SAVE;
m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
}
break;
default:
assert(false);
error("Invalid state in get_next_token");
break;
}
}
if (VERBOSE >= 5)
{
std::cerr << get_token_name(result);
if (mTokenType != CIFValue::Unknown)
std::cerr << ' ' << get_value_name(mTokenType);
if (result != CIFToken::Eof)
std::cerr << " " << std::quoted(m_token_value);
std::cerr << std::endl;
}
return result;
}
/// Check that the current lookahead token is @a token and advance to the
/// next token. A mismatch is reported through error().
void sac_parser::match(CIFToken token)
{
	const bool as_expected = (m_lookahead == token);

	if (not as_expected)
	{
		std::string msg("Unexpected token, expected ");
		msg += get_token_name(token);
		msg += " but found ";
		msg += get_token_name(m_lookahead);
		error(msg);
	}

	m_lookahead = get_next_token();
}
/// Scan the raw input for the header of data block @a datablock and, when
/// it is found, parse just that block.
///
/// The scan is a small state machine over the characters of m_source. It
/// steps over comments, quoted strings and text fields so a "data_..."
/// sequence inside one of those is not mistaken for a header.
///
/// @param datablock  Name of the block to locate, without the "data_" prefix.
/// @return true if the header was found (the block has then been parsed),
///         false if the end of the input was reached first.
///
/// NOTE(review): apart from the leading 'd'/'D' the header is compared
/// case-sensitively — confirm this is intended.
bool sac_parser::parse_single_datablock(const std::string &datablock)
{
	// first locate the start, as fast as we can
	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data
	} state = start;

	int quote = 0;
	bool bol = true;
	const std::string dblk = "data_" + datablock;
	std::string::size_type si = 0;
	bool found = false;

	// Read a character only while the header has not been found. Fetching in
	// the loop increment would swallow the first character following the
	// header before the tokenizer gets to see it.
	while (not found)
	{
		const auto ch = m_source.sbumpc();
		if (ch == std::streambuf::traits_type::eof())
			break;

		switch (state)
		{
			case start:
				switch (ch)
				{
					case '#': state = comment; break;
					case 'd':
					case 'D':
						state = data;
						si = 1;
						break;
					case '\'':
					case '"':
						state = string;
						quote = ch;
						break;
					case ';':
						if (bol)
							state = qstring;
						break;
				}
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				// a quote only ends the string when followed by white space
				if (std::isspace(ch))
					state = start;
				else
					state = string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				if (si == dblk.length())
				{
					// the whole header matched, it must end in white space;
					// testing the length first keeps si within dblk
					if (std::isspace(ch))
						found = true;
					else
						state = start;
				}
				else if (dblk[si++] != ch)
					state = start;
				break;
		}

		bol = (ch == '\n');
	}

	if (found)
	{
		produce_datablock(datablock);
		m_lookahead = get_next_token();
		parse_datablock();
	}

	return found;
}
/// Scan the whole input once and record where each data block starts.
///
/// Uses the same character level state machine as the name based
/// parse_single_datablock: comments, quoted strings and text fields are
/// stepped over. For each "data_<name>" header terminated by white space,
/// the stream position directly after that white space character is stored
/// under <name>.
///
/// @return The index mapping data block names to stream positions.
///
/// NOTE(review): apart from the leading 'd'/'D' the "data_" prefix is
/// compared case-sensitively — confirm this is intended.
sac_parser::datablock_index sac_parser::index_datablocks()
{
	datablock_index index;

	// first locate the start, as fast as we can
	enum
	{
		start,
		comment,
		string,
		string_quote,
		qstring,
		data,
		data_name
	} state = start;

	int quote = 0;
	bool bol = true;
	const char dblk[] = "data_";
	constexpr std::string::size_type dblk_length = sizeof(dblk) - 1;
	std::string::size_type si = 0;
	std::string datablock;

	for (auto ch = m_source.sbumpc(); ch != std::streambuf::traits_type::eof(); ch = m_source.sbumpc())
	{
		switch (state)
		{
			case start:
				switch (ch)
				{
					case '#': state = comment; break;
					case 'd':
					case 'D':
						state = data;
						si = 1;
						break;
					case '\'':
					case '"':
						state = string;
						quote = ch;
						break;
					case ';':
						if (bol)
							state = qstring;
						break;
				}
				break;

			case comment:
				if (ch == '\n')
					state = start;
				break;

			case string:
				if (ch == quote)
					state = string_quote;
				break;

			case string_quote:
				// a quote only ends the string when followed by white space
				if (std::isspace(ch))
					state = start;
				else
					state = string;
				break;

			case qstring:
				if (ch == ';' and bol)
					state = start;
				break;

			case data:
				if (si == dblk_length)
				{
					// complete prefix seen, the name has to start here;
					// testing the length first keeps si within dblk, even
					// when a NUL byte follows the prefix
					if (is_non_blank(ch))
					{
						datablock = {static_cast<char>(ch)};
						state = data_name;
					}
					else
						state = start;
				}
				else if (dblk[si++] != ch)
					state = start;
				break;

			case data_name:
				if (is_non_blank(ch))
					datablock.insert(datablock.end(), char(ch));
				else
				{
					// only a name ended by white space is a valid header
					if (std::isspace(ch) and not datablock.empty())
						index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in);

					state = start;
				}
				break;
		}

		bol = (ch == '\n');
	}

	return index;
}
/// Parse a single data block, using a previously built index to jump to it.
///
/// @param datablock  Name of the block to parse.
/// @param index      Index mapping block names to stream positions.
/// @return false when @a datablock is not in @a index, true after the block
///         has been parsed.
bool sac_parser::parse_single_datablock(const std::string &datablock, const datablock_index &index)
{
	const auto entry = index.find(datablock);
	if (entry == index.end())
		return false;

	// continue reading at the recorded position
	m_source.pubseekpos(entry->second, std::ios_base::in);

	produce_datablock(datablock);
	m_lookahead = get_next_token();
	parse_datablock();

	return true;
}
/// Parse the input as a sequence of global_ sections and data blocks until
/// the end of file token is reached. Any other token at this level is
/// reported through error().
void sac_parser::parse_file()
{
	while (m_lookahead != CIFToken::Eof)
	{
		if (m_lookahead == CIFToken::GLOBAL)
			parse_global();
		else if (m_lookahead == CIFToken::DATA)
		{
			// the token value holds the name of the data block
			produce_datablock(m_token_value);

			match(CIFToken::DATA);
			parse_datablock();
		}
		else
			error("This file does not seem to be an mmCIF file");
	}
}
/// Consume a global_ section: the GLOBAL token followed by any number of
/// tag/value pairs. Nothing is produced for these pairs.
void sac_parser::parse_global()
{
	match(CIFToken::GLOBAL);

	for (;;)
	{
		if (m_lookahead != CIFToken::Tag)
			break;

		match(CIFToken::Tag);
		match(CIFToken::Value);
	}
}
void sac_parser::parse_datablock()
{
static const std::string kUnitializedCategory("<invalid>");
std::string cat = kUnitializedCategory; // intial value acts as a guard for empty category names
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
{
switch (m_lookahead)
{
case CIFToken::LOOP:
{
cat = kUnitializedCategory; // should start a new category
match(CIFToken::LOOP);
std::vector<std::string> tags;
while (m_lookahead == CIFToken::Tag)
{
std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value);
if (cat == kUnitializedCategory)
{
produce_category(catName);
cat = catName;
}
else if (not iequals(cat, catName))
error("inconsistent categories in loop_");
tags.push_back(itemName);
match(CIFToken::Tag);
}
while (m_lookahead == CIFToken::Value)
{
produce_row();
for (auto tag : tags)
{
produce_item(cat, tag, m_token_value);
match(CIFToken::Value);
}
}
cat.clear();
break;
}
case CIFToken::Tag:
{
std::string catName, itemName;
std::tie(catName, itemName) = split_tag_name(m_token_value);
if (not iequals(cat, catName))
{
produce_category(catName);
cat = catName;
produce_row();
}
match(CIFToken::Tag);
produce_item(cat, itemName, m_token_value);
match(CIFToken::Value);
break;
}
case CIFToken::SAVE:
parse_save_frame();
break;
default:
assert(false);
break;
}
}
}
/// Save frames are not accepted by this parser: reaching one is reported
/// through error().
void sac_parser::parse_save_frame()
{
error("A regular CIF file should not contain a save frame");
}
// --------------------------------------------------------------------
/// Make the data block called @a name the current one by emplacing it in
/// m_file and remembering a pointer to the resulting element.
void parser::produce_datablock(const std::string &name)
{
	auto &&[position, inserted] = m_file.emplace(name);
	m_datablock = &*position;
}
/// Make the category called @a name the current one by emplacing it in the
/// current data block and remembering a pointer to the resulting element.
void parser::produce_category(const std::string &name)
{
	const bool verbose = VERBOSE >= 4;
	if (verbose)
		std::cerr << "producing category " << name << std::endl;

	auto &&[position, inserted] = m_datablock->emplace(name);
	m_category = &*position;
}
/// Append an empty row to the current category and make it the current row.
/// A missing current category is reported through error().
void parser::produce_row()
{
	// Check first: the verbose logging below dereferences m_category.
	if (m_category == nullptr)
		error("inconsistent categories in loop_");

	if (VERBOSE >= 4)
		std::cerr << "producing row for category " << m_category->name() << std::endl;

	m_category->emplace({});
	m_row = m_category->back();
	// m_row.lineNr(m_line_nr);
}
/// Store @a value in field @a item of the current row.
///
/// @param category  Category the item belongs to; it must match the current
///                  category (compared with iequals), otherwise the
///                  mismatch is reported through error().
/// @param item      Name of the item within the category.
/// @param value     Value to store.
void parser::produce_item(const std::string &category, const std::string &item, const std::string &value)
{
	if (VERBOSE >= 4)
		std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;

	if (m_category == nullptr or not iequals(category, m_category->name()))
		error("inconsistent categories in loop_");

	// Store the value that was passed in (and logged above) instead of
	// reaching back into the tokenizer state.
	m_row[item] = value;
}
} // namespace cif

3749
src/pdb/cif2pdb.cpp Normal file

File diff suppressed because it is too large Load Diff

6240
src/pdb/pdb2cif.cpp Normal file

File diff suppressed because it is too large Load Diff

1501
src/pdb/pdb2cif_remark_3.cpp Normal file

File diff suppressed because it is too large Load Diff

2016
src/pdb/tls.cpp Normal file

File diff suppressed because it is too large Load Diff

544
src/point.cpp Normal file
View File

@@ -0,0 +1,544 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <random>
#include <cif++/point.hpp>
namespace cif
{
// --------------------------------------------------------------------
// We're using expression templates here
/// CRTP base class for matrix like types: every call is forwarded to the
/// implementation in the derived type M.
template <typename M>
class MatrixExpression
{
  public:
	uint32_t dim_m() const { return derived().dim_m(); }
	uint32_t dim_n() const { return derived().dim_n(); }

	/// Writable access to element (i, j) of the derived matrix
	double &operator()(uint32_t i, uint32_t j)
	{
		return derived().operator()(i, j);
	}

	/// Read only access to element (i, j) of the derived matrix
	double operator()(uint32_t i, uint32_t j) const
	{
		return derived().operator()(i, j);
	}

  private:
	M &derived() { return static_cast<M &>(*this); }
	const M &derived() const { return static_cast<const M &>(*this); }
};
// --------------------------------------------------------------------
// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
// element m i,j is mapped to [i * n + j] and thus storage is row major
/// Dense m x n matrix of doubles, stored row major in a single vector.
class Matrix : public MatrixExpression<Matrix>
{
  public:
	/// Construct by evaluating every element of the matrix expression @a m
	template <typename M2>
	Matrix(const MatrixExpression<M2> &m)
		: m_m(m.dim_m())
		, m_n(m.dim_n())
		, m_data(m_m * m_n)
	{
		// m_data is row major, so filling it front to back visits the
		// elements in (i, j) order
		auto out = m_data.begin();
		for (uint32_t row = 0; row < m_m; ++row)
		{
			for (uint32_t col = 0; col < m_n; ++col)
				*out++ = m(row, col);
		}
	}

	/// Construct an @a m by @a n matrix with all elements set to @a v
	Matrix(size_t m, size_t n, double v = 0)
		: m_m(m)
		, m_n(n)
		, m_data(m_m * m_n, v)
	{
	}

	Matrix() = default;
	Matrix(Matrix &&m) = default;
	Matrix(const Matrix &m) = default;
	Matrix &operator=(Matrix &&m) = default;
	Matrix &operator=(const Matrix &m) = default;

	uint32_t dim_m() const { return m_m; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[offset(i, j)];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		assert(i < m_m);
		assert(j < m_n);
		return m_data[offset(i, j)];
	}

  private:
	/// Position of element (i, j) in m_data
	uint32_t offset(uint32_t i, uint32_t j) const { return i * m_n + j; }

	uint32_t m_m = 0, m_n = 0;
	std::vector<double> m_data;
};
// --------------------------------------------------------------------
/// Symmetric n x n matrix of doubles. Only one triangle is stored: element
/// (i, j) with i <= j lives at index j * (j + 1) / 2 + i, and (j, i) refers
/// to the same storage.
class SymmetricMatrix : public MatrixExpression<SymmetricMatrix>
{
  public:
	/// Construct an @a n by @a n matrix with all elements set to @a v
	SymmetricMatrix(uint32_t n, double v = 0)
		: m_n(n)
		, m_data((m_n * (m_n + 1)) / 2, v)
	{
	}

	SymmetricMatrix() = default;
	SymmetricMatrix(SymmetricMatrix &&m) = default;
	SymmetricMatrix(const SymmetricMatrix &m) = default;
	SymmetricMatrix &operator=(SymmetricMatrix &&m) = default;
	SymmetricMatrix &operator=(const SymmetricMatrix &m) = default;

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	double operator()(uint32_t i, uint32_t j) const
	{
		// same bounds check as the writable accessor
		assert(i < m_n);
		assert(j < m_n);

		return i < j
		           ? m_data[(j * (j + 1)) / 2 + i]
		           : m_data[(i * (i + 1)) / 2 + j];
	}

	double &operator()(uint32_t i, uint32_t j)
	{
		if (i > j)
			std::swap(i, j);
		assert(j < m_n);
		return m_data[(j * (j + 1)) / 2 + i];
	}

  private:
	// Zero initialised so a default constructed matrix reports dimension 0
	// instead of an indeterminate value.
	uint32_t m_n = 0;
	std::vector<double> m_data;
};
/// Read only n x n identity matrix; no elements are stored.
class IdentityMatrix : public MatrixExpression<IdentityMatrix>
{
  public:
	IdentityMatrix(uint32_t n)
		: m_n(n)
	{
	}

	uint32_t dim_m() const { return m_n; }
	uint32_t dim_n() const { return m_n; }

	/// 1 on the diagonal, 0 everywhere else
	double operator()(uint32_t i, uint32_t j) const
	{
		if (i == j)
			return 1;
		return 0;
	}

  private:
	uint32_t m_n;
};
// --------------------------------------------------------------------
// matrix functions, implemented as expression templates
/// Expression node for the element wise difference of two matrices. Only
/// references to the operands are kept; an element is computed when it is
/// requested.
template <typename M1, typename M2>
class MatrixSubtraction : public MatrixExpression<MatrixSubtraction<M1, M2>>
{
  public:
	MatrixSubtraction(const M1 &m1, const M2 &m2)
		: m_lhs(m1)
		, m_rhs(m2)
	{
		assert(m_lhs.dim_m() == m_rhs.dim_m());
		assert(m_lhs.dim_n() == m_rhs.dim_n());
	}

	uint32_t dim_m() const { return m_lhs.dim_m(); }
	uint32_t dim_n() const { return m_lhs.dim_n(); }

	double operator()(uint32_t i, uint32_t j) const
	{
		const double left = m_lhs(i, j);
		const double right = m_rhs(i, j);
		return left - right;
	}

  private:
	const M1 &m_lhs;
	const M2 &m_rhs;
};
/// Build the lazy difference m1 - m2 of two matrix expressions. The result
/// refers to both operands, so they have to stay alive while it is used.
template <typename M1, typename M2>
MatrixSubtraction<M1, M2> operator-(const MatrixExpression<M1> &m1, const MatrixExpression<M2> &m2)
{
	const M1 &lhs = static_cast<const M1 &>(m1);
	const M2 &rhs = static_cast<const M2 &>(m2);
	return MatrixSubtraction<M1, M2>(lhs, rhs);
}
/// Expression node for a matrix multiplied by a scalar. Only a reference to
/// the matrix is kept; an element is computed when it is requested.
template <typename M>
class MatrixMultiplication : public MatrixExpression<MatrixMultiplication<M>>
{
  public:
	MatrixMultiplication(const M &m, double v)
		: m_matrix(m)
		, m_factor(v)
	{
	}

	uint32_t dim_m() const { return m_matrix.dim_m(); }
	uint32_t dim_n() const { return m_matrix.dim_n(); }

	double operator()(uint32_t i, uint32_t j) const
	{
		const double element = m_matrix(i, j);
		return element * m_factor;
	}

  private:
	const M &m_matrix;
	double m_factor;
};
/// Build the lazy product of matrix expression @a m and scalar @a v. The
/// result refers to @a m, so it has to stay alive while the result is used.
template <typename M>
MatrixMultiplication<M> operator*(const MatrixExpression<M> &m, double v)
{
	const M &matrix = static_cast<const M &>(m);
	return MatrixMultiplication<M>(matrix, v);
}
// --------------------------------------------------------------------
/// Calculate the matrix of cofactors of @a m.
///
/// The loops and the index table are hard coded for a 4x4 input: each
/// element of the result is the determinant of the 3x3 matrix that is left
/// after removing one row and one column, negated when row + column is odd.
template <class M1>
Matrix Cofactors(const M1 &m)
{
	// kRemaining[k] lists the three indices that are left after removing k
	static const size_t kRemaining[4][3] = {
		{ 1, 2, 3 },
		{ 0, 2, 3 },
		{ 0, 1, 3 },
		{ 0, 1, 2 }
	};

	Matrix result(m.dim_m(), m.dim_m());

	for (size_t row = 0; row < 4; ++row)
	{
		const size_t *r = kRemaining[row];

		for (size_t col = 0; col < 4; ++col)
		{
			const size_t *c = kRemaining[col];

			// determinant of the 3x3 minor
			double value =
				m(r[0], c[0]) * m(r[1], c[1]) * m(r[2], c[2]) +
				m(r[0], c[1]) * m(r[1], c[2]) * m(r[2], c[0]) +
				m(r[0], c[2]) * m(r[1], c[0]) * m(r[2], c[1]) -
				m(r[0], c[2]) * m(r[1], c[1]) * m(r[2], c[0]) -
				m(r[0], c[1]) * m(r[1], c[0]) * m(r[2], c[2]) -
				m(r[0], c[0]) * m(r[1], c[2]) * m(r[2], c[1]);

			if ((row + col) % 2 == 1)
				value *= -1;

			result(row, col) = value;
		}
	}

	return result;
}
// --------------------------------------------------------------------
/// Scale quaternion @a q to unit length.
///
/// @param q  The quaternion to normalize.
/// @return @a q divided by its length, or the identity quaternion
///         (1, 0, 0, 0) when the length is 0.001 or less.
template<typename T>
quaternion_type<T> normalize(quaternion_type<T> q)
{
	// the length is calculated in double precision, whatever T is
	const double a = q.get_a();
	const double b = q.get_b();
	const double c = q.get_c();
	const double d = q.get_d();

	const double length = std::sqrt(a * a + b * b + c * c + d * d);

	// use T and quaternion_type<T> so the template is not tied to the
	// single precision quaternion type
	if (length > 0.001)
		q /= static_cast<T>(length);
	else
		q = quaternion_type<T>(1, 0, 0, 0);

	return q;
}
// --------------------------------------------------------------------
/// Construct the unit quaternion for a rotation of @a angle around @a axis.
///
/// @param angle  Rotation angle in degrees (converted with kPI / 180).
/// @param axis   Rotation axis, normalized before use.
/// @return The normalized quaternion (cos(a/2), sin(a/2) * axis).
quaternion construct_from_angle_axis(float angle, point axis)
{
	const auto half_angle = (angle * kPI / 180) / 2;

	// Take the sine directly: sqrt(1 - cos^2) is never negative, which made
	// a rotation over a negative angle the same as one over the positive
	// angle.
	const auto c = std::cos(half_angle);
	const auto s = std::sin(half_angle);

	axis.normalize();

	return normalize(quaternion{
		static_cast<float>(c),
		static_cast<float>(s * axis.m_x),
		static_cast<float>(s * axis.m_y),
		static_cast<float>(s * axis.m_z) });
}
/// Convert quaternion @a q into a rotation angle and a rotation axis.
///
/// @param q  The quaternion; it is normalized first when its scalar part
///           lies outside [-1, 1].
/// @return The angle in degrees and the axis. When the sine of the half
///         angle is below 0.001 the vector part of @a q is returned
///         unscaled as axis.
std::tuple<double, point> quaternion_to_angle_axis(quaternion q)
{
	// acos and the square root below need a scalar part within [-1, 1];
	// the lower bound has to be checked as well
	if (std::abs(q.get_a()) > 1)
		q = normalize(q);

	// remove what rounding may have left after normalizing
	auto a = q.get_a();
	if (a > 1)
		a = 1;
	else if (a < -1)
		a = -1;

	// angle:
	double angle = 2 * std::acos(a);
	angle = angle * 180 / kPI;

	// axis:
	float s = std::sqrt(1 - a * a);
	if (s < 0.001)
		s = 1;

	point axis(q.get_b() / s, q.get_c() / s, q.get_d() / s);

	return { angle, axis };
}
/// Translate @a Points in place so their average ends up at the origin.
///
/// @param Points  The points to center; modified.
/// @return The average of the points before they were moved.
point center_points(std::vector<point> &Points)
{
	point center;

	for (const point &p : Points)
	{
		center.m_x += p.m_x;
		center.m_y += p.m_y;
		center.m_z += p.m_z;
	}

	const auto count = Points.size();
	center.m_x /= count;
	center.m_y /= count;
	center.m_z /= count;

	for (point &p : Points)
	{
		p.m_x -= center.m_x;
		p.m_y -= center.m_y;
		p.m_z -= center.m_z;
	}

	return center;
}
/// Calculate the average of the points in @a pts.
point centroid(const std::vector<point> &pts)
{
	point sum;

	for (auto i = pts.begin(); i != pts.end(); ++i)
		sum += *i;

	const float count = static_cast<float>(pts.size());
	sum /= count;

	return sum;
}
double RMSd(const std::vector<point> &a, const std::vector<point> &b)
{
double sum = 0;
for (uint32_t i = 0; i < a.size(); ++i)
{
std::valarray<double> d(3);
d[0] = b[i].m_x - a[i].m_x;
d[1] = b[i].m_y - a[i].m_y;
d[2] = b[i].m_z - a[i].m_z;
d *= d;
sum += d.sum();
}
return std::sqrt(sum / a.size());
}
// The next function returns the largest solution for a quartic equation
// based on Ferrari's algorithm.
// A depressed quartic is of the form:
//
// x^4 + ax^2 + bx + c = 0
//
// (since I'm too lazy to find out a better way, I've implemented the
// routine using complex values to avoid nan's as a result of taking
// sqrt of a negative number)
/// Return the largest real part among the four solutions of the depressed
/// quartic x^4 + a x^2 + b x + c = 0, found with Ferrari's method. Complex
/// arithmetic is used throughout, so intermediate square roots of negative
/// numbers do not result in NaN.
double LargestDepressedQuarticSolution(double a, double b, double c)
{
	std::complex<double> P = -(a * a) / 12 - c;
	std::complex<double> Q = -(a * a * a) / 108 + (a * c) / 3 - (b * b) / 8;
	std::complex<double> R = -Q / 2.0 + std::sqrt((Q * Q) / 4.0 + (P * P * P) / 27.0);

	std::complex<double> U = std::pow(R, 1 / 3.0);

	std::complex<double> y;
	if (U == 0.0)
		y = -5.0 * a / 6.0 + U - std::pow(Q, 1.0 / 3.0);
	else
		y = -5.0 * a / 6.0 + U - P / (3.0 * U);

	std::complex<double> W = std::sqrt(a + 2.0 * y);

	// The term 2b/W. W is zero only when b is zero too, the term is taken
	// to be zero then instead of calculating 0/0.
	std::complex<double> bw = 0.0;
	if (W != 0.0)
		bw = 2.0 * b / W;

	// The four solutions are:
	//
	//   ( W ± sqrt(-(3a + 2y + 2b/W))) / 2
	//   (-W ± sqrt(-(3a + 2y - 2b/W))) / 2
	//
	// The sign of W and the sign of the 2b/W term belong together. Mixing
	// them gives values that are not solutions, and these can be larger
	// than the largest real solution.
	const std::complex<double> sp = std::sqrt(-(3.0 * a + 2.0 * y + bw));
	const std::complex<double> sm = std::sqrt(-(3.0 * a + 2.0 * y - bw));

	// We want the largest result, so:
	std::valarray<double> t(4);

	t[0] = ((W + sp) / 2.0).real();
	t[1] = ((W - sp) / 2.0).real();
	t[2] = ((-W + sm) / 2.0).real();
	t[3] = ((-W - sm) / 2.0).real();

	return t.max();
}
// Calculate a rotation, as a normalized quaternion, from two lists of
// corresponding points pa and pb.
//
// Steps: accumulate the 3x3 matrix M of coordinate products, build the
// symmetric 4x4 matrix N from M, find the largest root of the characteristic
// polynomial of N and take a row of the cofactor matrix of (N - lambda I)
// as the quaternion.
//
// NOTE(review): pb is indexed with the indices of pa, so pb must have at
// least pa.size() elements. Presumably both sets should be centered on the
// origin before calling (see center_points) — confirm against callers.
// NOTE(review): this looks like the closed-form quaternion solution for
// absolute orientation — confirm against the original reference.
quaternion align_points(const std::vector<point> &pa, const std::vector<point> &pb)
{
// First calculate M, a 3x3 Matrix containing the sums of products of the coordinates of A and B
Matrix M(3, 3, 0);
for (uint32_t i = 0; i < pa.size(); ++i)
{
const point &a = pa[i];
const point &b = pb[i];
M(0, 0) += a.m_x * b.m_x;
M(0, 1) += a.m_x * b.m_y;
M(0, 2) += a.m_x * b.m_z;
M(1, 0) += a.m_y * b.m_x;
M(1, 1) += a.m_y * b.m_y;
M(1, 2) += a.m_y * b.m_z;
M(2, 0) += a.m_z * b.m_x;
M(2, 1) += a.m_z * b.m_y;
M(2, 2) += a.m_z * b.m_z;
}
// Now calculate N, a symmetric 4x4 Matrix
// (only the upper triangle is assigned, SymmetricMatrix maps (j, i) to (i, j))
SymmetricMatrix N(4);
N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
N(0, 1) = M(1, 2) - M(2, 1);
N(0, 2) = M(2, 0) - M(0, 2);
N(0, 3) = M(0, 1) - M(1, 0);
N(1, 1) = M(0, 0) - M(1, 1) - M(2, 2);
N(1, 2) = M(0, 1) + M(1, 0);
N(1, 3) = M(0, 2) + M(2, 0);
N(2, 2) = -M(0, 0) + M(1, 1) - M(2, 2);
N(2, 3) = M(1, 2) + M(2, 1);
N(3, 3) = -M(0, 0) - M(1, 1) + M(2, 2);
// det(N - λI) = 0
// find the largest λ (λm)
//
// Aλ^4 + Bλ^3 + Cλ^2 + Dλ + E = 0
// A = 1
// B = 0
// and so this is a so-called depressed quartic
// solve it using Ferrari's algorithm
// C is -2 times the sum of the squares of all elements of M
double C = -2 * (M(0, 0) * M(0, 0) + M(0, 1) * M(0, 1) + M(0, 2) * M(0, 2) +
M(1, 0) * M(1, 0) + M(1, 1) * M(1, 1) + M(1, 2) * M(1, 2) +
M(2, 0) * M(2, 0) + M(2, 1) * M(2, 1) + M(2, 2) * M(2, 2));
// D is -8 times the determinant of M
double D = 8 * (M(0, 0) * M(1, 2) * M(2, 1) +
M(1, 1) * M(2, 0) * M(0, 2) +
M(2, 2) * M(0, 1) * M(1, 0)) -
8 * (M(0, 0) * M(1, 1) * M(2, 2) +
M(1, 2) * M(2, 0) * M(0, 1) +
M(2, 1) * M(1, 0) * M(0, 2));
// E is the determinant of N:
double E =
(N(0, 0) * N(1, 1) - N(0, 1) * N(0, 1)) * (N(2, 2) * N(3, 3) - N(2, 3) * N(2, 3)) +
(N(0, 1) * N(0, 2) - N(0, 0) * N(2, 1)) * (N(2, 1) * N(3, 3) - N(2, 3) * N(1, 3)) +
(N(0, 0) * N(1, 3) - N(0, 1) * N(0, 3)) * (N(2, 1) * N(2, 3) - N(2, 2) * N(1, 3)) +
(N(0, 1) * N(2, 1) - N(1, 1) * N(0, 2)) * (N(0, 2) * N(3, 3) - N(2, 3) * N(0, 3)) +
(N(1, 1) * N(0, 3) - N(0, 1) * N(1, 3)) * (N(0, 2) * N(2, 3) - N(2, 2) * N(0, 3)) +
(N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3)) * (N(0, 2) * N(1, 3) - N(2, 1) * N(0, 3));
// solve quartic
double lambda = LargestDepressedQuarticSolution(C, D, E);
// calculate t = (N - λI)
Matrix t = N - IdentityMatrix(4) * lambda;
// calculate a Matrix of cofactors for t
Matrix cf = Cofactors(t);
// use the row of cf whose first element has the largest absolute value
int maxR = 0;
for (int r = 1; r < 4; ++r)
{
if (std::abs(cf(r, 0)) > std::abs(cf(maxR, 0)))
maxR = r;
}
quaternion q(
static_cast<float>(cf(maxR, 0)),
static_cast<float>(cf(maxR, 1)),
static_cast<float>(cf(maxR, 2)),
static_cast<float>(cf(maxR, 3)));
q = normalize(q);
return q;
}
// --------------------------------------------------------------------
/// Return point @a p moved over a random vector.
///
/// The vector is (0, 0, 1) rotated by a quaternion built from three random
/// angles and scaled by a value drawn from a normal distribution with mean
/// 0 and standard deviation @a offset. The random number generator is a
/// function local static that is seeded once from std::random_device.
point nudge(point p, float offset)
{
	static std::random_device rd;
	static std::mt19937_64 rng(rd());

	std::uniform_real_distribution<float> angle_dist(0, 2 * kPI);
	std::normal_distribution<> length_dist(0, offset);

	// the order of the draws is part of the behaviour
	const float theta = angle_dist(rng);
	const float phi1 = angle_dist(rng) - kPI;
	const float phi2 = angle_dist(rng) - kPI;

	quaternion q = spherical(1.0f, theta, phi1, phi2);

	point displacement{ 0, 0, 1 };
	displacement.rotate(q);
	displacement *= length_dist(rng);

	return p + displacement;
}
} // namespace cif

113
src/row.cpp Normal file
View File

@@ -0,0 +1,113 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2022 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++/category.hpp>
namespace cif
{
/// Assign @a value to column @a column of this row by forwarding to
/// update_value of the category this handle refers to.
///
/// @throws std::runtime_error when the handle has no category.
void row_handle::assign(size_t column, std::string_view value, bool updateLinked, bool validate)
{
	if (m_category == nullptr)
		throw std::runtime_error("uninitialized row");

	m_category->update_value(m_row, column, value, updateLinked, validate);
}
/// Return the index of the column called @a name, as reported by the
/// category this handle refers to.
///
/// @throws std::runtime_error when the handle has no category.
uint16_t row_handle::get_column_ix(std::string_view name) const
{
	if (m_category == nullptr)
		throw std::runtime_error("uninitialized row");

	const uint16_t ix = m_category->get_column_ix(name);
	return ix;
}
/// Return the name of the column with index @a ix, as reported by the
/// category this handle refers to.
///
/// @throws std::runtime_error when the handle has no category.
std::string_view row_handle::get_column_name(uint16_t ix) const
{
	if (m_category == nullptr)
		throw std::runtime_error("uninitialized row");

	const std::string_view name = m_category->get_column_name(ix);
	return name;
}
/// Forward to add_column of the category this handle refers to and return
/// the column index it reports.
///
/// @throws std::runtime_error when the handle has no category.
uint16_t row_handle::add_column(std::string_view name)
{
	if (m_category == nullptr)
		throw std::runtime_error("uninitialized row");

	const uint16_t ix = m_category->add_column(name);
	return ix;
}
void row_handle::swap(size_t column, row_handle &b)
{
if (not m_category)
throw std::runtime_error("uninitialized row");
m_category->swap_item(column, *this, b);
}
// --------------------------------------------------------------------
/// Construct an initializer holding a name/text pair for every non-empty
/// item in the row @a rh refers to.
///
/// @throws std::runtime_error when @a rh has no category.
row_initializer::row_initializer(row_handle rh)
{
	if (not rh.m_category)
		throw std::runtime_error("uninitialized row");

	assert(rh.m_row);

	auto &columns = *rh.m_category;
	row *source = rh.get_row();

	for (size_t ix = 0; ix < source->size(); ++ix)
	{
		auto &value = (*source)[ix];

		// skip items that are not set
		if (not value)
			continue;

		emplace_back(columns.get_column_name(ix), value.text());
	}
}
void row_initializer::set_value(std::string_view name, std::string_view value)
{
for (auto &i : *this)
{
if (i.name() == name)
{
i.value(value);
return;
}
}
emplace_back(name, value);
}
/// Append an item @a name with value @a value, but only when there is no
/// item called @a name yet. An existing item is left untouched.
void row_initializer::set_value_if_empty(std::string_view name, std::string_view value)
{
	// call std::find_if explicitly instead of relying on argument
	// dependent lookup through the iterator type
	const auto pos = std::find_if(begin(), end(), [name](const auto &i) { return i.name() == name; });

	if (pos == end())
		emplace_back(name, value);
}
} // namespace cif

153
src/symmetry.cpp Normal file
View File

@@ -0,0 +1,153 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdexcept>
#include <cif++/symmetry.hpp>
#include "./symop_table_data.hpp"
namespace cif
{
// --------------------------------------------------------------------
// Unfortunately, clipper has a different numbering scheme than PDB
// for rotation numbers. So we created a table to map those.
// Perhaps a bit over the top, but hey....
// --------------------------------------------------------------------
/// Look up the number of the space group called @a spacegroup.
///
/// The name is first searched for in the name field of kSpaceGroups using a
/// binary search, which relies on the table being sorted on that field. If
/// that fails, the xHM field of every entry is compared.
///
/// @param spacegroup  The name; "P 21 21 2 A" is treated as
///                    "P 21 21 2 (a)".
/// @return The number of the matching entry.
/// @throws std::runtime_error when the name is empty or not in the table.
int get_space_group_number(std::string spacegroup)
{
	if (spacegroup.empty())
		throw std::runtime_error("No spacegroup, cannot continue");

	if (spacegroup == "P 21 21 2 A")
		spacegroup = "P 21 21 2 (a)";

	int result = 0;

	const size_t count = kNrOfSpaceGroups;
	int32_t lo = 0, hi = static_cast<int32_t>(count - 1);

	while (lo <= hi)
	{
		const int32_t mid = (lo + hi) / 2;
		const int cmp = spacegroup.compare(kSpaceGroups[mid].name);

		if (cmp == 0)
		{
			result = kSpaceGroups[mid].nr;
			break;
		}

		if (cmp > 0)
			lo = mid + 1;
		else
			hi = mid - 1;
	}

	// not found, see if we can find a match based on xHM name
	for (size_t i = 0; result == 0 and i < kNrOfSpaceGroups; ++i)
	{
		auto &sg = kSpaceGroups[i];
		if (sg.xHM == spacegroup)
		{
			result = sg.nr;
			break;
		}
	}

	if (result == 0)
		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

	return result;
}
// --------------------------------------------------------------------
/// Look up the number of the space group called @a spacegroup, where
/// @a type selects the field of kSpaceGroups the name is compared with.
///
/// For space_group_name::full a binary search on the name field is done,
/// which relies on the table being sorted on that field. For
/// space_group_name::xHM the xHM field of every entry is compared, for any
/// other value the Hall field.
///
/// @param spacegroup  The name; "P 21 21 2 A" is treated as
///                    "P 21 21 2 (a)".
/// @param type        The kind of name in @a spacegroup.
/// @return The number of the matching entry.
/// @throws std::runtime_error when the name is empty or not in the table.
int get_space_group_number(std::string spacegroup, space_group_name type)
{
	if (spacegroup.empty())
		throw std::runtime_error("No spacegroup, cannot continue");

	if (spacegroup == "P 21 21 2 A")
		spacegroup = "P 21 21 2 (a)";

	int result = 0;

	switch (type)
	{
		case space_group_name::full:
		{
			const size_t count = kNrOfSpaceGroups;
			int32_t lo = 0, hi = static_cast<int32_t>(count - 1);

			while (lo <= hi)
			{
				const int32_t mid = (lo + hi) / 2;
				const int cmp = spacegroup.compare(kSpaceGroups[mid].name);

				if (cmp == 0)
				{
					result = kSpaceGroups[mid].nr;
					break;
				}

				if (cmp > 0)
					lo = mid + 1;
				else
					hi = mid - 1;
			}
			break;
		}

		case space_group_name::xHM:
			for (auto &entry : kSpaceGroups)
			{
				if (entry.xHM == spacegroup)
				{
					result = entry.nr;
					break;
				}
			}
			break;

		default:
			for (auto &entry : kSpaceGroups)
			{
				if (entry.Hall == spacegroup)
				{
					result = entry.nr;
					break;
				}
			}
			break;
	}

	// no fallback here: the name has to match the requested field
	if (result == 0)
		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");

	return result;
}
}

440
src/symop-map-generator.cpp Normal file
View File

@@ -0,0 +1,440 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <array>
#include <iostream>
#include <iomanip>
#include <fstream>
#include <regex>
#include <map>
#include <filesystem>
#include <cstdlib>
// Shorthand for the standard filesystem namespace
namespace fs = std::filesystem;
// Matches a complete line consisting of three integers, three words without
// white space, a single quoted string, an optional second single quoted
// string and an optional remark starting with '!'. Capture groups 1 to 7
// hold the mandatory fields, group 9 the text of the optional second quoted
// string.
// NOTE(review): presumably the header line of a space group entry in the
// input file — confirm where the expression is used.
std::regex kNameRx(R"(^(\d+) +(\d+) +(\d+) +(\S+) +(\S+) +(\S+) +'([^']+)'( +'([^']+)')?(?: +!.+)?$)");
/// Parser for symmetry operations written as three comma separated
/// expressions, e.g. "-x,y+1/2,-z".
///
/// Each expression is a signed sum of the letters x, y and z and of
/// fractions written as <digit>/<digit>. Spaces are ignored and the letters
/// may be upper or lower case.
class SymopParser
{
  public:
	SymopParser() {}

	/// Parse the symmetry operation in @a s.
	///
	/// @return Fifteen integers: first the nine elements of the rotation
	///         matrix, row by row, followed by numerator and denominator of
	///         the translation for each of the three rows. A row without a
	///         translation has 0, 0.
	/// @throws std::runtime_error when @a s is not a valid expression.
	std::array<int,15> parse(const std::string& s)
	{
		// Start with a clean slate, what an earlier call to parse left
		// behind should not leak into this result.
		for (auto &row : m_rot)
			for (auto &v : row)
				v = 0;

		for (auto &row : m_trn)
			for (auto &v : row)
				v = 0;

		m_nr = 0;

		m_p = s.begin();
		m_e = s.end();
		m_lookahead = next_token();

		parsepart(0);
		match(static_cast<Token>(','));
		parsepart(1);
		match(static_cast<Token>(','));
		parsepart(2);

		if (m_lookahead != 0 or m_p != m_e)
			throw std::runtime_error("symmetry expression contains more data than expected");

		return {
			m_rot[0][0], m_rot[0][1], m_rot[0][2],
			m_rot[1][0], m_rot[1][1], m_rot[1][2],
			m_rot[2][0], m_rot[2][1], m_rot[2][2],
			m_trn[0][0], m_trn[0][1],
			m_trn[1][0], m_trn[1][1],
			m_trn[2][0], m_trn[2][1]
		};
	}

  private:
	// Any other character is a token by itself, with its character code as
	// value. Number and XYZ therefore start beyond the range of a char.
	enum Token : int { Eof = 0, Number = 256, XYZ };

	/// Description of token @a t for use in error messages
	std::string to_string(Token t)
	{
		switch (t)
		{
			case Eof: return "end of expression";
			case Number: return "number";
			case XYZ: return "'x', 'y' or 'z'";
			default:
				// isprint is only defined for values in the range of
				// unsigned char
				if (t > 0 and t < 256 and isprint(t))
					return std::string({'\'', static_cast<char>(t), '\''});
				return "invalid character " + std::to_string(static_cast<int>(t));
		}
	}

	/// Return the next token, skipping spaces. For Number m_nr is set to the
	/// value of the digit, for XYZ to 0, 1 or 2 for x, y and z.
	Token next_token()
	{
		Token result = Eof;

		while (m_p != m_e)
		{
			char ch = *m_p++;
			if (ch == ' ')
				continue;

			switch (ch)
			{
				case 'x':
				case 'X':
					result = XYZ;
					m_nr = 0;
					break;

				case 'y':
				case 'Y':
					result = XYZ;
					m_nr = 1;
					break;

				case 'z':
				case 'Z':
					result = XYZ;
					m_nr = 2;
					break;

				default:
				{
					// go through unsigned char: isdigit is not defined for
					// negative values and a token should not be negative
					const unsigned char uch = static_cast<unsigned char>(ch);
					if (isdigit(uch))
					{
						m_nr = ch - '0';
						result = Number;
					}
					else
						result = static_cast<Token>(uch);
					break;
				}
			}
			break;
		}

		return result;
	}

	/// Check that the lookahead is @a token and move on to the next token.
	/// @throws std::runtime_error when the lookahead is another token.
	void match(Token token)
	{
		if (m_lookahead != token)
			throw std::runtime_error("Unexpected character " + to_string(m_lookahead) + " expected " + to_string(token));

		m_lookahead = next_token();
	}

	/// Parse one of the three expressions and store the result in row
	/// @a row of m_rot and m_trn.
	void parsepart(int row)
	{
		do
		{
			int sign = m_lookahead == '-' ? -1 : 1;
			if (m_lookahead == '-' or m_lookahead == '+')
				match(m_lookahead);

			if (m_lookahead == Number)
			{
				const int numerator = m_nr;
				match(Number);

				match(static_cast<Token>('/'));

				// m_nr is the denominator only if the lookahead really is
				// a number, match checks that
				const int denominator = m_nr;
				match(Number);

				if (denominator == 0)
					throw std::runtime_error("Invalid fraction in symmetry expression, denominator is zero");

				m_trn[row][0] = sign * numerator;
				m_trn[row][1] = denominator;
			}
			else
			{
				// Validate the token before m_nr is used as an index: m_nr
				// may still hold a digit from an earlier number, which could
				// be beyond the bounds of m_rot.
				const int axis = m_nr;
				match(XYZ);

				m_rot[row][axis] = sign;
			}
		}
		while (m_lookahead == '+' or m_lookahead == '-');
	}

	Token m_lookahead = Eof;
	int m_nr = 0;

	std::string::const_iterator m_p, m_e;

	int m_rot[3][3] = {};
	int m_trn[3][2] = {};
};
/// \brief Add the translation of \a cenop to the translation of \a symop.
///
/// Elements 9..14 of both arrays are read as three numerator/denominator
/// pairs. For each pair the fractions are added, reduced by the common
/// factors 5, 4, 3 and 2 and wrapped by adding one denominator and taking the
/// remainder; a zero result is stored as 0/0. A pair of \a symop that is zero
/// simply takes over the pair of \a cenop.
std::array<int,15> move_symop(std::array<int,15> symop, const std::array<int,15>& cenop)
{
    for (int k = 9; k < 15; k += 2)
    {
        int& num = symop[k];
        int& den = symop[k + 1];
        const int cnum = cenop[k];
        const int cden = cenop[k + 1];

        // nothing to add for this row
        if (cnum == 0)
            continue;

        assert(cden != 0);

        // no translation yet: copy the centering translation as is
        if (num == 0)
        {
            assert(den == 0);
            num = cnum;
            den = cden;
            continue;
        }

        if (den != cden)
        {
            // bring both fractions onto a common denominator
            const int commonDen = den * cden;
            const int sum = num * cden + den * cnum;
            num = sum;
            den = commonDen;
        }
        else
            num += cnum;

        // reduce by the small factors that occur in these fractions
        for (int factor = 5; factor > 1; --factor)
        {
            if (num % factor == 0 and den % factor == 0)
            {
                num /= factor;
                den /= factor;
            }
        }

        num = (num + den) % den;
        if (num == 0)
            den = 0;
    }

    return symop;
}
/// \brief Generate the C++ source containing the space group and symmetry
/// operator tables.
///
/// Usage: symop-map-generator <input-file> <output-file>
///
/// The input file is read line by line. Between "begin_spacegroup" and
/// "end_spacegroup" the "symbol", "symop" and "cenop" lines are collected; at
/// the end of each space group every symop is combined with every cenop. The
/// result is written to a temporary file next to the output file, which is
/// renamed to the output file once it has been written completely.
///
/// \return 0 on success, 1 when the arguments are wrong or an error occurred.
int main(int argc, char* const argv[])
{
    using namespace std::literals;

    fs::path tmpFile;
    int result = 0;

    try
    {
        if (argc != 3)
        {
            std::cerr << "Usage symop-map-generator <input-file> <output-file>" << std::endl;
            return 1;
        }

        fs::path input(argv[1]);
        fs::path output(argv[2]);

        tmpFile = output.parent_path() / (output.filename().string() + ".tmp");

        std::ofstream out(tmpFile);
        if (not out.is_open())
            throw std::runtime_error("Failed to open output file");

        // --------------------------------------------------------------------

        // store symop data here: space group number, symop number, operator
        std::vector<std::tuple<int,int,std::array<int,15>>> data;

        // -----------------------------------------------------------------------

        struct SymInfoBlock
        {
            int nr;
            std::string xHM;
            std::string Hall;
            std::string old[2];
        };

        std::map<int,SymInfoBlock> symInfo;

        // space groups without a non-zero ccp4 number get a number above 10000
        int symopnr = 1, mysymnr = 10000;

        std::ifstream file(input);
        if (not file.is_open())
            throw std::runtime_error("Could not open syminfo.lib file");

        enum class State { skip, spacegroup } state = State::skip;

        std::string line;

        const std::regex rx(R"(^symbol +(Hall|xHM|old) +'(.+?)'(?: +'(.+?)')?$)"),
            rx2(R"(symbol ccp4 (\d+))");

        SymInfoBlock cur = {};

        std::vector<std::array<int,15>> symops, cenops;

        while (getline(file, line))
        {
            switch (state)
            {
                case State::skip:
                    if (line == "begin_spacegroup")
                    {
                        state = State::spacegroup;
                        symopnr = 1;
                        ++mysymnr;
                        cur = SymInfoBlock{ mysymnr };
                    }
                    break;

                case State::spacegroup:
                {
                    std::smatch m;
                    if (std::regex_match(line, m, rx))
                    {
                        if (m[1] == "old")
                        {
                            cur.old[0] = m[2];
                            if (m[3].matched)
                                cur.old[1] = m[3];
                        }
                        else if (m[1] == "xHM")
                            cur.xHM = m[2];
                        else if (m[1] == "Hall")
                            cur.Hall = m[2];
                    }
                    else if (std::regex_match(line, m, rx2))
                    {
                        int nr = std::stoi(m[1].str());
                        if (nr != 0)
                            cur.nr = nr;
                    }
                    else if (line.compare(0, 6, "symop ") == 0)
                    {
                        SymopParser p;
                        symops.emplace_back(p.parse(line.substr(6)));
                    }
                    else if (line.compare(0, 6, "cenop ") == 0)
                    {
                        SymopParser p;
                        cenops.emplace_back(p.parse(line.substr(6)));
                    }
                    else if (line == "end_spacegroup")
                    {
                        // every symop is emitted once for each centering operator
                        for (auto& cenop: cenops)
                        {
                            for (auto symop: symops)
                            {
                                symop = move_symop(symop, cenop);
                                data.emplace_back(cur.nr, symopnr, symop);
                                ++symopnr;
                            }
                        }

                        symInfo.emplace(cur.nr, cur);
                        state = State::skip;

                        symops.clear();
                        cenops.clear();
                    }
                    break;
                }
            }
        }

        // --------------------------------------------------------------------

        sort(data.begin(), data.end());

        // --------------------------------------------------------------------

        out << R"(// This file was generated from $CLIBD/symop.lib
// and $CLIBD/syminfo.lib using symop-map-generator,
// part of the PDB-REDO suite of programs.
#include <cif++/symmetry.hpp>
namespace cif
{
const space_group kSpaceGroups[] =
{
)";

        std::vector<std::tuple<std::string,int,std::string,std::string>> spacegroups;

        for (auto& [nr, info]: symInfo)
        {
            spacegroups.emplace_back(info.old[0], nr, info.xHM, info.Hall);
            if (info.old[1].empty() == false)
                spacegroups.emplace_back(info.old[1], nr, info.xHM, info.Hall);
        }

        sort(spacegroups.begin(), spacegroups.end());

        // Put s between double quotes and pad with spaces up to the column
        // width. Strings longer than the width are simply not padded; the
        // subtraction must not be done unconditionally since it is unsigned.
        auto quoted = [](const std::string& s, std::string::size_type width)
        {
            std::string q = '"' + s + '"';
            if (s.length() < width)
                q.append(width - s.length(), ' ');
            return q;
        };

        for (const auto& [old, nr, xHM, Hall]: spacegroups)
        {
            // escape the double quotes in the Hall symbol, back to front so
            // that the insertions do not disturb the positions still to visit
            std::string hall = Hall;
            for (std::string::size_type p = hall.length(); p > 0; --p)
            {
                if (hall[p - 1] == '"')
                    hall.insert(p - 1, "\\", 1);
            }

            out << "\t{ " << quoted(old, 20) << ", " << quoted(xHM, 30) << ", " << quoted(hall, 40) << ", " << nr << " }," << std::endl;
        }

        out << R"(
};
const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(space_group);
const symop_datablock kSymopNrTable[] = {
)" << std::endl;

        int spacegroupNr = 0;
        for (const auto& [sp, o, symop]: data)
        {
            // start each new space group with a comment line naming it
            if (sp > spacegroupNr)
                out << " // " << symInfo[sp].xHM << std::endl;
            spacegroupNr = sp;

            out << " { " << std::setw(3) << sp
                << ", " << std::setw(3) << o << ", { ";
            for (auto& i: symop)
                out << std::setw(2) << i << ',';
            out << " } }," << std::endl;
        }

        out << R"(};
const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(symop_datablock);
} // namespace cif
)" << std::endl;

        out.close();
        if (out.fail())
            throw std::runtime_error("Failed to write output file");

        fs::rename(tmpFile, output);
    }
    catch (const std::exception& ex)
    {
        std::cerr << std::endl
                  << "Program terminated due to error:" << std::endl
                  << ex.what() << std::endl;
        result = 1;
    }

    // do not leave a partially written temporary file behind
    if (result != 0 and not tmpFile.empty())
    {
        std::error_code ec;
        fs::remove(tmpFile, ec);
    }

    return result;
}

8660
src/symop_table_data.hpp Normal file

File diff suppressed because it is too large Load Diff

506
src/text.cpp Normal file
View File

@@ -0,0 +1,506 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <algorithm>
#include <cassert>
#include <cif++/text.hpp>
namespace cif
{
// --------------------------------------------------------------------
// This really makes a difference, having our own tolower routines
// Lookup table indexed by an unsigned byte value. Every entry equals its own
// index, except for 0x41..0x5a ('A'..'Z') which hold 0x61..0x7a ('a'..'z').
const uint8_t kCharToLowerMap[256] =
{
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f,
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f,
0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf,
0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf,
0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf,
0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf,
0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef,
0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff};
// --------------------------------------------------------------------
/// \brief Compare \a a and \a b for equality, ignoring the case of the
/// letters A-Z. Strings of different length are never equal.
bool iequals(std::string_view a, std::string_view b)
{
    if (a.length() != b.length())
        return false;

    for (std::string_view::size_type i = 0; i < a.length(); ++i)
    {
        // the table is indexed by the unsigned byte value
        if (kCharToLowerMap[uint8_t(a[i])] != kCharToLowerMap[uint8_t(b[i])])
            return false;
    }

    return true;
}
/// \brief Compare the zero terminated strings \a a and \a b for equality,
/// ignoring case.
bool iequals(const char *a, const char *b)
{
    // tolower is only defined for values representable as unsigned char (and
    // EOF), so a plain char with the high bit set must be converted first.
    auto lower = [](char c) { return tolower(static_cast<unsigned char>(c)); };

    for (; *a and *b; ++a, ++b)
    {
        if (lower(*a) != lower(*b))
            return false;
    }

    // equal only if both strings ended at the same position
    return *a == *b;
}
/// \brief Three-way, case-insensitive comparison of \a a and \a b.
/// \return A negative value if \a a sorts before \a b, zero if they are equal
///         and a positive value otherwise. When one string is a prefix of the
///         other the shorter one sorts first (-1 or 1 is returned).
int icompare(std::string_view a, std::string_view b)
{
    // tolower is only defined for values representable as unsigned char (and
    // EOF), so a plain char with the high bit set must be converted first.
    auto lower = [](char c) { return tolower(static_cast<unsigned char>(c)); };

    int d = 0;
    auto ai = a.begin(), bi = b.begin();

    for (; d == 0 and ai != a.end() and bi != b.end(); ++ai, ++bi)
        d = lower(*ai) - lower(*bi);

    if (d == 0)
    {
        if (ai != a.end())
            d = 1;
        else if (bi != b.end())
            d = -1;
    }

    return d;
}
/// \brief Three-way, case-insensitive comparison of the zero terminated
/// strings \a a and \a b.
/// \return A negative value if \a a sorts before \a b, zero if they are equal
///         and a positive value otherwise. When one string is a prefix of the
///         other the shorter one sorts first (-1 or 1 is returned).
int icompare(const char *a, const char *b)
{
    // tolower is only defined for values representable as unsigned char (and
    // EOF), so a plain char with the high bit set must be converted first.
    auto lower = [](char c) { return tolower(static_cast<unsigned char>(c)); };

    int d = 0;

    for (; d == 0 and *a != 0 and *b != 0; ++a, ++b)
        d = lower(*a) - lower(*b);

    if (d == 0)
    {
        if (*a != 0)
            d = 1;
        else if (*b != 0)
            d = -1;
    }

    return d;
}
/// \brief Convert all characters of \a s to lower case, in place.
void to_lower(std::string &s)
{
    // tolower must not be called with a negative value, hence the conversion
    // to unsigned char for bytes with the high bit set
    for (auto &c : s)
        c = static_cast<char>(tolower(static_cast<unsigned char>(c)));
}
/// \brief Return a copy of \a s with all characters converted to lower case.
std::string to_lower_copy(std::string_view s)
{
    std::string result(s);

    // tolower must not be called with a negative value, hence the conversion
    // to unsigned char for bytes with the high bit set
    for (auto &c : result)
        c = static_cast<char>(tolower(static_cast<unsigned char>(c)));

    return result;
}
/// \brief Convert all characters of \a s to upper case, in place.
void to_upper(std::string &s)
{
    // toupper must not be called with a negative value, hence the conversion
    // to unsigned char for bytes with the high bit set
    for (auto &c : s)
        c = static_cast<char>(toupper(static_cast<unsigned char>(c)));
}
/// \brief Replace every occurrence of \a what in \a s with \a with.
///
/// The search continues after the inserted text, so text in \a with is never
/// replaced again. An empty \a what leaves \a s unchanged.
void replace_all(std::string &s, std::string_view what, std::string_view with)
{
    // An empty pattern matches at every position; without this guard the
    // loop below would never terminate.
    if (what.empty())
        return;

    for (std::string::size_type p = s.find(what); p != std::string::npos; p = s.find(what, p + with.length()))
        s.replace(p, what.length(), with);
}
/// \brief Return whether \a s contains \a q, ignoring case. Both strings are
/// converted to lower case before the search.
bool icontains(std::string_view s, std::string_view q)
{
    const std::string haystack = to_lower_copy(s);
    const std::string needle = to_lower_copy(q);

    return contains(haystack, needle);
}
/// \brief Remove all trailing white space from \a s, in place.
void trim_right(std::string &s)
{
    auto e = s.end();

    // std::isspace must not be called with a negative value, hence the
    // conversion to unsigned char for bytes with the high bit set
    while (e != s.begin() and std::isspace(static_cast<unsigned char>(*std::prev(e))))
        --e;

    s.erase(e, s.end());
}
/// \brief Return a copy of \a s without its trailing white space.
std::string trim_right_copy(std::string_view s)
{
    auto e = s.end();

    // std::isspace must not be called with a negative value, hence the
    // conversion to unsigned char for bytes with the high bit set
    while (e != s.begin() and std::isspace(static_cast<unsigned char>(*std::prev(e))))
        --e;

    return {s.begin(), e};
}
/// \brief Return a copy of \a s without its leading white space.
std::string trim_left_copy(std::string_view s)
{
    auto b = s.begin();

    // std::isspace must not be called with a negative value, hence the
    // conversion to unsigned char for bytes with the high bit set
    while (b != s.end() and std::isspace(static_cast<unsigned char>(*b)))
        ++b;

    return {b, s.end()};
}
/// \brief Remove all leading white space from \a s, in place.
void trim_left(std::string &s)
{
    auto b = s.begin();

    // std::isspace must not be called with a negative value, hence the
    // conversion to unsigned char for bytes with the high bit set
    while (b != s.end() and std::isspace(static_cast<unsigned char>(*b)))
        ++b;

    s.erase(s.begin(), b);
}
/// \brief Remove white space from both ends of \a s, in place.
void trim(std::string &s)
{
    // tail first, then head
    trim_right(s);
    trim_left(s);
}
/// \brief Return a copy of \a s with the white space at both ends removed.
std::string trim_copy(std::string_view s)
{
    const std::string withoutTail = trim_right_copy(s);
    return trim_left_copy(withoutTail);
}
// --------------------------------------------------------------------
/// \brief Split a tag of the form "_category.item" into its two parts.
///
/// \param tag The full tag, which must start with an underscore.
/// \return The category name and the item name. For a tag without a dot the
///         category name is empty and the item name is everything after the
///         leading underscore.
/// \throws std::runtime_error if \a tag is empty or does not start with an
///         underscore.
std::tuple<std::string, std::string> split_tag_name(std::string_view tag)
{
    if (tag.empty())
        throw std::runtime_error("empty tag");

    if (tag.front() != '_')
        throw std::runtime_error("tag '" + std::string { tag } + "' does not start with underscore");

    std::string category, item;

    const auto dot = tag.find('.');
    if (dot != std::string_view::npos)
    {
        category = tag.substr(1, dot - 1);
        item = tag.substr(dot + 1);
    }
    else
        item = tag.substr(1); // no category part at all

    return std::make_tuple(std::move(category), std::move(item));
}
// --------------------------------------------------------------------
/// \brief Return an identifier made of upper case letters for \a number.
///
/// The numbers 0..25 map to "A".."Z", 26 maps to "AA" and so on, like
/// spreadsheet column names, so that every number below 26 * 26 * 26 gets an
/// identifier of its own. Larger numbers are returned as 'L' followed by the
/// decimal number.
///
/// \param number The number to convert, must not be negative.
std::string cif_id_for_number(int number)
{
    assert(number >= 0);

    std::string result;

    if (number >= 26 * 26 * 26)
        result = 'L' + std::to_string(number);
    else
    {
        // Generate the letters from least to most significant. Subtracting
        // one after each division is what makes the numbering continue with
        // "AA" after "Z"; treating the letters as plain base 26 digits would
        // drop a zero digit and let e.g. 676 collide with 26.
        do
        {
            const int r = number % 26;
            result += static_cast<char>('A' + r);
            number = (number - r) / 26 - 1;
        } while (number >= 0);

        std::reverse(result.begin(), result.end());
    }

    assert(not result.empty());
    return result;
}
// --------------------------------------------------------------------
// Simplified line breaking code taken from a decent text editor.
// In this case, simplified means it only supports ASCII.
// Line break classes of characters. The numeric value of an enumerator is
// used as an index: the first 27 enumerators (kLBC_OpenPunctuation up to and
// including kLBC_HangulTJamo) correspond, in this order, to the rows and
// columns of the 27x27 pair table in nextLineBreak. Do not reorder.
enum LineBreakClass
{
kLBC_OpenPunctuation,
kLBC_ClosePunctuation,
kLBC_CloseParenthesis,
kLBC_Quotation,
kLBC_NonBreaking,
kLBC_Nonstarter,
kLBC_Exlamation,
kLBC_SymbolAllowingBreakAfter,
kLBC_InfixNumericSeparator,
kLBC_PrefixNumeric,
kLBC_PostfixNumeric,
kLBC_Numeric,
kLBC_Alphabetic,
kLBC_Ideographic,
kLBC_Inseperable,
kLBC_Hyphen,
kLBC_BreakAfter,
kLBC_BreakBefor,
kLBC_BreakOpportunityBeforeAndAfter,
kLBC_ZeroWidthSpace,
kLBC_CombiningMark,
kLBC_WordJoiner,
kLBC_HangulLVSyllable,
kLBC_HangulLVTSyllable,
kLBC_HangulLJamo,
kLBC_HangulVJamo,
kLBC_HangulTJamo,
// the classes below have no row or column in the pair table
kLBC_MandatoryBreak,
kLBC_CarriageReturn,
kLBC_LineFeed,
kLBC_NextLine,
kLBC_Surrogate,
kLBC_Space,
kLBC_ContigentBreakOpportunity,
kLBC_Ambiguous,
kLBC_ComplexContext,
kLBC_Unknown
};
// Line break class for each of the 128 ASCII characters, indexed by character
// code. Values of 128 and above must not be used as an index.
const LineBreakClass kASCII_LBTable[128] =
{
// 0x00 - 0x1f: control characters
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_BreakAfter, kLBC_LineFeed, kLBC_MandatoryBreak, kLBC_MandatoryBreak, kLBC_CarriageReturn, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark, kLBC_CombiningMark,
// 0x20 - 0x2f: space and punctuation
kLBC_Space, kLBC_Exlamation, kLBC_Quotation, kLBC_Alphabetic, kLBC_PrefixNumeric, kLBC_PostfixNumeric, kLBC_Alphabetic, kLBC_Quotation,
kLBC_OpenPunctuation, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_PrefixNumeric,
// comma treated differently here, it is not a numeric separator in PDB
kLBC_SymbolAllowingBreakAfter /* kLBC_InfixNumericSeparator */,
kLBC_Hyphen, kLBC_InfixNumericSeparator, kLBC_SymbolAllowingBreakAfter,
// 0x30 - 0x3f: digits and punctuation
kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric, kLBC_Numeric,
kLBC_Numeric, kLBC_Numeric, kLBC_InfixNumericSeparator, kLBC_InfixNumericSeparator, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Exlamation,
// 0x40 - 0x5f: '@', upper case letters and punctuation
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_PrefixNumeric, kLBC_CloseParenthesis, kLBC_Alphabetic, kLBC_Alphabetic,
// 0x60 - 0x7f: '`', lower case letters, punctuation and DEL
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic,
kLBC_Alphabetic, kLBC_Alphabetic, kLBC_Alphabetic, kLBC_OpenPunctuation, kLBC_BreakAfter, kLBC_ClosePunctuation, kLBC_Alphabetic, kLBC_CombiningMark};
std::string::const_iterator nextLineBreak(std::string::const_iterator text, std::string::const_iterator end)
{
if (text == end)
return text;
enum breakAction
{
DBK = 0, // direct break (blank in table)
IBK, // indirect break (% in table)
PBK, // prohibited break (^ in table)
CIB, // combining indirect break
CPB // combining prohibited break
};
const breakAction brkTable[27][27] = {
// OP CL CP QU GL NS EX SY IS PR PO NU AL ID IN HY BA BB B2 ZW CM WJ H2 H3 JL JV JT
/* OP */ {PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, PBK, CPB, PBK, PBK, PBK, PBK, PBK, PBK},
/* CL */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* CP */ {DBK, PBK, PBK, IBK, IBK, PBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* QU */ {PBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* GL */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* NS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* EX */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* SY */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* IS */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* PR */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, IBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* PO */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* NU */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* AL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* ID */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* IN */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* HY */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* BA */ {DBK, PBK, PBK, IBK, DBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* BB */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* B2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, IBK, IBK, DBK, PBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* ZW */ {DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK, PBK, DBK, DBK, DBK, DBK, DBK, DBK, DBK},
/* CM */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, DBK, IBK, IBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, DBK},
/* WJ */ {IBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, IBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, IBK},
/* H2 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
/* H3 */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
/* JL */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, IBK, IBK, IBK, IBK, DBK},
/* JV */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, IBK, IBK},
/* JT */ {DBK, PBK, PBK, IBK, IBK, IBK, PBK, PBK, PBK, DBK, IBK, DBK, DBK, DBK, IBK, IBK, IBK, DBK, DBK, PBK, CIB, PBK, DBK, DBK, DBK, DBK, IBK},
};
uint8_t ch = static_cast<uint8_t>(*text);
LineBreakClass cls;
if (ch == '\n')
cls = kLBC_MandatoryBreak;
else if (ch < 128)
{
cls = kASCII_LBTable[ch];
if (cls > kLBC_MandatoryBreak and cls != kLBC_Space) // duh...
cls = kLBC_Alphabetic;
}
else
cls = kLBC_Unknown;
if (cls == kLBC_Space)
cls = kLBC_WordJoiner;
LineBreakClass ncls = cls;
while (++text != end and cls != kLBC_MandatoryBreak)
{
ch = *text;
LineBreakClass lcls = ncls;
if (ch == '\n')
{
++text;
break;
}
ncls = kASCII_LBTable[ch];
if (ncls == kLBC_Space)
continue;
breakAction brk = brkTable[cls][ncls];
if (brk == DBK or (brk == IBK and lcls == kLBC_Space))
break;
cls = ncls;
}
return text;
}
/// \brief Break a single line of text into lines of at most \a width
/// characters.
///
/// The text is first cut into segments at the positions reported by
/// nextLineBreak. Then, for each segment boundary, the cheapest way to reach
/// it is calculated, where the cost of a line is the square of the space left
/// unused on it (trailing white space not counted). The last line is free.
///
/// \return The lines, in order. Each line still contains its trailing white
///         space.
std::vector<std::string> wrapLine(const std::string &text, size_t width)
{
    // offsets of all possible break positions, starting with the very start
    std::vector<size_t> offsets{0};
    for (auto pos = text.begin(); pos != text.end();)
    {
        pos = nextLineBreak(pos, text.end());
        offsets.push_back(pos - text.begin());
    }

    const size_t count = offsets.size() - 1;

    // minima[j] is the lowest cost found for a line ending at offsets[j],
    // breaks[j] is the index of the offset where that line starts
    std::vector<size_t> minima(count + 1, 1000000);
    minima[0] = 0;
    std::vector<size_t> breaks(count + 1, 0);

    for (size_t i = 0; i < count; ++i)
    {
        for (size_t j = i + 1; j <= count; ++j)
        {
            size_t w = offsets[j] - offsets[i];

            // a longer line starting at i will not fit either
            if (w > width)
                break;

            while (w > 0 and isspace(text[offsets[i] + w - 1]))
                --w;

            size_t cost = minima[i];
            if (j < count) // last line may be shorter
                cost += (width - w) * (width - w);

            if (cost < minima[j])
            {
                minima[j] = cost;
                breaks[j] = i;
            }
        }
    }

    // walk back from the end over the chosen breaks, this yields the lines in
    // reverse order
    std::vector<std::string> result;
    for (size_t j = count; j > 0; j = breaks[j])
    {
        const size_t start = offsets[breaks[j]];
        result.push_back(text.substr(start, offsets[j] - start));
    }

    std::reverse(result.begin(), result.end());

    return result;
}
/// \brief Wrap \a text into lines of at most \a width characters.
///
/// The text is split at newline characters and each resulting paragraph is
/// wrapped separately. An empty paragraph results in one empty line.
std::vector<std::string> word_wrap(const std::string &text, size_t width)
{
    std::vector<std::string> wrapped;

    for (const auto &paragraph : cif::split<std::string>(text, "\n"))
    {
        if (not paragraph.empty())
        {
            const auto lines = wrapLine(paragraph, width);
            wrapped.insert(wrapped.end(), lines.begin(), lines.end());
        }
        else
            wrapped.push_back("");
    }

    return wrapped;
}
} // namespace cif

944
src/utilities.cpp Normal file
View File

@@ -0,0 +1,944 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <atomic>
#include <cassert>
#include <cmath>
#include <cstring>
#include <fstream>
#include <functional>
#include <iomanip>
#include <iostream>
#include <map>
#include <mutex>
#include <regex>
#include <sstream>
#include <thread>
#if not defined(_MSC_VER)
#include <sys/ioctl.h>
#include <termios.h>
#endif
#include <cif++/utilities.hpp>
#include "revision.hpp"
namespace fs = std::filesystem;
// --------------------------------------------------------------------
namespace cif
{
// Global verbosity level, zero by default. In this file it is only read by
// the Progress constructor, which shows no progress bar when it is negative.
int VERBOSE = 0;
// --------------------------------------------------------------------
/// \brief Return, as a string, what write_version_string writes when called
/// with \c false as its second argument.
std::string get_version_nr()
{
    std::ostringstream buffer;
    write_version_string(buffer, false);
    return buffer.str();
}
// --------------------------------------------------------------------
#ifdef _MSC_VER
}
#include <Windows.h>
#include <libloaderapi.h>
#include <wincon.h>
#include <codecvt>
namespace cif
{
/// \brief Return the width in columns of the console window attached to
/// standard output, or 80 when that width cannot be determined (e.g. because
/// the output is redirected to a file).
uint32_t get_terminal_width()
{
    CONSOLE_SCREEN_BUFFER_INFO csbi;

    // the call fails when stdout is not a console, csbi is then not filled in
    if (not ::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
        return 80;

    const int width = csbi.srWindow.Right - csbi.srWindow.Left + 1;
    return width > 0 ? static_cast<uint32_t>(width) : 80;
}
std::string GetExecutablePath()
{
WCHAR buffer[4096];
DWORD n = ::GetModuleFileNameW(nullptr, buffer, sizeof(buffer) / sizeof(WCHAR));
if (n == 0)
throw std::runtime_error("could not get exe path");
std::wstring ws(buffer);
// convert from utf16 to utf8
std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
std::string u8str = conv1.to_bytes(ws);
return u8str;
}
#else
#include <limits.h>
/// \brief Return the width in columns of the terminal attached to standard
/// output, or 80 when standard output is not a terminal or the width cannot
/// be determined.
uint32_t get_terminal_width()
{
    uint32_t result = 80;

    if (isatty(STDOUT_FILENO))
    {
        struct winsize w = {};

        // Ask the same descriptor that was tested above, and only use the
        // answer when the request succeeded and reported a usable width.
        if (ioctl(STDOUT_FILENO, TIOCGWINSZ, &w) == 0 and w.ws_col > 0)
            result = w.ws_col;
    }

    return result;
}
/// \brief Return the path of the running executable, as found by reading the
/// symbolic link /proc/self/exe.
/// \throws std::runtime_error if the link could not be read.
std::string get_executable_path()
{
    using namespace std::literals;

    char path[PATH_MAX] = "";

    const auto length = readlink("/proc/self/exe", path, sizeof(path));
    if (length == -1)
        throw std::runtime_error("could not get exe path "s + strerror(errno));

    // readlink does not add a terminating zero, so use the returned length
    // instead of relying on the buffer being zero terminated
    return std::string(path, static_cast<std::string::size_type>(length));
}
#endif
// --------------------------------------------------------------------
/// \brief Implementation of the progress bar: keeps the counters and runs a
/// background thread that prints the bar.
struct ProgressImpl
{
    /// \param inMax    The value of the counter at which the work is done.
    /// \param inAction Description of the work, also used as initial message.
    ProgressImpl(int64_t inMax, const std::string &inAction)
        : mMax(inMax)
        , mConsumed(0)
        , mAction(inAction)
        , mMessage(inAction)
        , mThread(std::bind(&ProgressImpl::Run, this))
    {
    }

    /// Body of the background thread.
    void Run();

    /// Ask the background thread to stop and wait until it has finished.
    void Stop()
    {
        mStop = true;
        if (mThread.joinable())
            mThread.join();
    }

    void PrintProgress();
    void PrintDone();

    int64_t mMax;
    std::atomic<int64_t> mConsumed;
    int64_t mLastConsumed = 0;
    int mSpinnerIndex = 0;
    std::string mAction, mMessage;
    std::mutex mMutex; // protects mMessage
    std::chrono::time_point<std::chrono::system_clock>
        mStart = std::chrono::system_clock::now();

    // written by Stop() and read by the background thread without further
    // synchronisation, therefore atomic
    std::atomic<bool> mStop{false};

    // Members are initialised in declaration order. The thread starts running
    // as soon as it is constructed, so it has to come after everything it uses.
    std::thread mThread;
};
/// \brief Body of the background thread.
///
/// Wakes up every 100 milliseconds until stopped or until all work has been
/// consumed. Once five seconds have passed since the start, the progress bar
/// is printed at each wake up. If anything was printed, a final line is
/// printed at the end.
void ProgressImpl::Run()
{
    bool printedAny = false;

    try
    {
        while (true)
        {
            std::this_thread::sleep_for(std::chrono::milliseconds(100));

            std::unique_lock lock(mMutex);

            if (mStop or mConsumed == mMax)
                break;

            // stay silent for jobs that finish quickly
            if (std::chrono::system_clock::now() - mStart >= std::chrono::seconds(5))
            {
                PrintProgress();
                printedAny = true;
            }
        }
    }
    catch (...)
    {
        // an exception must never leave the thread function
    }

    if (printedAny)
        PrintDone();
}
/// \brief Print one line with the current state of the progress bar.
///
/// The line consists of the message in a field of 20 characters (truncated
/// with an ellipsis when longer), a bar of 33 cells between '|' characters
/// and a spinner that advances each time more work was consumed. The line
/// starts with a carriage return so that it overwrites the previous one.
void ProgressImpl::PrintProgress()
{
    // Fill level of one cell of the bar, in eighths. Plain ASCII is used here
    // instead of the Unicode block elements U+258F .. U+2588.
    const char *kBlocks[] = {
        " ", // 0
        " ", // 1
        " ", // 2
        "-", // 3
        "-", // 4
        "-", // 5
        "=", // 6
        "=", // 7
        "=", // 8
    };

    uint32_t width = get_terminal_width();

    std::string msg;
    msg.reserve(width + 1);

    if (mMessage.length() > 20)
        msg = mMessage.substr(0, 17) + "...";
    else
    {
        msg = mMessage;
        msg.resize(20, ' ');
    }

    msg += " |";

    const int64_t consumed = mConsumed;
    const float progress = static_cast<float>(consumed) / mMax;

    // number of eighths of a cell to fill in total
    int pi = static_cast<int>(std::ceil(progress * 33 * 8));

    for (int i = 0; i < 33; ++i, pi -= 8)
    {
        const int level = pi <= 0 ? 0 : (pi >= 8 ? 8 : pi);
        msg += kBlocks[level];
    }

    msg.append("| ");

    const char kSpinner[] = {' ', '.', 'o', 'O', '0', 'O', 'o', '.'};
    const size_t kSpinnerCount = sizeof(kSpinner);

    // the spinner only moves when there was progress since the last time
    if (mLastConsumed < consumed)
    {
        mLastConsumed = consumed;
        mSpinnerIndex = (mSpinnerIndex + 1) % kSpinnerCount;
    }

    msg += kSpinner[mSpinnerIndex];

    std::cout << '\r' << msg << std::flush;
}
namespace
{
    /// \brief Write the duration \a t as days, hours, minutes and seconds,
    /// e.g. "1h 1m 1.5s". Units that are zero are left out, except for the
    /// seconds which are always written, with one decimal. Negative durations
    /// are written as zero. The formatting state of \a os is left unchanged.
    std::ostream &operator<<(std::ostream &os, const std::chrono::duration<double> &t)
    {
        const double total = t.count() > 0 ? t.count() : 0;
        const double whole = std::trunc(total);

        // keep the sub-second part apart, it has to be added back to the
        // seconds that remain after the larger units have been taken out
        const double fraction = total - whole;

        uint64_t s = static_cast<uint64_t>(whole);

        if (s >= 24 * 60 * 60)
        {
            auto days = s / (24 * 60 * 60);
            os << days << "d ";
            s %= 24 * 60 * 60;
        }

        if (s >= 60 * 60)
        {
            auto hours = s / (60 * 60);
            os << hours << "h ";
            s %= 60 * 60;
        }

        if (s >= 60)
        {
            auto minutes = s / 60;
            os << minutes << "m ";
            s %= 60;
        }

        const double ss = static_cast<double>(s) + fraction;

        // std::fixed and std::setprecision are sticky, restore the state of
        // the stream afterwards
        const auto savedFlags = os.flags();
        const auto savedPrecision = os.precision();

        os << std::fixed << std::setprecision(1) << ss << 's';

        os.flags(savedFlags);
        os.precision(savedPrecision);

        return os;
    }
} // namespace
/// \brief Print the final line, stating the action and the time it took. The
/// line is padded with spaces up to the width of the terminal so that it
/// completely overwrites the progress bar.
void ProgressImpl::PrintDone()
{
    const std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - mStart;

    std::ostringstream msgstr;

    // The duration is written with its own unit suffixes ("1m 2.0s"), so no
    // unit is appended here.
    msgstr << mAction << " done in " << elapsed;

    auto msg = msgstr.str();

    const uint32_t width = get_terminal_width();
    if (msg.length() < width)
        msg.append(width - msg.length(), ' ');

    std::cout << '\r' << msg << std::endl;
}
/// \brief Start reporting progress for an action.
///
/// Nothing is shown, and no thread is started, when standard output is not a
/// terminal or when VERBOSE is negative.
///
/// \param inMax    The value of the counter at which the work is done.
/// \param inAction Description of the work.
Progress::Progress(int64_t inMax, const std::string &inAction)
    : m_impl(nullptr)
{
    const bool showProgress = isatty(STDOUT_FILENO) and VERBOSE >= 0;

    if (showProgress)
        m_impl = new ProgressImpl(inMax, inAction);
}
/// \brief Stop the background thread, if any, and release the implementation.
Progress::~Progress()
{
    if (m_impl == nullptr)
        return;

    m_impl->Stop();
    delete m_impl;
}
/// \brief Add \a inConsumed to the amount of work done. The progress bar is
/// stopped once the total reaches the maximum.
void Progress::consumed(int64_t inConsumed)
{
    if (m_impl == nullptr)
        return;

    const int64_t total = (m_impl->mConsumed += inConsumed);

    if (total >= m_impl->mMax)
        m_impl->Stop();
}
/// \brief Set the amount of work done to \a inProgress. The progress bar is
/// stopped once this reaches the maximum.
void Progress::progress(int64_t inProgress)
{
    if (m_impl == nullptr)
        return;

    m_impl->mConsumed = inProgress;

    if (inProgress >= m_impl->mMax)
        m_impl->Stop();
}
/// \brief Replace the message shown in front of the progress bar.
void Progress::message(const std::string &inMessage)
{
    if (m_impl == nullptr)
        return;

    // the background thread reads the message while holding this mutex
    std::lock_guard lock(m_impl->mMutex);
    m_impl->mMessage = inMessage;
}
} // namespace cif
// --------------------------------------------------------------------
//
// Try to find a named resource. Can be either a local file with this name,
// a file located in our cache directory or a resource linked in with mrc.
//
// We have a special, private version of mrsrc here. To be able to create
// shared libraries and still be able to link when there's no mrc used.
namespace mrsrc
{
/// \brief Internal data structure as generated by mrc
///
/// One entry of the resource index. The code in this file uses m_next and
/// m_child as positions in the index array (zero meaning there is none),
/// m_name and m_data as offsets into the name and data blobs and m_size as
/// the size of the data.
/// NOTE(review): the layout presumably has to match what mrc generates, so do
/// not reorder or resize the fields -- confirm against mrc.
struct rsrc_imp
{
unsigned int m_next;
unsigned int m_child;
unsigned int m_name;
unsigned int m_size;
unsigned int m_data;
};
} // namespace mrsrc
// The three symbols gResourceIndex, gResourceData and gResourceName hold the
// resource index and the data and name blobs. Empty defaults are provided
// here for the case that no other definition is linked in.
#if _MSC_VER
// MSVC: define the empty defaults under another name and instruct the linker,
// with /alternatename, to use those when the real symbols are not defined.
extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
extern "C" const char *gResourceDataDefault[1] = {};
extern "C" const char *gResourceNameDefault[1] = {};
extern "C" const mrsrc::rsrc_imp gResourceIndex[];
extern "C" const char gResourceData[];
extern "C" const char gResourceName[];
#pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
#pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
#pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
#else
// Other compilers: the symbols are declared weak and defined here as zero
// initialised arrays of one element.
extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];
extern const __attribute__((weak)) char gResourceData[];
extern const __attribute__((weak)) char gResourceName[];
const mrsrc::rsrc_imp gResourceIndex[1] = {};
const char gResourceData[1] = {};
const char gResourceName[1] = {};
#endif
namespace mrsrc
{
class rsrc_data
{
public:
static rsrc_data &instance()
{
static rsrc_data s_instance;
return s_instance;
}
const rsrc_imp *index() const { return m_index; }
const char *data(unsigned int offset) const
{
return m_data + offset;
}
const char *name(unsigned int offset) const
{
return m_name + offset;
}
private:
rsrc_data()
{
// if (gResourceIndex and (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0) and gResourceIndex and gResourceName)
if (gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0)
{
m_index = gResourceIndex;
m_data = gResourceData;
m_name = gResourceName;
}
}
rsrc_imp m_dummy = {};
const rsrc_imp *m_index = &m_dummy;
const char *m_data = "";
const char *m_name = "";
};
/// \brief Class mrsrc::rsrc contains a pointer to the data in the
/// resource, as well as offering an iterator interface to its
/// children.
/// \brief Class mrsrc::rsrc contains a pointer to the data in the
/// resource, as well as offering an iterator interface to its
/// children.
///
/// An rsrc that refers to nothing (for instance because the path passed to
/// the constructor was not found) holds a null pointer. Such an object
/// converts to \c false, has an empty name, a null data pointer, size zero
/// and no children.
class rsrc
{
  public:
	/// \brief Construct an rsrc referring to the first entry of the index
	rsrc()
		: m_impl(rsrc_data::instance().index())
	{
	}

	rsrc(const rsrc &other)
		: m_impl(other.m_impl)
	{
	}

	rsrc &operator=(const rsrc &other)
	{
		m_impl = other.m_impl;
		return *this;
	}

	/// \brief Construct an rsrc for the entry found at \a path (defined out of line)
	rsrc(std::filesystem::path path);

	/// \brief The name of this entry, empty when this rsrc refers to nothing
	std::string name() const
	{
		return m_impl != nullptr ? rsrc_data::instance().name(m_impl->m_name) : "";
	}

	/// \brief Pointer to the data of this entry, nullptr when this rsrc refers to nothing
	const char *data() const
	{
		return m_impl != nullptr ? rsrc_data::instance().data(m_impl->m_data) : nullptr;
	}

	/// \brief The size of the data of this entry, zero when this rsrc refers to nothing
	unsigned long size() const
	{
		return m_impl != nullptr ? m_impl->m_size : 0;
	}

	/// \brief True when this rsrc refers to an entry with a non-zero size
	explicit operator bool() const { return m_impl != nullptr and m_impl->m_size > 0; }

	/// \brief Input iterator over the children of an rsrc, following the
	/// m_next links of the index entries.
	template <typename RSRC>
	class iterator_t
	{
	  public:
		using iterator_category = std::input_iterator_tag;
		using value_type = RSRC;
		using difference_type = std::ptrdiff_t;
		using pointer = value_type *;
		using reference = value_type &;

		iterator_t(const rsrc_imp *cur)
			: m_cur(cur)
		{
		}

		iterator_t(const iterator_t &i)
			: m_cur(i.m_cur)
		{
		}

		iterator_t &operator=(const iterator_t &i)
		{
			m_cur = i.m_cur;
			return *this;
		}

		reference operator*() { return m_cur; }
		pointer operator->() { return &m_cur; }

		iterator_t &operator++()
		{
			// m_next == 0 marks the last sibling; the end iterator holds a null pointer
			if (m_cur.m_impl != nullptr and m_cur.m_impl->m_next)
				m_cur.m_impl = rsrc_data::instance().index() + m_cur.m_impl->m_next;
			else
				m_cur.m_impl = nullptr;
			return *this;
		}

		iterator_t operator++(int)
		{
			auto tmp(*this);
			this->operator++();
			return tmp;
		}

		bool operator==(const iterator_t &rhs) const { return m_cur.m_impl == rhs.m_cur.m_impl; }
		bool operator!=(const iterator_t &rhs) const { return m_cur.m_impl != rhs.m_cur.m_impl; }

	  private:
		value_type m_cur;
	};

	using iterator = iterator_t<rsrc>;

	/// \brief Iterator to the first child, equal to end() when there are no children
	iterator begin() const
	{
		const rsrc_imp *impl = nullptr;
		if (m_impl and m_impl->m_child)
			impl = rsrc_data::instance().index() + m_impl->m_child;
		return iterator(impl);
	}

	iterator end() const
	{
		return iterator(nullptr);
	}

  private:
	rsrc(const rsrc_imp *imp)
		: m_impl(imp)
	{
	}

	const rsrc_imp *m_impl; ///< the index entry referred to, may be nullptr
};
/// \brief Locate the entry at path \a p by descending from the first index
/// entry, matching one path component against the child names per level.
/// When a component cannot be matched the resulting rsrc refers to nothing.
inline rsrc::rsrc(std::filesystem::path p)
	: m_impl(rsrc_data::instance().index())
{
	auto component = p.begin();
	const auto last = p.end();

	for (; m_impl != nullptr and component != last; ++component)
	{
		// search the children of the current entry for this component
		const rsrc_imp *match = nullptr;

		for (rsrc candidate : *this)
		{
			if (candidate.name() == *component)
			{
				match = candidate.m_impl;
				break;
			}
		}

		m_impl = match;
	}

	// components left over: the path does not exist
	if (component != last)
		m_impl = nullptr;
}
// --------------------------------------------------------------------
/// \brief Read-only stream buffer on top of the data of an mrsrc::rsrc
///
/// The buffer keeps three pointers into the resource data (begin, end and
/// the current read position) and never uses the get area of the base class;
/// every read therefore goes through underflow()/uflow().
template <typename CharT, typename Traits>
class basic_streambuf : public std::basic_streambuf<CharT, Traits>
{
  public:
	typedef CharT char_type;
	typedef Traits traits_type;
	typedef typename traits_type::int_type int_type;
	typedef typename traits_type::pos_type pos_type;
	typedef typename traits_type::off_type off_type;

	/// \brief constructor taking a \a path to the resource in memory
	basic_streambuf(const std::string &path)
		: m_rsrc(path)
	{
		init();
	}

	/// \brief constructor taking a \a rsrc
	basic_streambuf(const rsrc &rsrc)
		: m_rsrc(rsrc)
	{
		init();
	}

	basic_streambuf(const basic_streambuf &) = delete;

	/// \brief move constructor, the new buffer continues at the read position of \a rhs
	basic_streambuf(basic_streambuf &&rhs)
		: m_rsrc(rhs.m_rsrc)
		, m_begin(rhs.m_begin)
		, m_end(rhs.m_end)
		, m_current(rhs.m_current)
	{
	}

	basic_streambuf &operator=(const basic_streambuf &) = delete;

	basic_streambuf &operator=(basic_streambuf &&rhs)
	{
		swap(rhs);
		return *this;
	}

	/// \brief exchange the resource and the read position with \a rhs
	void swap(basic_streambuf &rhs)
	{
		// keep m_rsrc in sync with the pointers that were derived from it
		std::swap(m_rsrc, rhs.m_rsrc);
		std::swap(m_begin, rhs.m_begin);
		std::swap(m_end, rhs.m_end);
		std::swap(m_current, rhs.m_current);
	}

  private:
	/// \brief point begin/end/current at the data of m_rsrc
	void init()
	{
		m_begin = reinterpret_cast<const char_type *>(m_rsrc.data());
		m_end = reinterpret_cast<const char_type *>(m_rsrc.data() + m_rsrc.size());
		m_current = m_begin;
	}

	/// \brief return the current character without consuming it, or eof
	int_type underflow() override
	{
		if (m_current == m_end)
			return traits_type::eof();
		return traits_type::to_int_type(*m_current);
	}

	/// \brief return the current character and advance, or eof
	int_type uflow() override
	{
		if (m_current == m_end)
			return traits_type::eof();
		return traits_type::to_int_type(*m_current++);
	}

	/// \brief step back one character
	///
	/// The data is read-only, so this succeeds only when \a ch is eof or equal
	/// to the character that precedes the read position. Both sides of the
	/// comparison are int_type values: comparing against the raw character
	/// would fail for every char with a negative value.
	int_type pbackfail(int_type ch) override
	{
		if (m_current == m_begin)
			return traits_type::eof();

		const int_type previous = traits_type::to_int_type(m_current[-1]);
		if (not traits_type::eq_int_type(ch, traits_type::eof()) and not traits_type::eq_int_type(ch, previous))
			return traits_type::eof();

		--m_current;
		return previous;
	}

	/// \brief number of characters left between the read position and the end
	std::streamsize showmanyc() override
	{
		assert(std::less_equal<const char_type *>()(m_current, m_end));
		return m_end - m_current;
	}

	/// \brief move the read position by \a off relative to \a dir
	///
	/// The resulting position is clamped to the range [begin, end]. An
	/// unknown \a dir leaves the position unchanged.
	pos_type seekoff(off_type off, std::ios_base::seekdir dir, std::ios_base::openmode /*which*/) override
	{
		off_type origin = m_current - m_begin;

		switch (dir)
		{
			case std::ios_base::beg:
				origin = 0;
				break;
			case std::ios_base::end:
				origin = m_end - m_begin;
				break;
			case std::ios_base::cur:
				break;
			default:
				off = 0;
				break;
		}

		return move_to(origin, off);
	}

	/// \brief move the read position to \a pos, clamped to the range [begin, end]
	pos_type seekpos(pos_type pos, std::ios_base::openmode /*which*/) override
	{
		return move_to(0, off_type(pos));
	}

	/// \brief set the read position to \a origin + \a delta, clamped to
	/// [0, size], where \a origin itself lies in [0, size]
	///
	/// The clamping is done on the offsets so that no pointer outside the
	/// resource data is ever formed.
	pos_type move_to(off_type origin, off_type delta)
	{
		const off_type size = m_end - m_begin;
		off_type target;

		if (delta < 0)
			target = (delta < -origin) ? 0 : origin + delta;
		else
			target = (delta > size - origin) ? size : origin + delta;

		m_current = m_begin + target;
		return pos_type(target);
	}

  private:
	rsrc m_rsrc;
	const char_type *m_begin;
	const char_type *m_end;
	const char_type *m_current;
};
using streambuf = basic_streambuf<char, std::char_traits<char>>;
// --------------------------------------------------------------------
// class mrsrc::istream
template <typename CharT, typename Traits>
class basic_istream : public std::basic_istream<CharT, Traits>
{
public:
typedef CharT char_type;
typedef Traits traits_type;
typedef typename traits_type::int_type int_type;
typedef typename traits_type::pos_type pos_type;
typedef typename traits_type::off_type off_type;
private:
using __streambuf_type = basic_streambuf<CharT, Traits>;
using __istream_type = std::basic_istream<CharT, Traits>;
__streambuf_type m_buffer;
public:
basic_istream(const std::string &path)
: __istream_type(&m_buffer)
, m_buffer(path)
{
this->init(&m_buffer);
}
basic_istream(rsrc &resource)
: __istream_type(&m_buffer)
, m_buffer(resource)
{
this->init(&m_buffer);
}
basic_istream(const basic_istream &) = delete;
basic_istream(basic_istream &&rhs)
: __istream_type(std::move(rhs))
, m_buffer(std::move(rhs.m_buffer))
{
__istream_type::set_rdbuf(&m_buffer);
}
basic_istream &operator=(const basic_istream &) = delete;
basic_istream &operator=(basic_istream &&rhs)
{
__istream_type::operator=(std::move(rhs));
m_buffer = std::move(rhs.m_buffer);
return *this;
}
void swap(basic_istream &rhs)
{
__istream_type::swap(rhs);
m_buffer.swap(rhs.m_buffer);
}
__streambuf_type *rdbuf() const
{
return const_cast<__streambuf_type *>(&m_buffer);
}
};
using istream = basic_istream<char, std::char_traits<char>>;
} // namespace mrsrc
// --------------------------------------------------------------------
namespace cif
{
// --------------------------------------------------------------------
// Keeps track of where resources can be loaded from: files registered under
// a name and a list of directories. Directories are added at the front of
// the list, so a directory added later is searched before earlier ones.
class ResourcePool
{
  public:
	// The single instance, created on first use
	static ResourcePool &instance()
	{
		static const std::unique_ptr<ResourcePool> pool{ new ResourcePool };
		return *pool;
	}

	// Add dir to the front of the search list, ignored when it does not exist
	void pushDir(fs::path dir)
	{
		std::error_code ec;
		const bool present = fs::exists(dir, ec);

		if (ec or not present)
			return;

		mDirs.push_front(dir);
	}

	// Same as above for a C string; a null pointer is ignored
	void pushDir(const char *path)
	{
		if (path == nullptr)
			return;

		pushDir(fs::path(path));
	}

	// Register dataFile as the file to use for the resource called name,
	// throws std::runtime_error when the file does not exist
	void pushAlias(const std::string &name, std::filesystem::path dataFile)
	{
		std::error_code ec;
		const bool present = fs::exists(dataFile, ec);

		if (ec or not present)
			throw std::runtime_error("Attempt to add a file resource for " + name + " that cannot be used (" + dataFile.string() + ") :" + ec.message());

		mLocalResources[name] = dataFile;
	}

	std::unique_ptr<std::istream> load(fs::path name);

  private:
	ResourcePool();

	// Open the file at p in binary mode. Returns an empty pointer when the
	// file does not exist, cannot be opened or an exception was thrown.
	std::unique_ptr<std::ifstream> open(fs::path &p)
	{
		try
		{
			if (fs::exists(p))
			{
				std::unique_ptr<std::ifstream> file(new std::ifstream(p, std::ios::binary));
				if (file->is_open())
					return file;
			}
		}
		catch (...)
		{
		}

		return {};
	}

	std::map<std::string, std::filesystem::path> mLocalResources;
	std::deque<fs::path> mDirs;
};
// Fill the list of directories. pushDir adds to the front of the list, so
// the directories added last here end up first in the list.
ResourcePool::ResourcePool()
{
#if defined(DATA_DIR)
	pushDir(DATA_DIR);
#endif

	// pushDir ignores a null pointer, so an unset variable is harmless
	pushDir(getenv("LIBCIFPP_DATA_DIR"));

	if (auto ccp4 = getenv("CCP4"); ccp4 != nullptr)
		pushDir(fs::path(ccp4) / "share" / "libcifpp");

#if defined(CACHE_DIR)
	pushDir(CACHE_DIR);
#endif
}
// Return a stream for the resource called name, or an empty pointer when it
// cannot be found. The sources are tried in this order: a file registered
// for this name, the directories in the list and finally the resource data
// linked into the executable.
std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
{
	std::unique_ptr<std::istream> stream;

	// a file explicitly registered for this name
	if (auto alias = mLocalResources.find(name.string()); alias != mLocalResources.end())
		stream = open(alias->second);

	// the first directory containing a file with this name
	std::error_code ec;
	for (const fs::path &dir : mDirs)
	{
		if (stream)
			break;

		fs::path candidate = dir / name;
		if (fs::exists(candidate, ec) and not ec)
			stream = open(candidate);
	}

	// the resource data, but only when the index is not the empty default
	const bool haveResourceData = gResourceIndex[0].m_child > 0 or gResourceIndex[0].m_size > 0;
	if (not stream and haveResourceData)
	{
		mrsrc::rsrc resource(name);
		if (resource)
			stream.reset(new mrsrc::istream(resource));
	}

	return stream;
}
// --------------------------------------------------------------------
// Add dataDir to the directories the ResourcePool searches when loading a
// resource. The pool ignores directories that do not exist.
void add_data_directory(std::filesystem::path dataDir)
{
ResourcePool::instance().pushDir(dataDir);
}
// Register dataFile as the file to load for the resource called name.
// The pool throws a std::runtime_error when dataFile does not exist.
void add_file_resource(const std::string &name, std::filesystem::path dataFile)
{
ResourcePool::instance().pushAlias(name, dataFile);
}
// Return a stream for the resource called name as found by the ResourcePool,
// the result is an empty pointer when the resource could not be found.
std::unique_ptr<std::istream> load_resource(std::filesystem::path name)
{
return ResourcePool::instance().load(name);
}
} // namespace cif

493
src/validate.cpp Normal file
View File

@@ -0,0 +1,493 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cassert>
#include <fstream>
#include <iostream>
// The validator depends on regular expressions. Unfortunately,
// the implementation of std::regex in g++ is buggy and crashes
// on reading the pdbx dictionary. Therefore, in case g++ is used
// the code will use boost::regex instead.
#if USE_BOOST_REGEX
#include <boost/regex.hpp>
using boost::regex;
#else
#include <regex>
using std::regex;
#endif
#include <cif++/dictionary_parser.hpp>
#include <cif++/validate.hpp>
#include <cif++/utilities.hpp>
#include <cif++/gzio.hpp>
namespace cif
{
// The regular expression type used by the validators: whichever regex class
// was selected above, compiled with the extended grammar and the optimize flag.
struct regex_impl : public regex
{
	regex_impl(std::string_view rx)
		: regex(rx.data(), rx.data() + rx.size(), regex::extended | regex::optimize)
	{
	}
};
// Construct a validation error that reports msg as is.
validation_error::validation_error(const std::string &msg)
: m_msg(msg)
{
}
// Construct a validation error for item in category cat, the message is
// prefixed with the full tag name, i.e. "_cat.item".
validation_error::validation_error(const std::string &cat, const std::string &item, const std::string &msg)
: m_msg("When validating _" + cat + '.' + item + ": " + msg)
{
}
// --------------------------------------------------------------------
// Translate the name of a primitive type ("char", "uchar" or "numb", compared
// using iequals) into the corresponding DDL_PrimitiveType.
// Throws a validation_error for any other name.
DDL_PrimitiveType map_to_primitive_type(std::string_view s)
{
	if (iequals(s, "char"))
		return DDL_PrimitiveType::Char;

	if (iequals(s, "uchar"))
		return DDL_PrimitiveType::UChar;

	if (iequals(s, "numb"))
		return DDL_PrimitiveType::Numb;

	throw validation_error("Not a known primitive type");
}
// --------------------------------------------------------------------
// Construct a validator for the type called name, having primitive type
// type and using the regular expression rx to check values.
// The compiled expression is allocated here and deleted in the destructor.
type_validator::type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx)
: m_name(name)
, m_primitive_type(type)
, m_rx(new regex_impl(rx.empty() ? ".+" : rx)) /// An empty rx is replaced by ".+": empty regular expressions are not allowed in libcpp's std::regex (POSIX?)
{
}
// Release the regular expression that was allocated in the constructor.
type_validator::~type_validator()
{
delete m_rx;
}
// Adapter offering cif::from_chars as a static member function, having the
// same signature as the one in std_from_chars.
template <typename T>
struct my_from_chars
{
	static auto from_chars(const char *first, const char *last, T &value) -> std::from_chars_result
	{
		return cif::from_chars(first, last, value);
	}
};
// Adapter offering std::from_chars as a static member function, having the
// same signature as the one in my_from_chars.
template <typename T>
struct std_from_chars
{
	static auto from_chars(const char *first, const char *last, T &value) -> std::from_chars_result
	{
		return std::from_chars(first, last, value);
	}
};
// Compare the values a and b according to the primitive type of this validator.
//
// Returns zero when the values are considered equal, a negative value when
// a sorts before b and a positive value otherwise.
//
// - An empty value sorts before any non-empty value.
// - Numb: both values are parsed as double and considered equal when their
//   difference does not exceed epsilon. When only a can be parsed the result
//   is 1, in all other failure cases it is -1.
// - Char and UChar: character by character comparison in which a run of
//   spaces counts as a single space. For UChar both characters are passed
//   through tolower first.
int type_validator::compare(std::string_view a, std::string_view b) const
{
	int result = 0;

	if (a.empty())
		result = b.empty() ? 0 : -1;
	else if (b.empty())
		result = +1;
	else
	{
		switch (m_primitive_type)
		{
			case DDL_PrimitiveType::Numb:
			{
				double da = 0, db = 0;

				using namespace cif;
				using namespace std;

				std::from_chars_result ra, rb;

				// pass real pointers, string_view iterators need not be pointers
				ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.size(), da);
				rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.size(), db);

				if (ra.ec == std::errc() and rb.ec == std::errc())
				{
					auto d = da - db;
					if (std::abs(d) > std::numeric_limits<double>::epsilon())
					{
						if (d > 0)
							result = 1;
						else if (d < 0)
							result = -1;
					}
				}
				else if (ra.ec == std::errc())
					result = 1;
				else
					result = -1;
				break;
			}

			case DDL_PrimitiveType::UChar:
			case DDL_PrimitiveType::Char:
			{
				// CIF is guaranteed to have ascii only, therefore this primitive code will do
				// also, we're collapsing spaces

				auto ai = a.begin(), bi = b.begin();
				for (;;)
				{
					if (ai == a.end())
					{
						if (bi != b.end())
							result = -1;
						break;
					}
					else if (bi == b.end())
					{
						result = 1;
						break;
					}

					char ca = *ai;
					char cb = *bi;

					if (m_primitive_type == DDL_PrimitiveType::UChar)
					{
						ca = tolower(ca);
						cb = tolower(cb);
					}

					result = ca - cb;

					if (result != 0)
						break;

					if (ca == ' ')
					{
						// Skip the rest of a run of spaces in both values. A
						// string_view need not be zero terminated, so check
						// for the end before looking at the next character.
						while (ai + 1 != a.end() and ai[1] == ' ')
							++ai;
						while (bi + 1 != b.end() and bi[1] == ' ')
							++bi;
					}

					++ai;
					++bi;
				}

				break;
			}
		}
	}

	return result;
}
// --------------------------------------------------------------------
// void ValidateItem::addLinked(ValidateItem* parent, const std::string& parentItem, const std::string& childItem)
//{
//// if (mParent != nullptr and VERBOSE)
//// cerr << "replacing parent in " << mCategory->m_name << " from " << mParent->mCategory->m_name << " to " << parent->mCategory->m_name << endl;
//// mParent = parent;
//
// if (m_type == nullptr and parent != nullptr)
// m_type = parent->m_type;
//
// if (parent != nullptr)
// {
// mLinked.push_back({parent, parentItem, childItem});
//
// parent->mChildren.insert(this);
////
//// if (mCategory->mKeys == std::vector<std::string>{mTag})
//// parent->mForeignKeys.insert(this);
// }
//}
// Validate value for this item. Empty values and the values "?" and "." are
// always accepted. Any other value should match the regular expression of
// the type of this item, if a type is known, and should be one of the
// enumerated values, if there are any. Throws a validation_error otherwise.
void item_validator::operator()(std::string_view value) const
{
	if (value.empty() or value == "?" or value == ".")
		return;

	const bool typeMismatch = m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx);
	if (typeMismatch)
		throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' does not match type expression for type " + m_type->m_name);

	if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
		throw validation_error(m_category->m_name, m_tag, "Value '" + std::string{ value } + "' is not in the list of allowed values");
}
// --------------------------------------------------------------------
// Add the item validator v to this category. The tag of a mandatory item is
// added to the mandatory fields and v is made to refer to this category
// before it is stored. A failure to store v is only reported, at verbose
// level 4 and up.
void category_validator::addItemValidator(item_validator &&v)
{
	if (v.m_mandatory)
		m_mandatory_fields.insert(v.m_tag);

	v.m_category = this;

	const bool inserted = m_item_validators.insert(std::move(v)).second;

	if (VERBOSE >= 4 and not inserted)
		std::cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << std::endl;
}
// Return the validator for the item called tag in this category, or nullptr
// when there is none. The latter is reported at verbose levels above 4.
const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
{
	const auto pos = m_item_validators.find(item_validator{ std::string(tag) });

	if (pos != m_item_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for tag " << tag << std::endl;

	return nullptr;
}
// --------------------------------------------------------------------
// Add the type validator v. A failure to store v is only reported, at
// verbose levels above 4.
void validator::add_type_validator(type_validator &&v)
{
	const bool inserted = m_type_validators.insert(std::move(v)).second;

	if (VERBOSE > 4 and not inserted)
		std::cout << "Could not add validator for type " << v.m_name << std::endl;
}
// Return the validator for the type called typeCode, or nullptr when there
// is none. The latter is reported at verbose levels above 4.
const type_validator *validator::get_validator_for_type(std::string_view typeCode) const
{
	// a temporary validator serves as the key for the lookup
	const auto pos = m_type_validators.find(type_validator{ std::string(typeCode), DDL_PrimitiveType::Char, {} });

	if (pos != m_type_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for type " << typeCode << std::endl;

	return nullptr;
}
// Add the category validator v. A failure to store v is only reported, at
// verbose levels above 4.
void validator::add_category_validator(category_validator &&v)
{
	const bool inserted = m_category_validators.insert(std::move(v)).second;

	if (VERBOSE > 4 and not inserted)
		std::cout << "Could not add validator for category " << v.m_name << std::endl;
}
// Return the validator for the category called category, or nullptr when
// there is none. The latter is reported at verbose levels above 4.
const category_validator *validator::get_validator_for_category(std::string_view category) const
{
	const auto pos = m_category_validators.find(category_validator{ std::string(category) });

	if (pos != m_category_validators.end())
		return &*pos;

	if (VERBOSE > 4)
		std::cout << "No validator for category " << category << std::endl;

	return nullptr;
}
// Return the validator for the item with the full tag name tag, or nullptr
// when either the category or the item is unknown. The latter is reported
// at verbose levels above 4.
item_validator *validator::get_validator_for_item(std::string_view tag) const
{
	std::string categoryName, itemName;
	std::tie(categoryName, itemName) = split_tag_name(tag);

	item_validator *found = nullptr;

	if (auto *categoryValidator = get_validator_for_category(categoryName); categoryValidator != nullptr)
		found = const_cast<item_validator *>(categoryValidator->get_validator_for_item(itemName));

	if (VERBOSE > 4 and found == nullptr)
		std::cout << "No validator for item " << tag << std::endl;

	return found;
}
// Add the link validator v, describing a link between a parent and a child
// category.
//
// Throws a std::runtime_error when the number of parent keys differs from
// the number of child keys, when one of the categories is unknown or when
// one of the keys is not a known item in its category. Since all of these
// depend on the dictionary being read, they are reported using exceptions
// only; an assert would turn them into an abort in debug builds.
//
// A child item without a type inherits the type of the corresponding
// parent item.
void validator::add_link_validator(link_validator &&v)
{
	if (v.m_parent_keys.size() != v.m_child_keys.size())
		throw std::runtime_error("unequal number of keys for parent and child in link");

	auto pcv = get_validator_for_category(v.m_parent_category);
	auto ccv = get_validator_for_category(v.m_child_category);

	if (pcv == nullptr)
		throw std::runtime_error("unknown parent category " + v.m_parent_category);

	if (ccv == nullptr)
		throw std::runtime_error("unknown child category " + v.m_child_category);

	for (size_t i = 0; i < v.m_parent_keys.size(); ++i)
	{
		auto piv = pcv->get_validator_for_item(v.m_parent_keys[i]);

		if (piv == nullptr)
			throw std::runtime_error("unknown parent tag _" + v.m_parent_category + '.' + v.m_parent_keys[i]);

		auto civ = ccv->get_validator_for_item(v.m_child_keys[i]);
		if (civ == nullptr)
			throw std::runtime_error("unknown child tag _" + v.m_child_category + '.' + v.m_child_keys[i]);

		// the validators are handed out as const, the type is filled in regardless
		if (civ->m_type == nullptr and piv->m_type != nullptr)
			const_cast<item_validator *>(civ)->m_type = piv->m_type;
	}

	m_link_validators.emplace_back(std::move(v));
}
std::vector<const link_validator *> validator::get_links_for_parent(std::string_view category) const
{
std::vector<const link_validator *> result;
for (auto &l : m_link_validators)
{
if (l.m_parent_category == category)
result.push_back(&l);
}
return result;
}
std::vector<const link_validator *> validator::get_links_for_child(std::string_view category) const
{
std::vector<const link_validator *> result;
for (auto &l : m_link_validators)
{
if (l.m_child_category == category)
result.push_back(&l);
}
return result;
}
// Report the error msg. A validation_error is thrown when the error is
// fatal or when this validator is strict, otherwise the message is written
// to std::cerr, provided VERBOSE is larger than zero.
void validator::report_error(const std::string &msg, bool fatal) const
{
	if (m_strict or fatal)
		throw validation_error(msg);

	if (VERBOSE > 0)
		std::cerr << msg << std::endl;
}
// --------------------------------------------------------------------
// Return the validator for the dictionary called dictionary_name. When no
// such validator exists yet, the dictionary is located, parsed and the
// resulting validator is added to the list first.
//
// The dictionary is looked for as a resource using load_resource, first
// using the name as given and then with ".dic" appended. When that fails a
// gzip compressed file on disk is tried.
//
// Throws a std::runtime_error when the dictionary cannot be found or when
// the compressed file cannot be opened.
const validator &validator_factory::operator[](std::string_view dictionary_name)
{
// m_mutex is held until this function returns
std::lock_guard lock(m_mutex);
// first try the validators already constructed, using the name as given
for (auto &validator : m_validators)
{
if (iequals(validator.name(), dictionary_name))
return validator;
}
// not found, try to see if it helps if we tweak the name a little
// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());
if (dictionary.extension() != ".dic")
{
// try again using the file name part with ".dic" appended
auto dict_name = dictionary.filename().string() + ".dic";
for (auto &validator : m_validators)
{
if (iequals(validator.name(), dict_name))
return validator;
}
}
// not found, add it
auto data = load_resource(dictionary_name);
if (not data and dictionary.extension().string() != ".dic")
data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));
if (data)
construct_validator(dictionary_name, *data);
else
{
std::error_code ec;
// might be a compressed dictionary on disk
// p becomes the name of the dictionary ending in ".dic.gz"
std::filesystem::path p = dictionary;
if (p.extension() == ".dic")
p = p.parent_path() / (p.filename().string() + ".gz");
else
p = p.parent_path() / (p.filename().string() + ".dic.gz");
#if defined(CACHE_DIR) or defined(DATA_DIR)
// when p does not exist as is, look for it in the directories configured
// at compile time and use the first one that exists
if (not std::filesystem::exists(p, ec) or ec)
{
for (const char *dir : {
#if defined(CACHE_DIR)
CACHE_DIR,
#endif
#if defined(DATA_DIR)
DATA_DIR
#endif
})
{
auto p2 = std::filesystem::path(dir) / p;
if (std::filesystem::exists(p2, ec) and not ec)
{
swap(p, p2);
break;
}
}
}
#endif
if (std::filesystem::exists(p, ec) and not ec)
{
gzio::ifstream in(p);
if (not in.is_open())
throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
construct_validator(dictionary_name, in);
}
else
throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
}
// construct_validator has appended the new validator to the list
return m_validators.back();
}
// Parse the dictionary in the stream is using parse_dictionary and append
// the result, constructed with the name name, to the list of validators.
void validator_factory::construct_validator(std::string_view name, std::istream &is)
{
m_validators.emplace_back(parse_dictionary(name, is));
}
} // namespace cif

BIN
test/1juh.cif.gz Normal file

Binary file not shown.

253
test/HEM.cif Normal file
View File

@@ -0,0 +1,253 @@
data_HEM
#
_chem_comp.id HEM
_chem_comp.name "PROTOPORPHYRIN IX CONTAINING FE"
_chem_comp.type NON-POLYMER
_chem_comp.pdbx_type HETAIN
_chem_comp.formula "C34 H32 Fe N4 O4"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms HEME
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2020-06-17
_chem_comp.pdbx_ambiguous_flag Y
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces MHM
_chem_comp.formula_weight 616.487
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code HEM
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 3IA3
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
# #
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
HEM CHA CHA C 0 1 N N N 2.748 -19.531 39.896 -2.161 -0.125 0.490 CHA HEM 1
HEM CHB CHB C 0 1 N N N 3.258 -17.744 35.477 1.458 -3.419 0.306 CHB HEM 2
HEM CHC CHC C 0 1 N N N 1.703 -21.900 33.637 4.701 0.169 -0.069 CHC HEM 3
HEM CHD CHD C 0 1 N N N 1.149 -23.677 38.059 1.075 3.460 0.018 CHD HEM 4
HEM C1A C1A C 0 1 Y N N 3.031 -18.673 38.872 -1.436 -1.305 0.380 C1A HEM 5
HEM C2A C2A C 0 1 Y N N 3.578 -17.325 39.013 -2.015 -2.587 0.320 C2A HEM 6
HEM C3A C3A C 0 1 Y N N 3.705 -16.820 37.785 -1.009 -3.500 0.270 C3A HEM 7
HEM C4A C4A C 0 1 Y N N 3.256 -17.863 36.862 0.216 -2.803 0.298 C4A HEM 8
HEM CMA CMA C 0 1 N N N 4.227 -15.469 37.393 -1.175 -4.996 0.197 CMA HEM 9
HEM CAA CAA C 0 1 N N N 3.945 -16.670 40.296 -3.490 -2.893 0.314 CAA HEM 10
HEM CBA CBA C 0 1 N N N 5.391 -17.138 40.581 -3.998 -2.926 -1.129 CBA HEM 11
HEM CGA CGA C 0 1 N N N 6.095 -16.663 41.825 -5.473 -3.232 -1.136 CGA HEM 12
HEM O1A O1A O 0 1 N N N 7.098 -15.928 41.683 -6.059 -3.405 -0.094 O1A HEM 13
HEM O2A O2A O 0 1 N N N 5.657 -17.040 42.940 -6.137 -3.311 -2.300 O2A HEM 14
HEM C1B C1B C 0 1 N N N 2.888 -18.698 34.579 2.664 -2.707 0.308 C1B HEM 15
HEM C2B C2B C 0 1 N N N 2.933 -18.535 33.146 3.937 -3.328 0.418 C2B HEM 16
HEM C3B C3B C 0 1 N N N 2.499 -19.716 32.632 4.874 -2.341 0.314 C3B HEM 17
HEM C4B C4B C 0 1 N N N 2.187 -20.580 33.743 4.117 -1.079 0.139 C4B HEM 18
HEM CMB CMB C 0 1 N N N 3.391 -17.290 32.422 4.203 -4.798 0.613 CMB HEM 19
HEM CAB CAB C 0 1 N N N 2.345 -20.140 31.217 6.339 -2.497 0.365 CAB HEM 20
HEM CBB CBB C 0 1 N N N 1.755 -19.492 30.233 6.935 -3.419 -0.385 CBB HEM 21
HEM C1C C1C C 0 1 Y N N 1.395 -22.786 34.659 3.964 1.345 -0.174 C1C HEM 22
HEM C2C C2C C 0 1 Y N N 0.854 -24.130 34.500 4.531 2.601 -0.445 C2C HEM 23
HEM C3C C3C C 0 1 Y N N 0.689 -24.626 35.757 3.510 3.536 -0.437 C3C HEM 24
HEM C4C C4C C 0 1 Y N N 1.139 -23.583 36.674 2.304 2.846 -0.139 C4C HEM 25
HEM CMC CMC C 0 1 N N N 0.550 -24.782 33.175 5.991 2.880 -0.697 CMC HEM 26
HEM CAC CAC C 0 1 N N N 0.164 -25.943 36.196 3.649 4.981 -0.692 CAC HEM 27
HEM CBC CBC C 0 1 N N N 0.498 -27.158 35.750 4.201 5.407 -1.823 CBC HEM 28
HEM C1D C1D C 0 1 N N N 1.550 -22.718 38.980 -0.102 2.753 0.298 C1D HEM 29
HEM C2D C2D C 0 1 N N N 1.513 -22.879 40.415 -1.382 3.388 0.641 C2D HEM 30
HEM C3D C3D C 0 1 N N N 1.951 -21.691 40.929 -2.283 2.389 0.774 C3D HEM 31
HEM C4D C4D C 0 1 N N N 2.277 -20.826 39.811 -1.561 1.137 0.511 C4D HEM 32
HEM CMD CMD C 0 1 N N N 1.055 -24.094 41.156 -1.639 4.863 0.811 CMD HEM 33
HEM CAD CAD C 0 1 N N N 2.048 -21.326 42.352 -3.741 2.532 1.123 CAD HEM 34
HEM CBD CBD C 0 1 N N N 0.741 -20.498 42.530 -4.573 2.563 -0.160 CBD HEM 35
HEM CGD CGD C 0 1 N N N 0.578 -19.987 43.892 -6.032 2.706 0.189 CGD HEM 36
HEM O1D O1D O 0 1 N N N 1.387 -19.103 44.303 -6.372 2.776 1.347 O1D HEM 37
HEM O2D O2D O 0 1 N N N -0.401 -20.468 44.537 -6.954 2.755 -0.785 O2D HEM 38
HEM NA NA N 0 1 Y N N 2.863 -18.969 37.554 -0.068 -1.456 0.321 NA HEM 39
HEM NB NB N 0 1 N N N 2.439 -19.944 34.911 2.820 -1.386 0.207 NB HEM 40
HEM NC NC N 0 1 Y N N 1.537 -22.509 35.976 2.604 1.506 -0.033 NC HEM 41
HEM ND ND N 0 1 N N N 2.008 -21.465 38.663 -0.276 1.431 0.298 ND HEM 42
HEM FE FE FE 0 0 N N N 2.196 -20.749 36.814 1.010 0.157 -0.060 FE HEM 43
HEM HHB HHB H 0 1 N N N 3.587 -16.798 35.072 1.498 -4.508 0.309 HHB HEM 44
HEM HHC HHC H 0 1 N N N 1.553 -22.268 32.633 5.786 0.229 -0.153 HHC HEM 45
HEM HHD HHD H 0 1 N N N 0.802 -24.613 38.472 1.018 4.543 -0.083 HHD HEM 46
HEM HMA HMA H 0 1 N N N 5.316 -15.524 37.249 -1.220 -5.306 -0.847 HMA HEM 47
HEM HMAA HMAA H 0 0 N N N 3.749 -15.149 36.455 -0.328 -5.480 0.683 HMAA HEM 48
HEM HMAB HMAB H 0 0 N N N 3.998 -14.743 38.187 -2.097 -5.285 0.702 HMAB HEM 49
HEM HAA HAA H 0 1 N N N 3.905 -15.575 40.197 -3.662 -3.862 0.782 HAA HEM 50
HEM HAAA HAAA H 0 0 N N N 3.268 -16.991 41.102 -4.024 -2.121 0.869 HAAA HEM 51
HEM HBA HBA H 0 1 N N N 5.368 -18.237 40.627 -3.825 -1.956 -1.597 HBA HEM 52
HEM HBAA HBAA H 0 0 N N N 6.004 -16.819 39.725 -3.464 -3.697 -1.684 HBAA HEM 53
HEM HMB HMB H 0 1 N N N 3.319 -17.449 31.336 3.256 -5.336 0.660 HMB HEM 54
HEM HMBA HMBA H 0 0 N N N 2.753 -16.442 32.711 4.794 -5.175 -0.222 HMBA HEM 55
HEM HMBB HMBB H 0 0 N N N 4.435 -17.072 32.692 4.752 -4.948 1.543 HMBB HEM 56
HEM HAB HAB H 0 1 N N N 2.770 -21.100 30.963 6.927 -1.863 1.011 HAB HEM 57
HEM HBB HBB H 0 1 N N N 1.719 -19.927 29.245 7.994 -3.600 -0.277 HBB HEM 58
HEM HBBA HBBA H 0 0 N N N 1.308 -18.526 30.414 6.360 -3.987 -1.102 HBBA HEM 59
HEM HMC HMC H 0 1 N N N 0.153 -25.793 33.346 6.554 1.949 -0.639 HMC HEM 60
HEM HMCA HMCA H 0 0 N N N -0.196 -24.182 32.634 6.110 3.316 -1.689 HMCA HEM 61
HEM HMCB HMCB H 0 0 N N N 1.472 -24.846 32.578 6.362 3.578 0.053 HMCB HEM 62
HEM HAC HAC H 0 1 N N N -0.583 -25.916 36.975 3.303 5.694 0.042 HAC HEM 63
HEM HBC HBC H 0 1 N N N 0.027 -28.035 36.169 4.614 4.696 -2.523 HBC HEM 64
HEM HBCA HBCA H 0 0 N N N 1.239 -27.263 34.971 4.235 6.464 -2.043 HBCA HEM 65
HEM HMD HMD H 0 1 N N N 1.142 -23.919 42.238 -0.715 5.415 0.639 HMD HEM 66
HEM HMDA HMDA H 0 0 N N N 0.006 -24.304 40.902 -2.394 5.185 0.094 HMDA HEM 67
HEM HMDB HMDB H 0 0 N N N 1.680 -24.954 40.872 -1.994 5.055 1.824 HMDB HEM 68
HEM HAD HAD H 0 1 N N N 2.055 -22.216 42.999 -4.052 1.687 1.738 HAD HEM 69
HEM HADA HADA H 0 0 N N N 2.943 -20.719 42.554 -3.893 3.459 1.677 HADA HEM 70
HEM HBD HBD H 0 1 N N N 0.767 -19.646 41.835 -4.262 3.408 -0.775 HBD HEM 71
HEM HBDA HBDA H 0 0 N N N -0.119 -21.141 42.290 -4.421 1.636 -0.714 HBDA HEM 72
HEM H2A H2A H 0 1 N N N 6.201 -16.682 43.632 -7.082 -3.510 -2.254 H2A HEM 73
HEM H2D H2D H 0 1 N N N -0.445 -20.063 45.395 -7.877 2.847 -0.512 H2D HEM 74
HEM HHA HHA H 0 1 N N N 2.913 -19.150 40.893 -3.246 -0.188 0.567 HHA HEM 75
# #
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
HEM CHA C1A SING N N 1
HEM CHA C4D DOUB N N 2
HEM CHA HHA SING N N 3
HEM CHB C4A SING N N 4
HEM CHB C1B DOUB N N 5
HEM CHB HHB SING N N 6
HEM CHC C4B SING N N 7
HEM CHC C1C DOUB N N 8
HEM CHC HHC SING N N 9
HEM CHD C4C DOUB N N 10
HEM CHD C1D SING N N 11
HEM CHD HHD SING N N 12
HEM C1A C2A DOUB Y N 13
HEM C1A NA SING Y N 14
HEM C2A C3A SING Y N 15
HEM C2A CAA SING N N 16
HEM C3A C4A DOUB Y N 17
HEM C3A CMA SING N N 18
HEM C4A NA SING Y N 19
HEM CMA HMA SING N N 20
HEM CMA HMAA SING N N 21
HEM CMA HMAB SING N N 22
HEM CAA CBA SING N N 23
HEM CAA HAA SING N N 24
HEM CAA HAAA SING N N 25
HEM CBA CGA SING N N 26
HEM CBA HBA SING N N 27
HEM CBA HBAA SING N N 28
HEM CGA O1A DOUB N N 29
HEM CGA O2A SING N N 30
HEM C1B C2B SING N N 31
HEM C1B NB SING N N 32
HEM C2B C3B DOUB N N 33
HEM C2B CMB SING N N 34
HEM C3B C4B SING N N 35
HEM C3B CAB SING N N 36
HEM C4B NB DOUB N N 37
HEM CMB HMB SING N N 38
HEM CMB HMBA SING N N 39
HEM CMB HMBB SING N N 40
HEM CAB CBB DOUB N N 41
HEM CAB HAB SING N N 42
HEM CBB HBB SING N N 43
HEM CBB HBBA SING N N 44
HEM C1C C2C SING Y N 45
HEM C1C NC SING Y N 46
HEM C2C C3C DOUB Y N 47
HEM C2C CMC SING N N 48
HEM C3C C4C SING Y N 49
HEM C3C CAC SING N N 50
HEM C4C NC SING Y N 51
HEM CMC HMC SING N N 52
HEM CMC HMCA SING N N 53
HEM CMC HMCB SING N N 54
HEM CAC CBC DOUB N N 55
HEM CAC HAC SING N N 56
HEM CBC HBC SING N N 57
HEM CBC HBCA SING N N 58
HEM C1D C2D SING N N 59
HEM C1D ND DOUB N N 60
HEM C2D C3D DOUB N N 61
HEM C2D CMD SING N N 62
HEM C3D C4D SING N N 63
HEM C3D CAD SING N N 64
HEM C4D ND SING N N 65
HEM CMD HMD SING N N 66
HEM CMD HMDA SING N N 67
HEM CMD HMDB SING N N 68
HEM CAD CBD SING N N 69
HEM CAD HAD SING N N 70
HEM CAD HADA SING N N 71
HEM CBD CGD SING N N 72
HEM CBD HBD SING N N 73
HEM CBD HBDA SING N N 74
HEM CGD O1D DOUB N N 75
HEM CGD O2D SING N N 76
HEM O2A H2A SING N N 77
HEM O2D H2D SING N N 78
HEM FE NA SING N N 79
HEM FE NB SING N N 80
HEM FE NC SING N N 81
HEM FE ND SING N N 82
# #
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
HEM SMILES ACDLabs 12.01 "C=1c3c(c(c4C=C5C(=C(C=6C=C7C(=C(C8=CC=2C(=C(C=1N=2[Fe](n34)(N5=6)N78)CCC(=O)O)C)\C=C)C)\C=C)C)C)CCC(=O)O"
HEM InChI InChI 1.03 "InChI=1S/C34H34N4O4.Fe/c1-7-21-17(3)25-13-26-19(5)23(9-11-33(39)40)31(37-26)16-32-24(10-12-34(41)42)20(6)28(38-32)15-30-22(8-2)18(4)27(36-30)14-29(21)35-25;/h7-8,13-16H,1-2,9-12H2,3-6H3,(H4,35,36,37,38,39,40,41,42);/q;+2/p-2/b25-13-,26-13-,27-14-,28-15-,29-14-,30-15-,31-16-,32-16-;"
HEM InChIKey InChI 1.03 KABFMIBPWCXCRK-RGGAHWMASA-L
HEM SMILES_CANONICAL CACTVS 3.385 "CC1=C(CCC(O)=O)C2=Cc3n4[Fe]5|6|N2=C1C=c7n5c(=CC8=N|6C(=Cc4c(C)c3CCC(O)=O)C(=C8C=C)C)c(C)c7C=C"
HEM SMILES CACTVS 3.385 "CC1=C(CCC(O)=O)C2=Cc3n4[Fe]5|6|N2=C1C=c7n5c(=CC8=N|6C(=Cc4c(C)c3CCC(O)=O)C(=C8C=C)C)c(C)c7C=C"
HEM SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "Cc1c2n3c(c1CCC(=O)O)C=C4C(=C(C5=[N]4[Fe]36[N]7=C(C=C8N6C(=C5)C(=C8C)C=C)C(=C(C7=C2)C)C=C)C)CCC(=O)O"
HEM SMILES "OpenEye OEToolkits" 1.7.6 "Cc1c2n3c(c1CCC(=O)O)C=C4C(=C(C5=[N]4[Fe]36[N]7=C(C=C8N6C(=C5)C(=C8C)C=C)C(=C(C7=C2)C)C=C)C)CCC(=O)O"
# #
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
HEM "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.6.1 "3-[(5Z,10Z,14Z,19Z)-18-(2-carboxyethyl)-8,13-bis(ethenyl)-3,7,12,17-tetramethyl-21,23-dihydroporphyrin-2-yl]propanoic acid"
HEM "SYSTEMATIC NAME" ACDLabs 12.01 "[3,3'-(7,12-diethenyl-3,8,13,17-tetramethylporphyrin-2,18-diyl-kappa~4~N~21~,N~22~,N~23~,N~24~)dipropanoato(2-)]iron"
# #
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
HEM "Create component" 1999-07-08 RCSB
HEM "Other modification" 2016-01-20 RCSB
HEM "Modify synonyms" 2020-06-05 PDBE
#
_pdbx_chem_comp_synonyms.ordinal 1
_pdbx_chem_comp_synonyms.comp_id HEM
_pdbx_chem_comp_synonyms.name HEME
_pdbx_chem_comp_synonyms.provenance ?
_pdbx_chem_comp_synonyms.type ?
##

188
test/REA.cif Normal file
View File

@@ -0,0 +1,188 @@
data_REA
#
_chem_comp.id REA
_chem_comp.name "RETINOIC ACID"
_chem_comp.type NON-POLYMER
_chem_comp.pdbx_type HETAIN
_chem_comp.formula "C20 H28 O2"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2016-10-18
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces 3KV
_chem_comp.formula_weight 300.435
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code REA
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 1CBS
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
#
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
REA C1 C1 C 0 1 N N N 21.972 29.831 16.739 -4.684 0.932 -0.497 C1 REA 1
REA C2 C2 C 0 1 N N N 20.921 30.524 15.841 -5.837 0.190 -1.176 C2 REA 2
REA C3 C3 C 0 1 N N N 20.245 29.635 14.848 -6.441 -0.798 -0.171 C3 REA 3
REA C4 C4 C 0 1 N N N 19.555 28.479 15.488 -5.418 -1.903 0.100 C4 REA 4
REA C5 C5 C 0 1 N N N 20.389 27.812 16.587 -4.082 -1.301 0.429 C5 REA 5
REA C6 C6 C 0 1 N N N 21.425 28.446 17.218 -3.756 -0.048 0.161 C6 REA 6
REA C7 C7 C 0 1 N N N 22.242 27.851 18.297 -2.457 0.396 0.516 C7 REA 7
REA C8 C8 C 0 1 N N N 21.868 26.977 19.240 -1.363 -0.229 0.007 C8 REA 8
REA C9 C9 C 0 1 N N N 22.705 26.434 20.286 -0.076 0.257 0.298 C9 REA 9
REA C10 C10 C 0 1 N N N 22.159 25.536 21.131 1.022 -0.370 -0.213 C10 REA 10
REA C11 C11 C 0 1 N N N 22.875 24.924 22.234 2.306 0.115 0.077 C11 REA 11
REA C12 C12 C 0 1 N N N 22.237 24.026 22.990 3.405 -0.513 -0.435 C12 REA 12
REA C13 C13 C 0 1 N N N 22.856 23.377 24.125 4.689 -0.028 -0.144 C13 REA 13
REA C14 C14 C 0 1 N N N 22.135 22.473 24.834 5.787 -0.655 -0.656 C14 REA 14
REA C15 C15 C 0 1 N N N 22.563 21.710 26.016 7.077 -0.265 -0.244 C15 REA 15
REA C16 C16 C 0 1 N N N 22.238 30.737 17.948 -5.246 1.886 0.559 C16 REA 16
REA C17 C17 C 0 1 N N N 23.292 29.620 15.948 -3.911 1.737 -1.544 C17 REA 17
REA C18 C18 C 0 1 N N N 19.791 26.449 16.947 -3.056 -2.175 1.103 C18 REA 18
REA C19 C19 C 0 1 N N N 24.181 26.841 20.385 0.090 1.471 1.175 C19 REA 19
REA C20 C20 C 0 1 N N N 24.303 23.747 24.489 4.855 1.186 0.733 C20 REA 20
REA O1 O1 O 0 1 N N N 23.640 21.075 25.978 7.210 0.553 0.648 O1 REA 21
REA O2 O2 O 0 1 N N N 21.840 21.712 27.037 8.166 -0.798 -0.840 O2 REA 22
REA H21 H21 H 0 1 N N N 20.147 30.955 16.494 -6.598 0.905 -1.490 H21 REA 23
REA H22 H22 H 0 1 N N N 21.425 31.330 15.288 -5.462 -0.353 -2.044 H22 REA 24
REA H31 H31 H 0 1 N N N 19.501 30.227 14.295 -6.673 -0.278 0.759 H31 REA 25
REA H32 H32 H 0 1 N N N 21.001 29.250 14.148 -7.349 -1.234 -0.586 H32 REA 26
REA H41 H41 H 0 1 N N N 18.613 28.835 15.931 -5.756 -2.511 0.938 H41 REA 27
REA H42 H42 H 0 1 N N N 19.335 27.730 14.713 -5.322 -2.531 -0.786 H42 REA 28
REA H7 H7 H 0 1 N N N 23.276 28.162 18.329 -2.337 1.230 1.191 H7 REA 29
REA H8 H8 H 0 1 N N N 20.840 26.645 19.217 -1.482 -1.100 -0.622 H8 REA 30
REA H10 H10 H 0 1 N N N 21.127 25.256 20.977 0.903 -1.241 -0.842 H10 REA 31
REA H11 H11 H 0 1 N N N 23.902 25.189 22.440 2.425 0.985 0.706 H11 REA 32
REA H12 H12 H 0 1 N N N 21.216 23.774 22.743 3.286 -1.383 -1.063 H12 REA 33
REA H14 H14 H 0 1 N N N 21.127 22.292 24.490 5.667 -1.451 -1.376 H14 REA 34
REA H161 H161 H 0 0 N N N 22.984 30.265 18.604 -5.802 1.316 1.303 H161 REA 35
REA H162 H162 H 0 0 N N N 22.618 31.709 17.601 -4.426 2.415 1.044 H162 REA 36
REA H163 H163 H 0 0 N N N 21.302 30.887 18.506 -5.911 2.605 0.081 H163 REA 37
REA H171 H171 H 0 0 N N N 24.033 29.127 16.595 -4.598 2.394 -2.077 H171 REA 38
REA H172 H172 H 0 0 N N N 23.095 28.989 15.069 -3.146 2.335 -1.050 H172 REA 39
REA H173 H173 H 0 0 N N N 23.683 30.595 15.620 -3.439 1.054 -2.251 H173 REA 40
REA H181 H181 H 0 0 N N N 20.397 25.979 17.736 -3.448 -3.187 1.201 H181 REA 41
REA H182 H182 H 0 0 N N N 18.761 26.584 17.308 -2.145 -2.194 0.503 H182 REA 42
REA H183 H183 H 0 0 N N N 19.786 25.804 16.056 -2.831 -1.775 2.092 H183 REA 43
REA H191 H191 H 0 0 N N N 24.647 26.327 21.238 0.171 1.159 2.216 H191 REA 44
REA H192 H192 H 0 0 N N N 24.702 26.559 19.458 0.993 2.008 0.885 H192 REA 45
REA H193 H193 H 0 0 N N N 24.252 27.929 20.529 -0.774 2.125 1.058 H193 REA 46
REA H201 H201 H 0 0 N N N 24.620 23.168 25.369 5.026 0.871 1.762 H201 REA 47
REA H202 H202 H 0 0 N N N 24.965 23.516 23.641 5.707 1.771 0.386 H202 REA 48
REA H203 H203 H 0 0 N N N 24.360 24.822 24.717 3.952 1.795 0.685 H203 REA 49
REA HO2 HO2 H 0 1 N N N 22.244 21.180 27.713 9.006 -0.469 -0.490 HO2 REA 50
#
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
REA C1 C2 SING N N 1
REA C1 C6 SING N N 2
REA C1 C16 SING N N 3
REA C1 C17 SING N N 4
REA C2 C3 SING N N 5
REA C2 H21 SING N N 6
REA C2 H22 SING N N 7
REA C3 C4 SING N N 8
REA C3 H31 SING N N 9
REA C3 H32 SING N N 10
REA C4 C5 SING N N 11
REA C4 H41 SING N N 12
REA C4 H42 SING N N 13
REA C5 C6 DOUB N N 14
REA C5 C18 SING N N 15
REA C6 C7 SING N N 16
REA C7 C8 DOUB N E 17
REA C7 H7 SING N N 18
REA C8 C9 SING N N 19
REA C8 H8 SING N N 20
REA C9 C10 DOUB N E 21
REA C9 C19 SING N N 22
REA C10 C11 SING N N 23
REA C10 H10 SING N N 24
REA C11 C12 DOUB N E 25
REA C11 H11 SING N N 26
REA C12 C13 SING N N 27
REA C12 H12 SING N N 28
REA C13 C14 DOUB N E 29
REA C13 C20 SING N N 30
REA C14 C15 SING N N 31
REA C14 H14 SING N N 32
REA C15 O1 DOUB N N 33
REA C15 O2 SING N N 34
REA C16 H161 SING N N 35
REA C16 H162 SING N N 36
REA C16 H163 SING N N 37
REA C17 H171 SING N N 38
REA C17 H172 SING N N 39
REA C17 H173 SING N N 40
REA C18 H181 SING N N 41
REA C18 H182 SING N N 42
REA C18 H183 SING N N 43
REA C19 H191 SING N N 44
REA C19 H192 SING N N 45
REA C19 H193 SING N N 46
REA C20 H201 SING N N 47
REA C20 H202 SING N N 48
REA C20 H203 SING N N 49
REA O2 HO2 SING N N 50
#
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
REA SMILES ACDLabs 12.01 "C1(CCCC(=C1\C=C\C(=C\C=C\C(=C\C(=O)O)C)C)C)(C)C"
REA InChI InChI 1.03 "InChI=1S/C20H28O2/c1-15(8-6-9-16(2)14-19(21)22)11-12-18-17(3)10-7-13-20(18,4)5/h6,8-9,11-12,14H,7,10,13H2,1-5H3,(H,21,22)/b9-6+,12-11+,15-8+,16-14+"
REA InChIKey InChI 1.03 SHGAZHPCJJPHSC-YCNIQYBTSA-N
REA SMILES_CANONICAL CACTVS 3.385 "CC1=C(\C=C\C(C)=C\C=C\C(C)=C\C(O)=O)C(C)(C)CCC1"
REA SMILES CACTVS 3.385 "CC1=C(C=CC(C)=CC=CC(C)=CC(O)=O)C(C)(C)CCC1"
REA SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C(=O)O)/C)/C"
REA SMILES "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C"
#
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
REA "SYSTEMATIC NAME" ACDLabs 12.01 "retinoic acid"
REA "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "(2E,4E,6E,8E)-3,7-dimethyl-9-(2,6,6-trimethylcyclohexen-1-yl)nona-2,4,6,8-tetraenoic acid"
#
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
REA "Create component" 1999-07-08 RCSB
REA "Modify descriptor" 2011-06-04 RCSB
REA "Other modification" 2016-10-18 RCSB
#

189
test/RXA.cif Normal file
View File

@@ -0,0 +1,189 @@
data_RXA
#
_chem_comp.id RXA
_chem_comp.name "RENAMED RETINOIC ACID"
_chem_comp.type NON-POLYMER
_chem_comp.pdbx_type HETAIN
_chem_comp.formula "C20 H28 O2"
_chem_comp.mon_nstd_parent_comp_id ?
_chem_comp.pdbx_synonyms ?
_chem_comp.pdbx_formal_charge 0
_chem_comp.pdbx_initial_date 1999-07-08
_chem_comp.pdbx_modified_date 2016-10-18
_chem_comp.pdbx_ambiguous_flag N
_chem_comp.pdbx_release_status REL
_chem_comp.pdbx_replaced_by ?
_chem_comp.pdbx_replaces 3KV
_chem_comp.formula_weight 300.435
_chem_comp.one_letter_code ?
_chem_comp.three_letter_code RXA
_chem_comp.pdbx_model_coordinates_details ?
_chem_comp.pdbx_model_coordinates_missing_flag N
_chem_comp.pdbx_ideal_coordinates_details Corina
_chem_comp.pdbx_ideal_coordinates_missing_flag N
_chem_comp.pdbx_model_coordinates_db_code 1CBS
_chem_comp.pdbx_subcomponent_list ?
_chem_comp.pdbx_processing_site RCSB
#
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.alt_atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.charge
_chem_comp_atom.pdbx_align
_chem_comp_atom.pdbx_aromatic_flag
_chem_comp_atom.pdbx_leaving_atom_flag
_chem_comp_atom.pdbx_stereo_config
_chem_comp_atom.model_Cartn_x
_chem_comp_atom.model_Cartn_y
_chem_comp_atom.model_Cartn_z
_chem_comp_atom.pdbx_model_Cartn_x_ideal
_chem_comp_atom.pdbx_model_Cartn_y_ideal
_chem_comp_atom.pdbx_model_Cartn_z_ideal
_chem_comp_atom.pdbx_component_atom_id
_chem_comp_atom.pdbx_component_comp_id
_chem_comp_atom.pdbx_ordinal
RXA C1 C1 C 0 1 N N N 21.972 29.831 16.739 -4.684 0.932 -0.497 C1 RXA 1
RXA C2 C2 C 0 1 N N N 20.921 30.524 15.841 -5.837 0.190 -1.176 C2 RXA 2
RXA C3 C3 C 0 1 N N N 20.245 29.635 14.848 -6.441 -0.798 -0.171 C3 RXA 3
RXA C4 C4 C 0 1 N N N 19.555 28.479 15.488 -5.418 -1.903 0.100 C4 RXA 4
RXA C5 C5 C 0 1 N N N 20.389 27.812 16.587 -4.082 -1.301 0.429 C5 RXA 5
RXA C6 C6 C 0 1 N N N 21.425 28.446 17.218 -3.756 -0.048 0.161 C6 RXA 6
RXA C7 C7 C 0 1 N N N 22.242 27.851 18.297 -2.457 0.396 0.516 C7 RXA 7
RXA C8 C8 C 0 1 N N N 21.868 26.977 19.240 -1.363 -0.229 0.007 C8 RXA 8
RXA C9 C9 C 0 1 N N N 22.705 26.434 20.286 -0.076 0.257 0.298 C9 RXA 9
RXA C10 C10 C 0 1 N N N 22.159 25.536 21.131 1.022 -0.370 -0.213 C10 RXA 10
RXA C11 C11 C 0 1 N N N 22.875 24.924 22.234 2.306 0.115 0.077 C11 RXA 11
RXA C12 C12 C 0 1 N N N 22.237 24.026 22.990 3.405 -0.513 -0.435 C12 RXA 12
RXA C13 C13 C 0 1 N N N 22.856 23.377 24.125 4.689 -0.028 -0.144 C13 RXA 13
RXA C14 C14 C 0 1 N N N 22.135 22.473 24.834 5.787 -0.655 -0.656 C14 RXA 14
RXA C15 C15 C 0 1 N N N 22.563 21.710 26.016 7.077 -0.265 -0.244 C15 RXA 15
RXA C16 C16 C 0 1 N N N 22.238 30.737 17.948 -5.246 1.886 0.559 C16 RXA 16
RXA C17 C17 C 0 1 N N N 23.292 29.620 15.948 -3.911 1.737 -1.544 C17 RXA 17
RXA C18 C18 C 0 1 N N N 19.791 26.449 16.947 -3.056 -2.175 1.103 C18 RXA 18
RXA C19 C19 C 0 1 N N N 24.181 26.841 20.385 0.090 1.471 1.175 C19 RXA 19
RXA C20 C20 C 0 1 N N N 24.303 23.747 24.489 4.855 1.186 0.733 C20 RXA 20
RXA O1 O1 O 0 1 N N N 23.640 21.075 25.978 7.210 0.553 0.648 O1 RXA 21
RXA O2 O2 O 0 1 N N N 21.840 21.712 27.037 8.166 -0.798 -0.840 O2 RXA 22
RXA H21 H21 H 0 1 N N N 20.147 30.955 16.494 -6.598 0.905 -1.490 H21 RXA 23
RXA H22 H22 H 0 1 N N N 21.425 31.330 15.288 -5.462 -0.353 -2.044 H22 RXA 24
RXA H31 H31 H 0 1 N N N 19.501 30.227 14.295 -6.673 -0.278 0.759 H31 RXA 25
RXA H32 H32 H 0 1 N N N 21.001 29.250 14.148 -7.349 -1.234 -0.586 H32 RXA 26
RXA H41 H41 H 0 1 N N N 18.613 28.835 15.931 -5.756 -2.511 0.938 H41 RXA 27
RXA H42 H42 H 0 1 N N N 19.335 27.730 14.713 -5.322 -2.531 -0.786 H42 RXA 28
RXA H7 H7 H 0 1 N N N 23.276 28.162 18.329 -2.337 1.230 1.191 H7 RXA 29
RXA H8 H8 H 0 1 N N N 20.840 26.645 19.217 -1.482 -1.100 -0.622 H8 RXA 30
RXA H10 H10 H 0 1 N N N 21.127 25.256 20.977 0.903 -1.241 -0.842 H10 RXA 31
RXA H11 H11 H 0 1 N N N 23.902 25.189 22.440 2.425 0.985 0.706 H11 RXA 32
RXA H12 H12 H 0 1 N N N 21.216 23.774 22.743 3.286 -1.383 -1.063 H12 RXA 33
RXA H14 H14 H 0 1 N N N 21.127 22.292 24.490 5.667 -1.451 -1.376 H14 RXA 34
RXA H161 H161 H 0 0 N N N 22.984 30.265 18.604 -5.802 1.316 1.303 H161 RXA 35
RXA H162 H162 H 0 0 N N N 22.618 31.709 17.601 -4.426 2.415 1.044 H162 RXA 36
RXA H163 H163 H 0 0 N N N 21.302 30.887 18.506 -5.911 2.605 0.081 H163 RXA 37
RXA H171 H171 H 0 0 N N N 24.033 29.127 16.595 -4.598 2.394 -2.077 H171 RXA 38
RXA H172 H172 H 0 0 N N N 23.095 28.989 15.069 -3.146 2.335 -1.050 H172 RXA 39
RXA H173 H173 H 0 0 N N N 23.683 30.595 15.620 -3.439 1.054 -2.251 H173 RXA 40
RXA H181 H181 H 0 0 N N N 20.397 25.979 17.736 -3.448 -3.187 1.201 H181 RXA 41
RXA H182 H182 H 0 0 N N N 18.761 26.584 17.308 -2.145 -2.194 0.503 H182 RXA 42
RXA H183 H183 H 0 0 N N N 19.786 25.804 16.056 -2.831 -1.775 2.092 H183 RXA 43
RXA H191 H191 H 0 0 N N N 24.647 26.327 21.238 0.171 1.159 2.216 H191 RXA 44
RXA H192 H192 H 0 0 N N N 24.702 26.559 19.458 0.993 2.008 0.885 H192 RXA 45
RXA H193 H193 H 0 0 N N N 24.252 27.929 20.529 -0.774 2.125 1.058 H193 RXA 46
RXA H201 H201 H 0 0 N N N 24.620 23.168 25.369 5.026 0.871 1.762 H201 RXA 47
RXA H202 H202 H 0 0 N N N 24.965 23.516 23.641 5.707 1.771 0.386 H202 RXA 48
RXA H203 H203 H 0 0 N N N 24.360 24.822 24.717 3.952 1.795 0.685 H203 RXA 49
RXA HO2 HO2 H 0 1 N N N 22.244 21.180 27.713 9.006 -0.469 -0.490 HO2 RXA 50
#
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.value_order
_chem_comp_bond.pdbx_aromatic_flag
_chem_comp_bond.pdbx_stereo_config
_chem_comp_bond.pdbx_ordinal
RXA C1 C2 SING N N 1
RXA C1 C6 SING N N 2
RXA C1 C16 SING N N 3
RXA C1 C17 SING N N 4
RXA C2 C3 SING N N 5
RXA C2 H21 SING N N 6
RXA C2 H22 SING N N 7
RXA C3 C4 SING N N 8
RXA C3 H31 SING N N 9
RXA C3 H32 SING N N 10
RXA C4 C5 SING N N 11
RXA C4 H41 SING N N 12
RXA C4 H42 SING N N 13
RXA C5 C6 DOUB N N 14
RXA C5 C18 SING N N 15
RXA C6 C7 SING N N 16
RXA C7 C8 DOUB N E 17
RXA C7 H7 SING N N 18
RXA C8 C9 SING N N 19
RXA C8 H8 SING N N 20
RXA C9 C10 DOUB N E 21
RXA C9 C19 SING N N 22
RXA C10 C11 SING N N 23
RXA C10 H10 SING N N 24
RXA C11 C12 DOUB N E 25
RXA C11 H11 SING N N 26
RXA C12 C13 SING N N 27
RXA C12 H12 SING N N 28
RXA C13 C14 DOUB N E 29
RXA C13 C20 SING N N 30
RXA C14 C15 SING N N 31
RXA C14 H14 SING N N 32
RXA C15 O1 DOUB N N 33
RXA C15 O2 SING N N 34
RXA C16 H161 SING N N 35
RXA C16 H162 SING N N 36
RXA C16 H163 SING N N 37
RXA C17 H171 SING N N 38
RXA C17 H172 SING N N 39
RXA C17 H173 SING N N 40
RXA C18 H181 SING N N 41
RXA C18 H182 SING N N 42
RXA C18 H183 SING N N 43
RXA C19 H191 SING N N 44
RXA C19 H192 SING N N 45
RXA C19 H193 SING N N 46
RXA C20 H201 SING N N 47
RXA C20 H202 SING N N 48
RXA C20 H203 SING N N 49
RXA O2 HO2 SING N N 50
#
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
RXA SMILES ACDLabs 12.01 "C1(CCCC(=C1\C=C\C(=C\C=C\C(=C\C(=O)O)C)C)C)(C)C"
RXA InChI InChI 1.03 "InChI=1S/C20H28O2/c1-15(8-6-9-16(2)14-19(21)22)11-12-18-17(3)10-7-13-20(18,4)5/h6,8-9,11-12,14H,7,10,13H2,1-5H3,(H,21,22)/b9-6+,12-11+,15-8+,16-14+"
RXA InChIKey InChI 1.03 SHGAZHPCJJPHSC-YCNIQYBTSA-N
RXA SMILES_CANONICAL CACTVS 3.385 "CC1=C(\C=C\C(C)=C\C=C\C(C)=C\C(O)=O)C(C)(C)CCC1"
RXA SMILES CACTVS 3.385 "CC1=C(C=CC(C)=CC=CC(C)=CC(O)=O)C(C)(C)CCC1"
RXA SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C(=O)O)/C)/C"
RXA SMILES "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C"
#
loop_
_pdbx_chem_comp_identifier.comp_id
_pdbx_chem_comp_identifier.type
_pdbx_chem_comp_identifier.program
_pdbx_chem_comp_identifier.program_version
_pdbx_chem_comp_identifier.identifier
RXA "SYSTEMATIC NAME" ACDLabs 12.01 "retinoic acid"
RXA "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "(2E,4E,6E,8E)-3,7-dimethyl-9-(2,6,6-trimethylcyclohexen-1-yl)nona-2,4,6,8-tetraenoic acid"
#
loop_
_pdbx_chem_comp_audit.comp_id
_pdbx_chem_comp_audit.action_type
_pdbx_chem_comp_audit.date
_pdbx_chem_comp_audit.processing_site
RXA "Create component" 1999-07-08 RCSB
RXA "Modify descriptor" 2011-06-04 RCSB
RXA "Other modification" 2016-10-18 RCSB
#

152
test/UN_.cif Normal file
View File

@@ -0,0 +1,152 @@
#
data_comp_list
loop_
_chem_comp.id
_chem_comp.three_letter_code
_chem_comp.name
_chem_comp.group
_chem_comp.number_atoms_all
_chem_comp.number_atoms_nh
_chem_comp.desc_level
UN_ UN_ UN_NINE L-peptide 13 6 .
#
data_comp_UN_
#
loop_
_chem_comp_atom.comp_id
_chem_comp_atom.atom_id
_chem_comp_atom.type_symbol
_chem_comp_atom.type_energy
_chem_comp_atom.charge
_chem_comp_atom.x
_chem_comp_atom.y
_chem_comp_atom.z
UN_ N N NT3 1 0.227 -1.259 0.452
UN_ H H H 0 0.069 -1.019 1.421
UN_ H2 H H 0 1.104 -1.640 0.356
UN_ H3 H H 0 -0.424 -1.909 0.174
UN_ CA C CH1 0 0.103 -0.030 -0.392
UN_ HA H H 0 0.160 -0.299 -1.339
UN_ CB C CH3 0 -1.244 0.625 -0.159
UN_ HB3 H H 0 -1.857 -0.018 0.234
UN_ HB2 H H 0 -1.605 0.932 -1.008
UN_ HB1 H H 0 -1.150 1.385 0.442
UN_ C C C 0 1.270 0.922 -0.094
UN_ O O O 0 2.008 1.323 -0.994
UN_ OXT O OC -1 1.498 1.305 1.054
loop_
_chem_comp_tree.comp_id
_chem_comp_tree.atom_id
_chem_comp_tree.atom_back
_chem_comp_tree.atom_forward
_chem_comp_tree.connect_type
UN_ N n/a CA START
UN_ H N . .
UN_ H2 N . .
UN_ H3 N . .
UN_ CA N C .
UN_ HA CA . .
UN_ CB CA HB3 .
UN_ HB1 CB . .
UN_ HB2 CB . .
UN_ HB3 CB . .
UN_ C CA . END
UN_ O C . .
UN_ OXT C . .
loop_
_chem_comp_bond.comp_id
_chem_comp_bond.atom_id_1
_chem_comp_bond.atom_id_2
_chem_comp_bond.type
_chem_comp_bond.aromatic
_chem_comp_bond.value_dist
_chem_comp_bond.value_dist_esd
UN_ CB CA SINGLE n 1.509 0.014
UN_ CA C SINGLE n 1.533 0.011
UN_ C O DOUBLE n 1.247 0.019
UN_ C OXT SINGLE n 1.247 0.019
UN_ CA N SINGLE n 1.482 0.010
UN_ CB HB3 SINGLE n 0.972 0.015
UN_ CB HB2 SINGLE n 0.972 0.015
UN_ CB HB1 SINGLE n 0.972 0.015
UN_ CA HA SINGLE n 0.986 0.020
UN_ N H SINGLE n 0.911 0.020
UN_ N H2 SINGLE n 0.911 0.020
UN_ N H3 SINGLE n 0.911 0.020
loop_
_chem_comp_angle.comp_id
_chem_comp_angle.atom_id_1
_chem_comp_angle.atom_id_2
_chem_comp_angle.atom_id_3
_chem_comp_angle.value_angle
_chem_comp_angle.value_angle_esd
UN_ CA CB HB3 109.546 1.50
UN_ CA CB HB2 109.546 1.50
UN_ CA CB HB1 109.546 1.50
UN_ HB3 CB HB2 109.386 1.50
UN_ HB3 CB HB1 109.386 1.50
UN_ HB2 CB HB1 109.386 1.50
UN_ CB CA C 111.490 1.50
UN_ CB CA N 109.912 1.50
UN_ CB CA HA 108.878 1.50
UN_ C CA N 109.627 1.50
UN_ C CA HA 108.541 1.50
UN_ N CA HA 108.529 1.50
UN_ CA C O 117.159 1.57
UN_ CA C OXT 117.159 1.57
UN_ O C OXT 125.683 1.50
UN_ CA N H 109.643 1.50
UN_ CA N H2 109.643 1.50
UN_ CA N H3 109.643 1.50
UN_ H N H2 109.028 2.41
UN_ H N H3 109.028 2.41
UN_ H2 N H3 109.028 2.41
loop_
_chem_comp_tor.comp_id
_chem_comp_tor.id
_chem_comp_tor.atom_id_1
_chem_comp_tor.atom_id_2
_chem_comp_tor.atom_id_3
_chem_comp_tor.atom_id_4
_chem_comp_tor.value_angle
_chem_comp_tor.value_angle_esd
_chem_comp_tor.period
UN_ hh1 N CA CB HB3 60.000 15.000 3
UN_ sp2_sp3_1 O C CA CB 0.000 10.00 6
UN_ sp3_sp3_10 CB CA N H 180.000 10.00 3
loop_
_chem_comp_chir.comp_id
_chem_comp_chir.id
_chem_comp_chir.atom_id_centre
_chem_comp_chir.atom_id_1
_chem_comp_chir.atom_id_2
_chem_comp_chir.atom_id_3
_chem_comp_chir.volume_sign
UN_ chir_1 CA N C CB positive
loop_
_chem_comp_plane_atom.comp_id
_chem_comp_plane_atom.plane_id
_chem_comp_plane_atom.atom_id
_chem_comp_plane_atom.dist_esd
UN_ plan-1 C 0.020
UN_ plan-1 CA 0.020
UN_ plan-1 O 0.020
UN_ plan-1 OXT 0.020
loop_
_pdbx_chem_comp_descriptor.comp_id
_pdbx_chem_comp_descriptor.type
_pdbx_chem_comp_descriptor.program
_pdbx_chem_comp_descriptor.program_version
_pdbx_chem_comp_descriptor.descriptor
UN_ SMILES ACDLabs 10.04 "O=C(O)C(N)C"
UN_ SMILES_CANONICAL CACTVS 3.341 "C[C@H](N)C(O)=O"
UN_ SMILES CACTVS 3.341 "C[CH](N)C(O)=O"
UN_ SMILES_CANONICAL "OpenEye OEToolkits" 1.5.0 "C[C@@H](C(=O)O)N"
UN_ SMILES "OpenEye OEToolkits" 1.5.0 "CC(C(=O)O)N"
UN_ InChI InChI 1.03 "InChI=1S/C3H7NO2/c1-2(4)3(5)6/h2H,4H2,1H3,(H,5,6)/t2-/m0/s1"
UN_ InChIKey InChI 1.03 QNAYBMKLOCPYGJ-REOHCLBHSA-N
UN_ ? acedrg 195 "dictionary generator"
UN_ ? acedrg_database 11 "data source"
UN_ ? rdkit 2017.03.2 "Chemoinformatics tool"
UN_ ? refmac5 5.8.0189 "optimization tool"

85
test/format-test.cpp Normal file
View File

@@ -0,0 +1,85 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include <cif++.hpp>
// Short alias for the Boost.Test tools namespace.
namespace tt = boost::test_tools;
// Base directory for test data. It is initialised to the current working
// directory; the argv-based override in init_unit_test() of this file is
// commented out, so this value is what the tests see.
std::filesystem::path gTestDir = std::filesystem::current_path();
// --------------------------------------------------------------------
/// User-defined literal that turns an in-source text into a cif::file.
///
/// @param text    Pointer to the first character of the literal.
/// @param length  Number of characters in the literal.
/// @return        The cif::file constructed from a stream over that text.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer whose get area is the literal itself.
	class literal_streambuf : public std::streambuf
	{
	  public:
		literal_streambuf(char *first, size_t count)
		{
			char *const last = first + count;
			setg(first, first, last);
		}
	};

	// std::streambuf::setg takes non-const pointers, hence the const_cast.
	literal_streambuf source(const_cast<char *>(text), length);
	std::istream input(&source);

	return cif::file(input);
}
// --------------------------------------------------------------------
// Initialisation hook for Boost.Test (this file defines
// BOOST_TEST_ALTERNATIVE_INIT_API). Sets the library's global VERBOSE level
// to 1 and returns true. The resource and test-directory setup below is
// commented out, so it does not run for this test binary.
bool init_unit_test()
{
cif::VERBOSE = 1;
// // not a test, just initialize test dir
// if (boost::unit_test::framework::master_test_suite().argc == 2)
// gTestDir = boost::unit_test::framework::master_test_suite().argv[1];
// // do this now, avoids the need for installing
// cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
// // initialize CCD location
// cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");
return true;
}
// --------------------------------------------------------------------
// Checks cif::format with a printf-style format string, both when streamed
// into an std::ostream and when converted through .str().
BOOST_AUTO_TEST_CASE(fmt_1)
{
	// "%-10.10s" left-justifies the argument in a field of exactly ten
	// characters, so "world" must be followed by five padding spaces before
	// the comma. "%g" renders M_PI with six significant digits.
	const std::string expected = "Hello, world     , the magic number is 42 and pi is 3.14159";

	std::string world("world");

	// Streaming the format object.
	std::ostringstream os;
	os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI);
	BOOST_CHECK_EQUAL(os.str(), expected);

	// Direct conversion to a string.
	BOOST_CHECK_EQUAL(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, M_PI).str(),
		expected);
}

341
test/model-test.cpp Normal file
View File

@@ -0,0 +1,341 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include <cif++.hpp>
// --------------------------------------------------------------------
/// User-defined literal that turns an in-source text into a cif::file.
///
/// @param text    Pointer to the first character of the literal.
/// @param length  Number of characters in the literal.
/// @return        The cif::file constructed from a stream over that text.
cif::file operator""_cf(const char* text, size_t length)
{
	// Stream buffer whose get area is the literal itself.
	class literal_streambuf : public std::streambuf
	{
	  public:
		literal_streambuf(char* first, size_t count)
		{
			char* const last = first + count;
			setg(first, first, last);
		}
	};

	// std::streambuf::setg takes non-const pointers, hence the const_cast.
	literal_streambuf source(const_cast<char*>(text), length);
	std::istream input(&source);

	return cif::file(input);
}
// --------------------------------------------------------------------
// Base directory for test data; defaults to the current working directory and
// may be replaced by the single command line argument (see init_unit_test).
std::filesystem::path gTestDir = std::filesystem::current_path();

// Initialisation hook for Boost.Test: picks up the test directory and
// registers the resource files the test cases load by name.
bool init_unit_test()
{
	cif::VERBOSE = 1;

	// When exactly one extra argument is given, use it as the test directory.
	auto &suite = boost::unit_test::framework::master_test_suite();
	if (suite.argc == 2)
		gTestDir = suite.argv[1];

	const std::filesystem::path parent = gTestDir / "..";

	// Register the dictionary from the source tree so no installation is needed.
	cif::add_file_resource("mmcif_pdbx.dic", parent / "rsrc" / "mmcif_pdbx.dic");

	// Register the CCD subset under the name components.cif.
	cif::add_file_resource("components.cif", parent / "data" / "ccd-subset.cif");

	// Push the HEM compound file that sits in the test directory.
	cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");

	return true;
}
// --------------------------------------------------------------------
// Creates a HEM non-polymer in an otherwise empty structure and compares the
// resulting datablock with a hand-written expected datablock.
BOOST_AUTO_TEST_CASE(create_nonpoly_1)
{
cif::VERBOSE = 1;
// Start from a file that has the mmcif_pdbx dictionary loaded and a single
// datablock named TEST.
cif::file file;
file.load_dictionary("mmcif_pdbx.dic");
file.emplace("TEST"); // create a datablock
cif::mm::structure structure(file);
// Create the entity for compound HEM; the returned id is used below.
std::string entity_id = structure.create_non_poly_entity("HEM");
// Input atoms, parsed from an inline CIF text via the _cf literal.
auto atoms = R"(
data_HEM
loop_
_atom_site.id
_atom_site.group_PDB
_atom_site.type_symbol
_atom_site.label_atom_id
_atom_site.label_alt_id
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
1 HETATM C CHA . ? -5.248 39.769 -0.250 1.00 7.67 ?
2 HETATM C CHB . ? -3.774 36.790 3.280 1.00 7.05 ?
3 HETATM C CHC . ? -2.879 33.328 0.013 1.00 7.69 ?
4 HETATM C CHD . ? -4.342 36.262 -3.536 1.00 8.00 ?
# that's enough to test with
)"_cf;
atoms.load_dictionary("mmcif_pdbx.dic");
auto &hem_data = atoms["HEM"];
auto &atom_site = hem_data["atom_site"];
auto hem_atoms = atom_site.rows();
// Wrap every atom_site row of the input in a cif::mm::atom.
std::vector<cif::mm::atom> atom_data;
for (auto hem_atom: hem_atoms)
atom_data.emplace_back(hem_data, hem_atom);
// Add the non-polymer built from these atoms to the structure.
structure.create_non_poly(entity_id, atom_data);
// The datablock the structure is expected to contain afterwards.
auto expected = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
2 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
3 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
_atom_type.symbol C
)"_cf;
expected.load_dictionary("mmcif_pdbx.dic");
// On a mismatch, record the failure and print both datablocks (expected
// first, then actual) so they can be compared.
if (not (expected.front() == structure.get_datablock()))
{
BOOST_TEST(false);
std::cout << expected.front() << std::endl
<< std::endl
<< structure.get_datablock() << std::endl;
}
}
// --------------------------------------------------------------------
// Checks that structure::get_atom_by_id looks atoms up by their atom_site.id
// value: the rows below are listed in id order 1, 3, 2, 4.
BOOST_AUTO_TEST_CASE(test_atom_id)
{
auto data = R"(
data_TEST
#
_pdbx_nonpoly_scheme.asym_id A
_pdbx_nonpoly_scheme.ndb_seq_num 1
_pdbx_nonpoly_scheme.entity_id 1
_pdbx_nonpoly_scheme.mon_id HEM
_pdbx_nonpoly_scheme.pdb_seq_num 1
_pdbx_nonpoly_scheme.auth_seq_num 1
_pdbx_nonpoly_scheme.pdb_mon_id HEM
_pdbx_nonpoly_scheme.auth_mon_id HEM
_pdbx_nonpoly_scheme.pdb_strand_id A
_pdbx_nonpoly_scheme.pdb_ins_code .
#
loop_
_atom_site.id
_atom_site.auth_asym_id
_atom_site.label_alt_id
_atom_site.label_asym_id
_atom_site.label_atom_id
_atom_site.label_comp_id
_atom_site.label_entity_id
_atom_site.label_seq_id
_atom_site.type_symbol
_atom_site.group_PDB
_atom_site.pdbx_PDB_ins_code
_atom_site.Cartn_x
_atom_site.Cartn_y
_atom_site.Cartn_z
_atom_site.occupancy
_atom_site.B_iso_or_equiv
_atom_site.pdbx_formal_charge
_atom_site.auth_seq_id
_atom_site.auth_comp_id
_atom_site.auth_atom_id
_atom_site.pdbx_PDB_model_num
1 A ? A CHA HEM 1 . C HETATM ? -5.248 39.769 -0.250 1.00 7.67 ? 1 HEM CHA 1
3 A ? A CHB HEM 1 . C HETATM ? -3.774 36.790 3.280 1.00 7.05 ? 1 HEM CHB 1
2 A ? A CHC HEM 1 . C HETATM ? -2.879 33.328 0.013 1.00 7.69 ? 1 HEM CHC 1
4 A ? A CHD HEM 1 . C HETATM ? -4.342 36.262 -3.536 1.00 8.00 ? 1 HEM CHD 1
#
_chem_comp.id HEM
_chem_comp.type NON-POLYMER
_chem_comp.name 'PROTOPORPHYRIN IX CONTAINING FE'
_chem_comp.formula 'C34 H32 Fe N4 O4'
_chem_comp.formula_weight 616.487000
#
_pdbx_entity_nonpoly.entity_id 1
_pdbx_entity_nonpoly.name 'PROTOPORPHYRIN IX CONTAINING FE'
_pdbx_entity_nonpoly.comp_id HEM
#
_entity.id 1
_entity.type non-polymer
_entity.pdbx_description 'PROTOPORPHYRIN IX CONTAINING FE'
_entity.formula_weight 616.487000
#
_struct_asym.id A
_struct_asym.entity_id 1
_struct_asym.pdbx_blank_PDB_chainid_flag N
_struct_asym.pdbx_modified N
_struct_asym.details ?
#
)"_cf;
data.load_dictionary("mmcif_pdbx.dic");
cif::mm::structure s(data);
// Id "2" belongs to the third row (CHC) and id "3" to the second row (CHB).
BOOST_CHECK_EQUAL(s.get_atom_by_id("1").get_label_atom_id(), "CHA");
BOOST_CHECK_EQUAL(s.get_atom_by_id("2").get_label_atom_id(), "CHC");
BOOST_CHECK_EQUAL(s.get_atom_by_id("3").get_label_atom_id(), "CHB");
BOOST_CHECK_EQUAL(s.get_atom_by_id("4").get_label_atom_id(), "CHD");
}
// --------------------------------------------------------------------
// Loads examples/1cbs.cif.gz and verifies, for every atom_site row, that
//  - the atom found through structure::get_atom_by_id reports the same label
//    and auth fields as the row, and
//  - structure::atoms() yields atoms in the same order as the atom_site rows,
//    with no atoms missing or left over.
BOOST_AUTO_TEST_CASE(atom_numbers_1)
{
	const std::filesystem::path test1(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file file(test1.string());
	cif::mm::structure structure(file);

	auto &db = file.front();
	auto &atoms = structure.atoms();
	auto ai = atoms.begin();

	for (const auto &[id, label_asym_id, label_seq_id, label_atom_id, auth_seq_id, label_comp_id] :
		db["atom_site"].rows<std::string,std::string,int,std::string,std::string,std::string>("id", "label_asym_id", "label_seq_id", "label_atom_id", "auth_seq_id", "label_comp_id"))
	{
		auto atom = structure.get_atom_by_id(id);

		BOOST_CHECK_EQUAL(atom.get_label_asym_id(), label_asym_id);
		BOOST_CHECK_EQUAL(atom.get_label_seq_id(), label_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_atom_id(), label_atom_id);
		BOOST_CHECK_EQUAL(atom.get_auth_seq_id(), auth_seq_id);
		BOOST_CHECK_EQUAL(atom.get_label_comp_id(), label_comp_id);

		// BOOST_REQUIRE rather than BOOST_ASSERT: BOOST_ASSERT is compiled out
		// when NDEBUG is defined, which would let the dereference below run on
		// an end iterator. BOOST_REQUIRE always runs and stops this test case
		// when the structure has fewer atoms than atom_site has rows.
		BOOST_REQUIRE(ai != atoms.end());
		BOOST_CHECK_EQUAL(ai->id(), id);
		++ai;
	}

	// Every atom in the structure must have been matched to a row; report
	// through the test framework instead of aborting the process.
	BOOST_CHECK(ai == atoms.end());
}
// --------------------------------------------------------------------
// Loads 1cbs, expects exactly one polymer and checks that the size of every
// polymer equals the number of pdbx_poly_seq_scheme rows with its asym id.
BOOST_AUTO_TEST_CASE(test_load_2)
{
	using namespace cif::literals;

	const std::filesystem::path cbsPath(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file cbsFile(cbsPath.string());
	auto &datablock = cbsFile.front();
	cif::mm::structure model(cbsFile);

	BOOST_CHECK(model.polymers().size() == 1);

	auto &seqScheme = datablock["pdbx_poly_seq_scheme"];
	for (auto &polymer : model.polymers())
	{
		// number of sequence scheme rows recorded for this polymer's asym
		const auto schemeRows = seqScheme.find("asym_id"_key == polymer.get_asym_id()).size();
		BOOST_CHECK_EQUAL(polymer.size(), schemeRows);
	}
}
// Removes the residue looked up with "B" from 1cbs and checks that
// validate_atoms() does not throw on the resulting structure.
BOOST_AUTO_TEST_CASE(remove_residue_1)
{
	using namespace cif::literals;

	const std::filesystem::path cbsPath(gTestDir / ".." / "examples" / "1cbs.cif.gz");
	cif::file cbsFile(cbsPath.string());
	cif::mm::structure model(cbsFile);

	auto &residueB = model.get_residue("B");
	model.remove_residue(residueB);

	BOOST_CHECK_NO_THROW(model.validate_atoms());
}

View File

@@ -0,0 +1,74 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <cif++.hpp>
#include <iostream>
#include <fstream>
// Stand-alone test driver: loads 1cbs, changes the residue looked up with
// "B" into compound RXA and writes the resulting file to stdout.
// Any std::exception is reported on stderr and ends the program with status 1.
int main(int argc, char* argv[])
{
	cif::VERBOSE = 3;

	try
	{
		// The test directory defaults to the working directory; when exactly
		// two arguments are given, the second one replaces it.
		std::filesystem::path testdir = std::filesystem::current_path();
		if (argc == 3)
			testdir = argv[2];

		// Register the resources from the source tree, but only when present
		const auto ccdSubset = testdir / ".." / "data" / "ccd-subset.cif";
		if (std::filesystem::exists(ccdSubset))
			cif::add_file_resource("components.cif", ccdSubset);

		const auto pdbxDictionary = testdir / ".." / "rsrc" / "mmcif_pdbx.dic";
		if (std::filesystem::exists(pdbxDictionary))
			cif::add_file_resource("mmcif_pdbx.dic", pdbxDictionary);

		// Make the definitions of both compounds involved available
		auto &compounds = cif::compound_factory::instance();
		compounds.push_dictionary(testdir / "REA.cif");
		compounds.push_dictionary(testdir / "RXA.cif");

		cif::file f(testdir / ".." / "examples" / "1cbs.cif.gz");
		cif::mm::structure structure(f);

		auto &residueB = structure.get_residue("B");
		structure.change_residue(residueB, "RXA", {});

		structure.cleanup_empty_categories();

		// NOTE(review): the file is written once before and once after the
		// validity check, so a valid result appears twice on stdout.
		f.save(std::cout);

		if (not f.is_valid())
			throw std::runtime_error("Invalid");

		f.save(std::cout);
	}
	catch (const std::exception& e)
	{
		std::cerr << e.what() << std::endl;
		exit(1);
	}

	return 0;
}

210
test/sugar-test.cpp Normal file
View File

@@ -0,0 +1,210 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include <cif++.hpp>
// --------------------------------------------------------------------
// User-defined literal: reads the characters of a string literal through a
// std::istream and returns the cif::file constructed from that stream.
cif::file operator""_cf(const char* text, size_t length)
{
	// Stream buffer whose get area is the literal's own storage.
	struct literal_buf : public std::streambuf
	{
		literal_buf(const char* first, size_t count)
		{
			// setg() only accepts non-const pointers
			char* start = const_cast<char*>(first);
			setg(start, start, start + count);
		}
	};

	literal_buf source(text, length);
	std::istream in(&source);
	return cif::file(in);
}
// --------------------------------------------------------------------
// Directory holding the test data. Defaults to the current working directory;
// init_unit_test() replaces it when a directory is passed on the command line.
std::filesystem::path gTestDir = std::filesystem::current_path();
// Initialisation function for the Boost.Test alternative init API: sets the
// verbosity, picks up the test directory and registers the resources the
// tests need. Always returns true.
bool init_unit_test()
{
	cif::VERBOSE = 1;

	// A single command line argument, when given, is taken as the test directory
	auto &suite = boost::unit_test::framework::master_test_suite();
	if (suite.argc == 2)
		gTestDir = suite.argv[1];

	// Register the dictionary from the source tree, this avoids the need for installing
	cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");

	// The CCD is served from a subset file in the data directory
	cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");

	// Additionally make the compound definitions in HEM.cif known
	cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");

	return true;
}
// --------------------------------------------------------------------
// Loads 1juh, expects four branches and checks that the name of each branch
// equals the pdbx_description of the entity of the branch's first sugar.
BOOST_AUTO_TEST_CASE(sugar_name_1)
{
	using namespace cif::literals;

	const std::filesystem::path juhPath(gTestDir / "1juh.cif.gz");
	cif::file juhFile(juhPath.string());
	cif::mm::structure model(juhFile);

	auto &datablock = model.get_datablock();
	auto &entityCategory = datablock["entity"];

	auto &sugarBranches = model.branches();
	BOOST_CHECK_EQUAL(sugarBranches.size(), 4);

	for (auto &sugarBranch : sugarBranches)
	{
		// look up the description stored for the entity of the first sugar
		auto firstEntityID = sugarBranch.front().get_entity_id();
		auto description = entityCategory.find1<std::string>("id"_key == firstEntityID, "pdbx_description");

		BOOST_CHECK_EQUAL(sugarBranch.name(), description);
	}
}
// --------------------------------------------------------------------
// Removes the residue of asym L from 1juh and recreates it as a branch from
// the saved atom_site rows. Checks name, size and atom count of the new
// branch, validates the file and saves it as test-create_sugar_1.cif.
BOOST_AUTO_TEST_CASE(create_sugar_1)
{
	using namespace cif::literals;

	const std::filesystem::path juhPath(gTestDir / "1juh.cif.gz");
	cif::file juhFile(juhPath.string());
	cif::mm::structure model(juhFile);

	// collect atoms from asym L first, the copy is compared against later on
	auto &nagResidue = model.get_residue("L");
	auto originalAtoms = nagResidue.atoms();

	std::vector<cif::row_initializer> atomRows;

	auto &datablock = model.get_datablock();
	auto &atomSite = datablock["atom_site"];

	// NOTE, row_initializer does not actually hold the data, so copy it first
	// before it gets destroyed by remove_residue
	for (auto row : atomSite.find("label_asym_id"_key == "L"))
		atomRows.emplace_back(row);

	model.remove_residue(nagResidue);

	auto &newBranch = model.create_branch(atomRows);

	BOOST_CHECK_EQUAL(newBranch.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(newBranch.size(), 1);
	BOOST_CHECK_EQUAL(newBranch[0].atoms().size(), originalAtoms.size());

	BOOST_CHECK(juhFile.is_valid());

	juhFile.save(gTestDir / "test-create_sugar_1.cif");
}
// --------------------------------------------------------------------
// Removes the two-sugar branch of asym H from 1juh and rebuilds it: a new
// branch is created from the rows of the first sugar and then extended with
// the rows of the second one. The file must be valid after the removal and
// after the rebuild, and a structure must be constructible from the result.
BOOST_AUTO_TEST_CASE(create_sugar_2)
{
	using namespace cif::literals;

	const std::filesystem::path juhPath(gTestDir / "1juh.cif.gz");
	cif::file juhFile(juhPath.string());
	cif::mm::structure model(juhFile);

	// Get branch for H
	auto &branchH = model.get_branch_by_asym_id("H");
	BOOST_CHECK_EQUAL(branchH.size(), 2);

	// one list of atom_site rows per sugar, selected by auth_seq_id 1 and 2
	std::vector<cif::row_initializer> sugarRows[2];

	auto &datablock = model.get_datablock();
	auto &atomSite = datablock["atom_site"];

	for (size_t sugarNr = 0; sugarNr < 2; ++sugarNr)
	{
		for (auto row : atomSite.find("label_asym_id"_key == "H" and "auth_seq_id"_key == sugarNr + 1))
			sugarRows[sugarNr].emplace_back(row);
	}

	model.remove_branch(branchH);

	BOOST_CHECK(juhFile.is_valid());

	auto &rebuilt = model.create_branch(sugarRows[0]);
	model.extend_branch(rebuilt.get_asym_id(), sugarRows[1], 1, "O4");

	BOOST_CHECK_EQUAL(rebuilt.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose-(1-4)-2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(rebuilt.size(), 2);

	BOOST_CHECK(juhFile.is_valid());

	juhFile.save(gTestDir / "test-create_sugar_2.cif");

	BOOST_CHECK_NO_THROW(cif::mm::structure s2(juhFile));
}
// --------------------------------------------------------------------
// Removes the sugar at index 1 from the four-sugar branch of asym G in 1juh
// and checks that a single sugar remains, that the branch name is updated,
// that the file is still valid and that a structure can be built from it.
BOOST_AUTO_TEST_CASE(delete_sugar_1)
{
	using namespace cif::literals;

	const std::filesystem::path juhPath(gTestDir / "1juh.cif.gz");
	cif::file juhFile(juhPath.string());
	cif::mm::structure model(juhFile);

	// Get branch for G
	auto &branchG = model.get_branch_by_asym_id("G");
	BOOST_CHECK_EQUAL(branchG.size(), 4);

	model.remove_residue(branchG[1]);
	BOOST_CHECK_EQUAL(branchG.size(), 1);

	// Look the branch up again and check what is left of it
	auto &remaining = model.get_branch_by_asym_id("G");
	BOOST_CHECK_EQUAL(remaining.name(), "2-acetamido-2-deoxy-beta-D-glucopyranose");
	BOOST_CHECK_EQUAL(remaining.size(), 1);

	BOOST_CHECK(juhFile.is_valid());

	// juhFile.save(gTestDir / "test-create_sugar_3.cif");

	BOOST_CHECK_NO_THROW(cif::mm::structure s2(juhFile));
}

168
test/unit-3d-test.cpp Normal file
View File

@@ -0,0 +1,168 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#define BOOST_TEST_ALTERNATIVE_INIT_API
#include <boost/test/included/unit_test.hpp>
#include <stdexcept>
#include <cif++.hpp>
#include <cif++/dictionary_parser.hpp>
#include <cif++/parser.hpp>
namespace tt = boost::test_tools;
// Base directory used to locate the test resources. Defaults to the current
// working directory; init_unit_test() overrides it when a directory argument
// is given on the command line.
std::filesystem::path gTestDir = std::filesystem::current_path();
// --------------------------------------------------------------------
// User-defined literal that turns the text of a string literal into a
// cif::file by feeding it to the cif::file constructor through a std::istream.
cif::file operator""_cf(const char *text, size_t length)
{
	// Stream buffer that exposes an existing character range as its get area.
	struct range_buf : public std::streambuf
	{
		range_buf(char *first, char *last)
		{
			setg(first, first, last);
		}
	};

	// setg() wants non-const pointers, hence the const_cast
	char *const first = const_cast<char *>(text);
	range_buf characters(first, first + length);

	std::istream stream(&characters);
	return cif::file(stream);
}
// --------------------------------------------------------------------
// Initialisation function for the Boost.Test alternative init API: sets the
// verbosity, picks up the test directory and registers the dictionary and
// CCD resources. Always returns true.
bool init_unit_test()
{
	cif::VERBOSE = 1;

	// A single command line argument, when given, is taken as the test directory
	auto &suite = boost::unit_test::framework::master_test_suite();
	if (suite.argc == 2)
		gTestDir = suite.argv[1];

	// Register the dictionary from the source tree, this avoids the need for installing
	const auto dictionaryFile = gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic";
	cif::add_file_resource("mmcif_pdbx.dic", dictionaryFile);

	// The CCD is served from a subset file in the data directory
	const auto ccdFile = gTestDir / ".." / "data" / "ccd-subset.cif";
	cif::add_file_resource("components.cif", ccdFile);

	return true;
}
// --------------------------------------------------------------------
// 3d tests
// Rotates a copy of a set of points with a known quaternion, centers both
// sets and lets align_points() compute the quaternion that maps the original
// onto the rotated copy. The angle found is compared with 360 minus the angle
// of the known quaternion (both modulo 360), and after applying the computed
// rotation the RMSd between the two sets must be below 1e-5.
BOOST_AUTO_TEST_CASE(t1)
{
	// A fixed quaternion is used as the rotation under test
	cif::quaternion rotation{0.5, 0.5, 0.5, 0.5};
	rotation = normalize(rotation);

	const auto &&[angle0, axis0] = cif::quaternion_to_angle_axis(rotation);

	std::vector<cif::point> original{
		{16.979, 13.301, 44.555},
		{18.150, 13.525, 43.680},
		{18.656, 14.966, 43.784},
		{17.890, 15.889, 44.078},
		{17.678, 13.270, 42.255},
		{16.248, 13.734, 42.347},
		{15.762, 13.216, 43.724}};

	// the copy is taken before the original is centered
	auto rotated = original;

	cif::center_points(original);

	for (auto &pt : rotated)
		pt.rotate(rotation);

	cif::center_points(rotated);

	auto alignment = cif::align_points(original, rotated);

	const auto &&[angle, axis] = cif::quaternion_to_angle_axis(alignment);

	BOOST_TEST(std::fmod(360 + angle, 360) == std::fmod(360 - angle0, 360), tt::tolerance(0.01));

	for (auto &pt : original)
		pt.rotate(alignment);

	float rmsd = cif::RMSd(original, rotated);

	BOOST_TEST(rmsd < 1e-5);
}
// Builds a quaternion from an angle of 45 and the cross product of two edges
// of a triangle, converts it back to angle and axis and checks that the angle
// is 45 again.
BOOST_AUTO_TEST_CASE(t2)
{
	cif::point triangle[] = {
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 1, 2, 0 }
	};

	// the rotation axis is the cross product of the two edges starting at triangle[0]
	const auto edgeA = triangle[1] - triangle[0];
	const auto edgeB = triangle[2] - triangle[0];
	cif::point rotationAxis = cif::cross_product(edgeA, edgeB);

	auto rotation = cif::construct_from_angle_axis(45, rotationAxis);

	auto &&[angle, axis] = cif::quaternion_to_angle_axis(rotation);

	BOOST_TEST(angle == 45, tt::tolerance(0.01));
}
// Rotates the second corner of a triangle around the first corner, using a
// quaternion built from an angle of 45 and the cross product of two triangle
// edges. The rotated point is printed and the angle it makes with the first
// and second corner is expected to be 45.
BOOST_AUTO_TEST_CASE(t3)
{
	cif::point triangle[] = {
		{ 1, 1, 0 },
		{ 2, 1, 0 },
		{ 1, 2, 0 }
	};

	// the rotation axis is the cross product of the two edges starting at triangle[0]
	cif::point rotationAxis = cif::cross_product(triangle[1] - triangle[0], triangle[2] - triangle[0]);
	auto rotation = cif::construct_from_angle_axis(45, rotationAxis);

	// move the point so that triangle[0] is the origin, rotate, and move it back
	auto rotated = triangle[1];
	rotated -= triangle[0];
	rotated.rotate(rotation);
	rotated += triangle[0];

	std::cout << rotated << std::endl;

	double a = cif::angle(rotated, triangle[0], triangle[1]);

	BOOST_TEST(a == 45, tt::tolerance(0.01));
}

3060
test/unit-v2-test.cpp Normal file

File diff suppressed because it is too large Load Diff

55
tools/update-libcifpp-data.in Executable file
View File

@@ -0,0 +1,55 @@
#!/bin/bash
# Updates the dictionary files kept in the libcifpp cache directory.
# @CIFPP_CACHE_DIR@ is a placeholder for the cache directory path.

# stop at the first command that fails
set -e

# Refuse to run without root privileges; report this as a failure on stderr
# so that callers do not mistake it for a successful update.
if [ "$EUID" -ne 0 ]; then
	echo "Please run as root" >&2
	exit 1
fi

# read the local configuration, if there is any
if [ -f /etc/libcifpp.conf ] ; then
	. /etc/libcifpp.conf
fi

# check to see if we're supposed to run at all
# ($update is presumably set by /etc/libcifpp.conf)
if [ "$update" != "true" ] ; then
	exit
fi

# if cache directory doesn't exist, exit.
# The path is quoted so that a directory name containing spaces works.
if ! [ -d "@CIFPP_CACHE_DIR@" ]; then
	exit
fi
# Download a gzip compressed dictionary and store it uncompressed.
#   $1  path of the uncompressed dictionary file to create or replace
#   $2  URL of the gzip compressed source
# All expansions are quoted so that paths containing whitespace are handled,
# and the variables are local to keep them out of the rest of the script.
fetch_dictionary () {
	local dict="$1"
	local source="$2"

	wget -O"${dict}.gz" "${source}"

	# be careful not to nuke an existing dictionary file
	# extract to a temporary file first
	gunzip -c "${dict}.gz" > "${dict}-tmp"

	# then move the extracted file to the final location
	mv "${dict}-tmp" "${dict}"

	# and clean up afterwards
	rm "${dict}.gz"
}
# fetch the dictionaries
fetch_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
fetch_dictionary "@CIFPP_CACHE_DIR@/components.cif" "ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"

# mmcif_ma.dic is downloaded uncompressed, so fetch_dictionary (which gunzips
# its download) cannot be used for it. Store it in the configured cache
# directory like the other dictionaries, and download to a temporary file
# first so a failed download does not overwrite an existing dictionary.
wget -O"@CIFPP_CACHE_DIR@/mmcif_ma.dic-tmp" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"
mv "@CIFPP_CACHE_DIR@/mmcif_ma.dic-tmp" "@CIFPP_CACHE_DIR@/mmcif_ma.dic"

# notify subscribers, only after all dictionaries have been updated
if [ -d /etc/libcifpp/cache-update.d ] && [ -x /bin/run-parts ]; then
	run-parts --arg "@CIFPP_CACHE_DIR@" -- /etc/libcifpp/cache-update.d
fi