update for WIN32

Added cif::cell::get_volume()
new cif::item constructors
2026-06-05 06:25:52 +08:00 · 2023-11-02 14:12:17 +01:00 · 2023-10-19 11:58:21 +02:00 · 2023-10-19 09:51:10 +02:00 · 2023-10-17 15:24:21 +02:00 · 2023-10-17 15:22:35 +02:00
93 changed files with 13089 additions and 12440 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,6 @@ msvc/
 src/revision.hpp
 test/test-create_sugar_?.cif
 Testing/
+include/cif++/exports.hpp
+docs/api
+docs/conf.py
--- a/.readthedocs.yaml
+++ b/.readthedocs.yaml
@@ -0,0 +1,22 @@
+version: 2
+
+build:
+  os: ubuntu-22.04
+  tools:
+    python: "3.11"
+  apt_packages:
+    - doxygen
+    - cmake
+  jobs:
+    pre_build:
+      - cmake -S . -B build -DBUILD_DOCUMENTATION=ON
+      - cmake --build build --target Doxygen
+
+# Build from the docs/ directory with Sphinx
+sphinx:
+  configuration: docs/conf.py
+
+# Explicitly set the version of Python and its requirements
+python:
+  install:
+    - requirements: docs/requirements.txt
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -25,7 +25,7 @@
 cmake_minimum_required(VERSION 3.16)

 # set the project name
-project(cifpp VERSION 5.0.8 LANGUAGES CXX)
+project(libcifpp VERSION 5.2.3 LANGUAGES CXX)

 list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

@@ -36,21 +36,23 @@ include(CheckLibraryExists)
 include(CMakePackageConfigHelpers)
 include(CheckCXXSourceCompiles)
 include(GenerateExportHeader)
+include(CTest)
+include(CMakeDependentOption)

 set(CXX_EXTENSIONS OFF)
 set(CMAKE_CXX_STANDARD 20)
 set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
 # set(CMAKE_CXX_VISIBILITY_PRESET hidden)
 # set(CMAKE_VISIBILITY_INLINES_HIDDEN 1)
-
 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
 elseif(MSVC)
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
 endif()

-# Building shared libraries?
-option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
+# Build documentation?
+option(BUILD_DOCUMENTATION "Build the documentation" OFF)

 # We do not want to write an export file for all our symbols...
 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
@@ -58,6 +60,14 @@ set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 # Optionally build a version to be installed inside CCP4
 option(BUILD_FOR_CCP4 "Build a version to be installed in CCP4" OFF)

+# Building shared libraries?
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.22)
+	cmake_policy(SET CMP0127 NEW)
+	cmake_dependent_option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF "NOT (BUILD_FOR_CCP4 AND WIN32)" ON)
+else()
+	option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
+endif()
+
 # Lots of code depend on the availability of the components.cif file
 option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)

@@ -80,8 +90,6 @@ else()
 endif()

 # Unit tests
-option(ENABLE_TESTING "Build test exectuables" OFF)
-
 if(BUILD_FOR_CCP4)
 	if("$ENV{CCP4}" STREQUAL "" OR NOT EXISTS $ENV{CCP4})
 		message(FATAL_ERROR "A CCP4 built was requested but CCP4 was not sourced")
@@ -97,22 +105,34 @@ if(BUILD_FOR_CCP4)
 	endif()
 endif()

+if(WIN32) # pick _WIN32_WINNT from CMAKE_SYSTEM_VERSION (major.minor[.build])
+	if("${CMAKE_SYSTEM_VERSION}" VERSION_GREATER_EQUAL 10) # Windows 10
+		add_compile_definitions(_WIN32_WINNT=0x0A00)
+	elseif("${CMAKE_SYSTEM_VERSION}" VERSION_GREATER_EQUAL 6.3) # Windows 8.1
+		add_compile_definitions(_WIN32_WINNT=0x0603)
+	elseif("${CMAKE_SYSTEM_VERSION}" VERSION_GREATER_EQUAL 6.2) # Windows 8
+		add_compile_definitions(_WIN32_WINNT=0x0602)
+	elseif("${CMAKE_SYSTEM_VERSION}" VERSION_GREATER_EQUAL 6.1) # Windows 7
+		add_compile_definitions(_WIN32_WINNT=0x0601)
+	elseif("${CMAKE_SYSTEM_VERSION}" VERSION_GREATER_EQUAL 6.0) # Windows Vista
+		add_compile_definitions(_WIN32_WINNT=0x0600)
+	else() # Windows XP (5.1); also the fallback when the version is empty
+		add_compile_definitions(_WIN32_WINNT=0x0501)
+	endif()
+
+	add_compile_definitions(NOMINMAX)
+endif()
+
 if(MSVC)
-	# make msvc standards compliant...
-	add_compile_options(/permissive-)
+    # make msvc standards compliant...
+    add_compile_options(/permissive- /bigobj)
+	add_link_options(/NODEFAULTLIB:library)

-	macro(get_WIN32_WINNT version)
-		if(WIN32 AND CMAKE_SYSTEM_VERSION)
-			set(ver ${CMAKE_SYSTEM_VERSION})
-			string(REPLACE "." "" ver ${ver})
-			string(REGEX REPLACE "([0-9])" "0\\1" ver ${ver})
-
-			set(${version} "0x${ver}")
-		endif()
-	endmacro()
-
-	get_WIN32_WINNT(ver)
-	add_definitions(-D_WIN32_WINNT=${ver})
+	if(BUILD_SHARED_LIBS)
+		set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>DLL")
+	else()
+		set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
+	endif()
 endif()

 # Libraries
@@ -157,17 +177,20 @@ if(MSVC)
 	set(_ZLIB_x86 "(x86)")
 	set(_ZLIB_SEARCH_NORMAL
 		PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
-			"$ENV{ProgramFiles}/zlib"
-			"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
+		"$ENV{ProgramFiles}/zlib"
+		"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
 	unset(_ZLIB_x86)
 	list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)

 	foreach(search ${_ZLIB_SEARCHES})
-    	find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
+		find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
 	endforeach()
 endif()
+
 find_package(ZLIB REQUIRED)

+find_package(Eigen3 REQUIRED)
+
 include(FindFilesystem)
 list(APPEND CIFPP_REQUIRED_LIBRARIES ${STDCPPFS_LIBRARY})

@@ -182,7 +205,7 @@ endif()

 # Create a revision file, containing the current git version info
 include(VersionString)
-write_version_header(${PROJECT_SOURCE_DIR}/src/ "LibCIFPP")
+write_version_header(${PROJECT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")

 # SymOp data table
 if(CIFPP_RECREATE_SYMOP_DATA)
@@ -191,12 +214,12 @@ if(CIFPP_RECREATE_SYMOP_DATA)

 	add_custom_command(
 		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
-		COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
+		COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib $ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
 	)

 	add_custom_target(
 		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
-		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
+		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib" "$ENV{CLIBD}/symop.lib"
 	)
 endif()

@@ -223,8 +246,9 @@ set(project_sources

 	${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
+	${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
+	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
-	${PROJECT_SOURCE_DIR}/src/pdb/tls.cpp
 )

 set(project_headers
@@ -249,16 +273,17 @@ set(project_headers

 	${PROJECT_SOURCE_DIR}/include/cif++/model.hpp

+	${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
+
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
-	${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif_remark_3.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
 )

 add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
 add_library(cifpp::cifpp ALIAS cifpp)
-generate_export_header(cifpp EXPORT_FILE_NAME cif++/exports.hpp)
+generate_export_header(cifpp EXPORT_FILE_NAME ${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)

 if(BOOST_REGEX)
 	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
@@ -277,7 +302,7 @@ target_include_directories(cifpp
 	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
 )

-target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
+target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES} PRIVATE Eigen3::Eigen)

 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
 	target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -292,29 +317,19 @@ if(CIFPP_DOWNLOAD_CCD)
 			file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
 		endif()

-		find_program(GUNZIP gunzip)
-
-		if(GUNZIP)
-			file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
-				SHOW_PROGRESS)
-			add_custom_command(OUTPUT ${COMPONENTS_CIF}
-				COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
-				WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
-		else()
-			file(DOWNLOAD ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
-				SHOW_PROGRESS)
-		endif()
+		file(DOWNLOAD https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF} SHOW_PROGRESS)
 	endif()

 	add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
 endif()

 # Installation directories
-set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp")
+set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp" CACHE PATH "The directory where dictionary files are stored")
 target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")

 if(UNIX)
-	set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp")
+	set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
+		CACHE PATH "The directory where the update script stores new dictionary files")
 	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")

 	set(CIFPP_ETC_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}")
@@ -330,7 +345,7 @@ install(TARGETS cifpp

 if(MSVC AND BUILD_SHARED_LIBS)
 	install(
-		FILES $<TARGET_PDB_FILE:${PROJECT_NAME}>
+		FILES $<TARGET_PDB_FILE:cifpp>
 		DESTINATION ${CMAKE_INSTALL_LIBDIR}
 		OPTIONAL)
 endif()
@@ -353,12 +368,6 @@ install(
 	COMPONENT Devel
 )

-install(
-	FILES ${PROJECT_BINARY_DIR}/cif++/exports.hpp
-	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/cif++
-	COMPONENT Devel
-)
-
 install(FILES
 	${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
 	${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
@@ -367,6 +376,16 @@ install(FILES
 	DESTINATION ${CIFPP_DATA_DIR}
 )

+if(CIFPP_CACHE_DIR) # test the variable itself; an expanded path would be read as an undefined variable name
+	install(FILES
+		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
+		${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
+		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
+		${COMPONENTS_CIF}
+		DESTINATION ${CIFPP_CACHE_DIR}
+	)
+endif()
+
 set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)

 configure_package_config_file(
@@ -383,11 +402,10 @@ install(FILES
 	COMPONENT Devel
 )

-set(cifpp_MAJOR_VERSION ${CMAKE_PROJECT_VERSION_MAJOR})
 set_target_properties(cifpp PROPERTIES
 	VERSION ${PROJECT_VERSION}
-	SOVERSION ${cifpp_MAJOR_VERSION}
-	INTERFACE_cifpp_MAJOR_VERSION ${cifpp_MAJOR_VERSION})
+	SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
+	INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})

 set_property(TARGET cifpp APPEND PROPERTY
 	COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
@@ -399,25 +417,10 @@ write_basic_package_version_file(
 	COMPATIBILITY AnyNewerVersion
 )

-# pkgconfig support
-set(prefix ${CMAKE_INSTALL_PREFIX})
-set(exec_prefix ${CMAKE_INSTALL_PREFIX})
-set(libdir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_LIBDIR})
-set(includedir ${CMAKE_INSTALL_PREFIX}/${CMAKE_INSTALL_INCLUDEDIR})
-
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/libcifpp.pc.in
-	${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in @ONLY)
-file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc
-	INPUT ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc.in)
-install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.pc DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig)
-
-if(ENABLE_TESTING)
-	enable_testing()
-
+if(BUILD_TESTING)
 	find_package(Boost REQUIRED)

-	list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar
-	)
+	list(APPEND CIFPP_tests unit-v2 unit-3d format model rename-compound sugar spinner)

 	foreach(CIFPP_TEST IN LISTS CIFPP_tests)
 		set(CIFPP_TEST "${CIFPP_TEST}-test")
@@ -436,18 +439,18 @@ if(ENABLE_TESTING)

 		add_custom_command(
 			OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
-			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
+			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${CMAKE_CURRENT_SOURCE_DIR}/test)

 		add_test(NAME ${CIFPP_TEST}
-			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${PROJECT_SOURCE_DIR}/test)
+			COMMAND $<TARGET_FILE:${CIFPP_TEST}> -- ${CMAKE_CURRENT_SOURCE_DIR}/test)
 	endforeach()
 endif()

 # Optionally install the update scripts for CCD and dictionary files
 if(CIFPP_INSTALL_UPDATE_SCRIPT)
-	if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+	if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "GNU")
 		set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/cron.weekly")
-	elseif(UNIX)	# assume all others are like FreeBSD... 
+	elseif(UNIX) # assume all others are like FreeBSD...
 		set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/periodic/weekly")
 	else()
 		message(FATAL_ERROR "Don't know where to install the update script")
@@ -475,6 +478,10 @@ if(CIFPP_INSTALL_UPDATE_SCRIPT)
 	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")
 endif()

+if(BUILD_DOCUMENTATION)
+	add_subdirectory(docs)
+endif()
+
 set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
 set(CPACK_SOURCE_TGZ ON)
 set(CPACK_SOURCE_TBZ2 OFF)
--- a/README.md
+++ b/README.md
@@ -1,46 +1,120 @@
-libcifpp
-========
+# libcifpp

-This library contains code to work with mmCIF and PDB files.
+This library contains code to work with mmCIF and legacy PDB files.

-Requirements
------------
+## Documentation
+
+The documentation can be found at https://www.hekkelman.com/libcifpp-doc/
+
+## Synopsis
+
+```c++
+// A simple program counting residues with an OXT atom
+
+#include <filesystem>
+#include <iostream>
+
+#include <cif++.hpp>
+
+namespace fs = std::filesystem;
+
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+        exit(1);
+
+    // Read file, can be PDB or mmCIF and can even be compressed with gzip.
+    cif::file file = cif::pdb::read(argv[1]);
+
+    if (file.empty())
+    {
+        std::cerr << "Empty file" << std::endl;
+        exit(1);
+    }
+
+    // Take the first datablock in the file
+    auto &db = file.front();
+
+    // Use the atom_site category
+    auto &atom_site = db["atom_site"];
+
+    // Count the atoms with atom-id "OXT"
+    auto n = atom_site.count(cif::key("label_atom_id") == "OXT");
+
+    std::cout << "File contains " << atom_site.size() << " atoms of which "
+              << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
+              << "residues with an OXT are:" << std::endl;
+
+    // Loop over all atoms with atom-id "OXT" and print out some info.
+    // That info is extracted using structured binding in C++
+    for (const auto &[asym, comp, seqnr] :
+            atom_site.find<std::string, std::string, int>(
+                cif::key("label_atom_id") == "OXT",
+                "label_asym_id", "label_comp_id", "label_seq_id"))
+    {
+        std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
+    }
+
+    return 0;
+}
+```
+
+## Installation
+
+You might be able to install libcifpp using the package manager of
+your OS distribution, but most likely that package will be out of date.
+It is therefore recommended to build *libcifpp* from source, which is
+not hard to do.
+
+### Requirements

 The code for this library was written in C++17. You therefore need a
-recent compiler to build it. For the development gcc 9.3 and clang 9.0
+recent compiler to build it. For the development gcc 9.4 and clang 9.0
 have been used as well as MSVC version 2019.

 Other requirements are:

+- [cmake](https://cmake.org), a build tool.
 - [mrc](https://github.com/mhekkel/mrc), a resource compiler that
  allows including data files into the executable making them easier to
-  install. Strictly this is optional, but at the expense of functionality.
+  install. Strictly speaking this is optional, but at the expense of
+  functionality.
+- [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
+  library for, among other things, matrix calculations. This can usually
+  be installed using your package manager; in Debian/Ubuntu it is called
+  `libeigen3-dev`.
+- zlib, the development version of this library. On Debian/Ubuntu this
+  is the package `zlib1g-dev`.
+- [boost](https://www.boost.org).

-Building
--------
+When building using MS Visual Studio, you will also need [libzeep](https://github.com/mhekkel/libzeep)
+since MSVC does not yet provide a C++ template required by libcifpp.

-This library uses [cmake](https://cmake.org). The usual way of building
-and installing is to create a `build` directory and run cmake there.
+The Boost libraries are only needed in case you want to build the test
+code or if you are using GCC. The latter is due to a long-standing
+bug in the implementation of std::regex: it simply crashes
+on the regular expressions used in the mmcif_pdbx dictionary, so
+we use the boost regex implementation instead.

-On linux e.g. you would issue the following commands:
+### Building

+Building the code is as simple as typing:
+
+```console
+ git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules
+ cd libcifpp
+ cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release
+ cmake --build build
+ cmake --install build
 ```
-	git clone https://github.com/PDB-REDO/libcifpp.git
-	cd libcifpp
-	mkdir build
-	cd build
-	cmake ..
-	cmake --build . --config Release
-	ctest -C Release
-	cmake --install .
-```
+
 This checks out the source code from github, creates a new directory
-where cmake stores its files. Run a configure, build the code and run
-tests. And then it installs the library and auxiliary files.
+where cmake stores its files, runs a configure, builds the code and then
+installs the library and auxiliary files.

-The default is to install everything in `$HOME/.local` on Linux and
-`%LOCALAPPDATA%` on Windows (the AppData/Local folder in your home directory).
-You can change this by specifying the prefix with the
-[CMAKE_INSTALL_PREFIX](https://cmake.org/cmake/help/v3.21/variable/CMAKE_INSTALL_PREFIX.html)
-variable.
+Tests are created by default, and to test the code you can run:

+```console
+ cmake --build build
+ ctest --test-dir build
+```
--- a/53
+++ b/53
@@ -1,9 +1,62 @@
+Version 5.2.3
+- New constructors for cif::item, one taking std::optional values
+  and another taking only a name resulting in a value '.' (i.e. inapplicable).
+- added cif::cell::get_volume
+
+Version 5.2.2
+- Remove dependency on Eigen3 for users of libcifpp
+- Fix typos in documentation
+- Do not build latex files in documentation
+- Fixed conversion from string to integer, would fail on +2 e.g.
+- sqrt is not constexpr, thus kGoldenRatio should be const, not constexpr
+
+Version 5.2.1
+- New versionstring module
+- small fixes for generating documentation
+- correctly setting SONAME
+
+Version 5.2.0
+- With lots of documentation
+- Refactored coloured text output
+- Removed the subdirectory cif++/pdb, there now is a single
+  header file pdb.hpp for I/O of legacy PDB files.
+
+Version 5.1.3
+- Dropped pkgconfig support
+
+Version 5.1.2
+- New version string code
+- Added check for Eigen3 in cifppConfig.cmake
+
+Version 5.1.1
+- Added missing include <compare> in symmetry.hpp
+- Added empty() to matrix
+- Fix for parsing legacy PDB files with a last line that does
+  not end with a new line character.
+
+Version 5.1
+- New parser, optimised for speed
+- Fix in unique ID generator
+
+Version 5.0.10
+- Fix in progress_bar, was using too much CPU
+- Optimised mmCIF parser
+
+Version 5.0.9
+- Fix in dihedral angle calculations
+- Added create_water to model
+- Writing twin domain info in PDB files and more PDB fixes
+- remove_atom improved (remove struct_conn records)
+- Added a specialisation for category::find1<std::optional>
+- fix memory leak in category
+
 Version 5.0.8
 - implemented find_first, find_min, find_max and count in category
 - find1 now throws an exception if condition does not not exactly match one row
 - Change in writing out PDB files, now looking up the original auth_seq_num
  via the pdbx_xxx_scheme categories based on the atom_site.auth_seq_num ->
  pdbx_xxx_scheme.pdb_seq_num relationship.
+- fix memory leak in category

 Version 5.0.7.1
 - Use the implementation from zeep for std::experimental::is_detected
--- a/cmake/FindSphinx.cmake
+++ b/cmake/FindSphinx.cmake
@@ -0,0 +1,11 @@
+#Look for an executable called sphinx-build
+find_program(SPHINX_EXECUTABLE
+             NAMES sphinx-build
+             DOC "Path to sphinx-build executable")
+
+include(FindPackageHandleStandardArgs)
+
+#Handle standard arguments to find_package like REQUIRED and QUIET
+find_package_handle_standard_args(Sphinx
+                                  "Failed to find sphinx-build executable"
+                                  SPHINX_EXECUTABLE)
--- a/cmake/GetGitRevisionDescription.cmake
+++ b/cmake/GetGitRevisionDescription.cmake
@@ -1,284 +0,0 @@
-# - Returns a version string from Git
-#
-# These functions force a re-configure on each git commit so that you can
-# trust the values of the variables in your build system.
-#
-#  get_git_head_revision(<refspecvar> <hashvar> [ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR])
-#
-# Returns the refspec and sha hash of the current head revision
-#
-#  git_describe(<var> [<additional arguments to git describe> ...])
-#
-# Returns the results of git describe on the source tree, and adjusting
-# the output so that it tests false if an error occurs.
-#
-#  git_describe_working_tree(<var> [<additional arguments to git describe> ...])
-#
-# Returns the results of git describe on the working tree (--dirty option),
-# and adjusting the output so that it tests false if an error occurs.
-#
-#  git_get_exact_tag(<var> [<additional arguments to git describe> ...])
-#
-# Returns the results of git describe --exact-match on the source tree,
-# and adjusting the output so that it tests false if there was no exact
-# matching tag.
-#
-#  git_local_changes(<var>)
-#
-# Returns either "CLEAN" or "DIRTY" with respect to uncommitted changes.
-# Uses the return code of "git diff-index --quiet HEAD --".
-# Does not regard untracked files.
-#
-# Requires CMake 2.6 or newer (uses the 'function' command)
-#
-# Original Author:
-# 2009-2020 Ryan Pavlik <ryan.pavlik@gmail.com> <abiryan@ryand.net>
-# http://academic.cleardefinition.com
-#
-# Copyright 2009-2013, Iowa State University.
-# Copyright 2013-2020, Ryan Pavlik
-# Copyright 2013-2020, Contributors
-# SPDX-License-Identifier: BSL-1.0
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-
-if(__get_git_revision_description)
-    return()
-endif()
-set(__get_git_revision_description YES)
-
-# We must run the following at "include" time, not at function call time,
-# to find the path to this module rather than the path to a calling list file
-get_filename_component(_gitdescmoddir ${CMAKE_CURRENT_LIST_FILE} PATH)
-
-# Function _git_find_closest_git_dir finds the next closest .git directory
-# that is part of any directory in the path defined by _start_dir.
-# The result is returned in the parent scope variable whose name is passed
-# as variable _git_dir_var. If no .git directory can be found, the
-# function returns an empty string via _git_dir_var.
-#
-# Example: Given a path C:/bla/foo/bar and assuming C:/bla/.git exists and
-# neither foo nor bar contain a file/directory .git. This wil return
-# C:/bla/.git
-#
-function(_git_find_closest_git_dir _start_dir _git_dir_var)
-    set(cur_dir "${_start_dir}")
-    set(git_dir "${_start_dir}/.git")
-    while(NOT EXISTS "${git_dir}")
-        # .git dir not found, search parent directories
-        set(git_previous_parent "${cur_dir}")
-        get_filename_component(cur_dir "${cur_dir}" DIRECTORY)
-        if(cur_dir STREQUAL git_previous_parent)
-            # We have reached the root directory, we are not in git
-            set(${_git_dir_var}
-                ""
-                PARENT_SCOPE)
-            return()
-        endif()
-        set(git_dir "${cur_dir}/.git")
-    endwhile()
-    set(${_git_dir_var}
-        "${git_dir}"
-        PARENT_SCOPE)
-endfunction()
-
-function(get_git_head_revision _refspecvar _hashvar)
-    _git_find_closest_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)
-
-    if("${ARGN}" STREQUAL "ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR")
-        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR TRUE)
-    else()
-        set(ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR FALSE)
-    endif()
-    if(NOT "${GIT_DIR}" STREQUAL "")
-        file(RELATIVE_PATH _relative_to_source_dir "${CMAKE_SOURCE_DIR}"
-             "${GIT_DIR}")
-        if("${_relative_to_source_dir}" MATCHES "[.][.]" AND NOT ALLOW_LOOKING_ABOVE_CMAKE_SOURCE_DIR)
-            # We've gone above the CMake root dir.
-            set(GIT_DIR "")
-        endif()
-    endif()
-    if("${GIT_DIR}" STREQUAL "")
-        set(${_refspecvar}
-            "GITDIR-NOTFOUND"
-            PARENT_SCOPE)
-        set(${_hashvar}
-            "GITDIR-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-
-    # Check if the current source dir is a git submodule or a worktree.
-    # In both cases .git is a file instead of a directory.
-    #
-    if(NOT IS_DIRECTORY ${GIT_DIR})
-        # The following git command will return a non empty string that
-        # points to the super project working tree if the current
-        # source dir is inside a git submodule.
-        # Otherwise the command will return an empty string.
-        #
-        execute_process(
-            COMMAND "${GIT_EXECUTABLE}" rev-parse
-                    --show-superproject-working-tree
-            WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-            OUTPUT_VARIABLE out
-            ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-        if(NOT "${out}" STREQUAL "")
-            # If out is empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule
-            file(READ ${GIT_DIR} submodule)
-            string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE
-                                 ${submodule})
-            string(STRIP ${GIT_DIR_RELATIVE} GIT_DIR_RELATIVE)
-            get_filename_component(SUBMODULE_DIR ${GIT_DIR} PATH)
-            get_filename_component(GIT_DIR ${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}
-                                   ABSOLUTE)
-            set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
-        else()
-            # GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
-            file(READ ${GIT_DIR} worktree_ref)
-            # The .git directory contains a path to the worktree information directory
-            # inside the parent git repo of the worktree.
-            #
-            string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir
-                                 ${worktree_ref})
-            string(STRIP ${git_worktree_dir} git_worktree_dir)
-            _git_find_closest_git_dir("${git_worktree_dir}" GIT_DIR)
-            set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
-        endif()
-    else()
-        set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
-    endif()
-    set(GIT_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/git-data")
-    if(NOT EXISTS "${GIT_DATA}")
-        file(MAKE_DIRECTORY "${GIT_DATA}")
-    endif()
-
-    if(NOT EXISTS "${HEAD_SOURCE_FILE}")
-        return()
-    endif()
-    set(HEAD_FILE "${GIT_DATA}/HEAD")
-    configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)
-
-    configure_file("${_gitdescmoddir}/GetGitRevisionDescription.cmake.in"
-                   "${GIT_DATA}/grabRef.cmake" @ONLY)
-    include("${GIT_DATA}/grabRef.cmake")
-
-    set(${_refspecvar}
-        "${HEAD_REF}"
-        PARENT_SCOPE)
-    set(${_hashvar}
-        "${HEAD_HASH}"
-        PARENT_SCOPE)
-endfunction()
-
-function(git_describe _var)
-    if(NOT GIT_FOUND)
-        find_package(Git QUIET)
-    endif()
-    get_git_head_revision(refspec hash)
-    if(NOT GIT_FOUND)
-        set(${_var}
-            "GIT-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-    if(NOT hash)
-        set(${_var}
-            "HEAD-HASH-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-
-    # TODO sanitize
-    #if((${ARGN}" MATCHES "&&") OR
-    #	(ARGN MATCHES "||") OR
-    #	(ARGN MATCHES "\\;"))
-    #	message("Please report the following error to the project!")
-    #	message(FATAL_ERROR "Looks like someone's doing something nefarious with git_describe! Passed arguments ${ARGN}")
-    #endif()
-
-    #message(STATUS "Arguments to execute_process: ${ARGN}")
-
-    execute_process(
-        COMMAND "${GIT_EXECUTABLE}" describe --tags --always ${hash} ${ARGN}
-        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-        RESULT_VARIABLE res
-        OUTPUT_VARIABLE out
-        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(NOT res EQUAL 0)
-        set(out "${out}-${res}-NOTFOUND")
-    endif()
-
-    set(${_var}
-        "${out}"
-        PARENT_SCOPE)
-endfunction()
-
-function(git_describe_working_tree _var)
-    if(NOT GIT_FOUND)
-        find_package(Git QUIET)
-    endif()
-    if(NOT GIT_FOUND)
-        set(${_var}
-            "GIT-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-
-    execute_process(
-        COMMAND "${GIT_EXECUTABLE}" describe --dirty ${ARGN}
-        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-        RESULT_VARIABLE res
-        OUTPUT_VARIABLE out
-        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(NOT res EQUAL 0)
-        set(out "${out}-${res}-NOTFOUND")
-    endif()
-
-    set(${_var}
-        "${out}"
-        PARENT_SCOPE)
-endfunction()
-
-function(git_get_exact_tag _var)
-    git_describe(out --exact-match ${ARGN})
-    set(${_var}
-        "${out}"
-        PARENT_SCOPE)
-endfunction()
-
-function(git_local_changes _var)
-    if(NOT GIT_FOUND)
-        find_package(Git QUIET)
-    endif()
-    get_git_head_revision(refspec hash)
-    if(NOT GIT_FOUND)
-        set(${_var}
-            "GIT-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-    if(NOT hash)
-        set(${_var}
-            "HEAD-HASH-NOTFOUND"
-            PARENT_SCOPE)
-        return()
-    endif()
-
-    execute_process(
-        COMMAND "${GIT_EXECUTABLE}" diff-index --quiet HEAD --
-        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
-        RESULT_VARIABLE res
-        OUTPUT_VARIABLE out
-        ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
-    if(res EQUAL 0)
-        set(${_var}
-            "CLEAN"
-            PARENT_SCOPE)
-    else()
-        set(${_var}
-            "DIRTY"
-            PARENT_SCOPE)
-    endif()
-endfunction()
--- a/cmake/GetGitRevisionDescription.cmake.in
+++ b/cmake/GetGitRevisionDescription.cmake.in
@@ -1,43 +0,0 @@
-#
-# Internal file for GetGitRevisionDescription.cmake
-#
-# Requires CMake 2.6 or newer (uses the 'function' command)
-#
-# Original Author:
-# 2009-2010 Ryan Pavlik <rpavlik@iastate.edu> <abiryan@ryand.net>
-# http://academic.cleardefinition.com
-# Iowa State University HCI Graduate Program/VRAC
-#
-# Copyright 2009-2012, Iowa State University
-# Copyright 2011-2015, Contributors
-# Distributed under the Boost Software License, Version 1.0.
-# (See accompanying file LICENSE_1_0.txt or copy at
-# http://www.boost.org/LICENSE_1_0.txt)
-# SPDX-License-Identifier: BSL-1.0
-
-set(HEAD_HASH)
-
-file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
-
-string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
-if(HEAD_CONTENTS MATCHES "ref")
-	# named branch
-	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
-	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
-		configure_file("@GIT_DIR@/${HEAD_REF}" "@GIT_DATA@/head-ref" COPYONLY)
-	else()
-		configure_file("@GIT_DIR@/packed-refs" "@GIT_DATA@/packed-refs" COPYONLY)
-		file(READ "@GIT_DATA@/packed-refs" PACKED_REFS)
-		if(${PACKED_REFS} MATCHES "([0-9a-z]*) ${HEAD_REF}")
-			set(HEAD_HASH "${CMAKE_MATCH_1}")
-		endif()
-	endif()
-else()
-	# detached HEAD
-	configure_file("@GIT_DIR@/HEAD" "@GIT_DATA@/head-ref" COPYONLY)
-endif()
-
-if(NOT HEAD_HASH)
-	file(READ "@GIT_DATA@/head-ref" HEAD_HASH LIMIT 1024)
-	string(STRIP "${HEAD_HASH}" HEAD_HASH)
-endif()
--- a/cmake/VersionString.cmake
+++ b/cmake/VersionString.cmake
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: BSD-2-Clause

-# Copyright (c) 2021 NKI/AVL, Netherlands Cancer Institute
+# Copyright (c) 2021-2023 Maarten L. Hekkelman

 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions are met:
@@ -22,60 +22,254 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

+# This cmake extension writes out a revision.hpp file in a specified directory.
+# The file will contain a C++ inline function that can be used to write out
+# version information.
+
 cmake_minimum_required(VERSION 3.15)

+# We want the revision.hpp file to be updated whenever the status of the
+# git repository changes. Use the same technique as in GetGitRevisionDescription.cmake
+# from https://github.com/rpavlik/cmake-modules
+
+
+#[=======================================================================[.rst:
+.. command:: write_version_header
+
+  Write a file named revision.hpp containing version info::
+
+	write_version_header(<destdir>
+	                     [FILE_NAME <file-name>]
+						 [LIB_NAME <library-name>]
+	                    )
+  
+  This command will generate the code to write a file named
+  revision.hpp in the directory ``<destdir>``.
+  
+  ``FILE_NAME``
+	Specify the name of the file to create, default is ``revision.hpp``.
+
+  ``LIB_NAME``
+	Specify the library name which will be used as a prefix part for the
+	variables contained in the revision file.
+#]=======================================================================]
+
+# Record the location of this module now, not at the time the CMakeLists.txt
+# is being processed. The path is quoted so that it is always passed as a
+# single argument.
+get_filename_component(_current_cmake_module_dir "${CMAKE_CURRENT_LIST_FILE}" DIRECTORY)
+
+# Locate a .git file or directory, starting in ``_start_dir`` and walking up
+# through its parent directories.
+#
+#   _start_dir  directory in which the search starts
+#   _variable   name of the variable in the caller's scope that receives the
+#               path of the .git entry that was found, or an empty string
+#               when the search ended without finding one
+function(_get_git_dir _start_dir _variable)
+
+	set(cur_dir "${_start_dir}")
+	set(git_dir "${_start_dir}/.git")
+
+	while(NOT EXISTS "${git_dir}")
+		# .git dir not found, search parent directories
+		set(prev_dir "${cur_dir}")
+		get_filename_component(cur_dir "${cur_dir}" DIRECTORY)
+
+		# All operands are quoted so the paths are compared as plain strings,
+		# without list splitting or a second variable dereference by if()
+		if("${cur_dir}" STREQUAL "${prev_dir}" OR "${cur_dir}" STREQUAL "${_start_dir}")
+			# we are not in git since we either hit root or
+			# the ${_start_dir} which should be the top
+			set(${_variable} "" PARENT_SCOPE)
+			return()
+		endif()
+		set(git_dir "${cur_dir}/.git")
+	endwhile()
+
+	set(${_variable} "${git_dir}" PARENT_SCOPE)
+endfunction()
+
+# Locate the git refspec hash and load the hash
+# This code locates the file containing the git refspec/hash
+# and loads it. Doing it this way assures that each time the git
+# repository changes the revision.hpp file gets out of date.
+#
+#   _data_dir  directory in which the intermediate files (a copy of HEAD,
+#              the generated grab-ref.cmake script, ...) are stored
+#   _variable  name of the variable in the caller's scope that receives the
+#              hash, or an empty string when it could not be determined
+function(_get_git_hash _data_dir _variable)
+
+	# Be pessimistic: clear the caller's variable so that every early
+	# return below reports 'no hash'
+	set(${_variable} "" PARENT_SCOPE)
+
+	# The grab-ref template written below refers to @VERSION_STRING_DATA@.
+	# Bind that name to our own argument instead of relying on a variable
+	# that happens to be set in the caller's scope.
+	set(VERSION_STRING_DATA "${_data_dir}")
+
+	# Load git package if needed
+	if(NOT GIT_FOUND)
+		find_package(Git QUIET)
+	endif()
+
+	# And fail if not found
+	if(NOT GIT_FOUND)
+		return()
+	endif()
+
+	# Locate the nearest .git file or directory
+	_get_git_dir("${CMAKE_CURRENT_SOURCE_DIR}" GIT_DIR)
+
+	# And fail if not found
+	if("${GIT_DIR}" STREQUAL "")
+		return()
+	endif()
+
+	# Check if the current source dir is a git submodule or a worktree.
+	# In both cases .git is a file instead of a directory.
+	#
+	if(IS_DIRECTORY "${GIT_DIR}")
+		set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
+	else()
+		# The following git command will return a non empty string that
+		# points to the super project working tree if the current
+		# source dir is inside a git submodule.
+		# Otherwise the command will return an empty string.
+		#
+		execute_process(
+			COMMAND "${GIT_EXECUTABLE}" rev-parse
+					--show-superproject-working-tree
+			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+			OUTPUT_VARIABLE out
+			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+		if(NOT "${out}" STREQUAL "")
+			# If out is not empty, GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a submodule
+			file(READ "${GIT_DIR}" submodule)
+			string(REGEX REPLACE "gitdir: (.*)$" "\\1" GIT_DIR_RELATIVE
+								 "${submodule}")
+			string(STRIP "${GIT_DIR_RELATIVE}" GIT_DIR_RELATIVE)
+			get_filename_component(SUBMODULE_DIR "${GIT_DIR}" PATH)
+			get_filename_component(GIT_DIR "${SUBMODULE_DIR}/${GIT_DIR_RELATIVE}"
+								   ABSOLUTE)
+			set(HEAD_SOURCE_FILE "${GIT_DIR}/HEAD")
+		else()
+			# GIT_DIR/CMAKE_CURRENT_SOURCE_DIR is in a worktree
+			file(READ "${GIT_DIR}" worktree_ref)
+			# The .git file contains a path to the worktree information directory
+			# inside the parent git repo of the worktree.
+			#
+			string(REGEX REPLACE "gitdir: (.*)$" "\\1" git_worktree_dir
+								 "${worktree_ref}")
+			string(STRIP "${git_worktree_dir}" git_worktree_dir)
+			_get_git_dir("${git_worktree_dir}" GIT_DIR)
+			set(HEAD_SOURCE_FILE "${git_worktree_dir}/HEAD")
+		endif()
+	endif()
+
+	# Fail if the 'head' file was not found
+	if(NOT EXISTS "${HEAD_SOURCE_FILE}")
+		return()
+	endif()
+
+	# Make a copy of the head file
+	set(HEAD_FILE "${_data_dir}/HEAD")
+	configure_file("${HEAD_SOURCE_FILE}" "${HEAD_FILE}" COPYONLY)
+
+	# Now we create a cmake file that will read the contents of this
+	# head file in the appropriate way. The bracket argument is written
+	# verbatim; only the @...@ references are substituted by the
+	# configure_file(... @ONLY) call that follows.
+	file(WRITE "${_data_dir}/grab-ref.cmake.in" [[
+set(HEAD_HASH)
+
+file(READ "@HEAD_FILE@" HEAD_CONTENTS LIMIT 1024)
+
+string(STRIP "${HEAD_CONTENTS}" HEAD_CONTENTS)
+if(HEAD_CONTENTS MATCHES "ref")
+	# named branch
+	string(REPLACE "ref: " "" HEAD_REF "${HEAD_CONTENTS}")
+	if(EXISTS "@GIT_DIR@/${HEAD_REF}")
+		configure_file("@GIT_DIR@/${HEAD_REF}" "@VERSION_STRING_DATA@/head-ref" COPYONLY)
+	else()
+		configure_file("@GIT_DIR@/packed-refs" "@VERSION_STRING_DATA@/packed-refs" COPYONLY)
+		file(READ "@VERSION_STRING_DATA@/packed-refs" PACKED_REFS)
+		if("${PACKED_REFS}" MATCHES "([0-9a-z]*) ${HEAD_REF}")
+			set(HEAD_HASH "${CMAKE_MATCH_1}")
+		endif()
+	endif()
+else()
+	# detached HEAD
+	configure_file("@GIT_DIR@/HEAD" "@VERSION_STRING_DATA@/head-ref" COPYONLY)
+endif()
+
+if(NOT HEAD_HASH)
+	file(READ "@VERSION_STRING_DATA@/head-ref" HEAD_HASH LIMIT 1024)
+	string(STRIP "${HEAD_HASH}" HEAD_HASH)
+endif()
+]])
+
+	configure_file("${_data_dir}/grab-ref.cmake.in"
+				   "${_data_dir}/grab-ref.cmake" @ONLY)
+
+	# Include the aforementioned file, this will define
+	# the HEAD_HASH variable we're looking for
+	include("${_data_dir}/grab-ref.cmake")
+
+	set(${_variable} "${HEAD_HASH}" PARENT_SCOPE)
+endfunction()
+
 # Create a revision file, containing the current git version info, if any
 function(write_version_header dir)
+
+	# Parse the optional keyword arguments. Despite the local names used
+	# here, 'flags' holds the value-less options (none), 'options' the
+	# keywords taking one value and 'sources' the multi-value keywords (none).
+	set(flags )
+	set(options LIB_NAME FILE_NAME)
+	set(sources )
+	cmake_parse_arguments(VERSION_STRING_OPTION "${flags}" "${options}" "${sources}" ${ARGN})
+
 	# parameter check
-	if(NOT IS_DIRECTORY ${dir})
+	if(NOT IS_DIRECTORY "${dir}")
 		message(FATAL_ERROR "First parameter to write_version_header should be a directory where the final revision.hpp file will be placed")
 	endif()

-	include(GetGitRevisionDescription)
-	if(NOT(GIT-NOTFOUND OR HEAD-HASH-NOTFOUND))
-		git_describe_working_tree(BUILD_VERSION_STRING --match=build --dirty)
+	if(VERSION_STRING_OPTION_FILE_NAME)
+		set(file_name "${VERSION_STRING_OPTION_FILE_NAME}")
+	else()
+		set(file_name "revision.hpp")
+	endif()

-		if(BUILD_VERSION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
-			set(BUILD_GIT_TAGREF "${CMAKE_MATCH_2}")
-			if(CMAKE_MATCH_3)
-				set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}*")
-			else()
-				set(BUILD_VERSION_STRING "${CMAKE_MATCH_1}")
-			endif()
+	# Where to store intermediate files
+	set(VERSION_STRING_DATA "${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/VersionString")
+	if(NOT EXISTS "${VERSION_STRING_DATA}")
+		file(MAKE_DIRECTORY "${VERSION_STRING_DATA}")
+	endif()
+
+	# Load the git hash using the wizard-like code above.
+	_get_git_hash("${VERSION_STRING_DATA}" GIT_HASH)
+
+	# Start from an empty revision string, so that a variable of the same
+	# name in the caller's scope cannot leak into the generated file
+	set(REVISION_STRING "")
+
+	# If git was found, fetch the git description string
+	if(GIT_HASH)
+		execute_process(
+			COMMAND "${GIT_EXECUTABLE}" describe --dirty --match=build
+			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
+			RESULT_VARIABLE res
+			OUTPUT_VARIABLE out
+			ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+
+		if(res EQUAL 0)
+			set(REVISION_STRING "${out}")
+		else()
+			# The hash itself was found here, it is 'git describe' that failed
+			message(STATUS "git describe failed, does this project have a 'build' tag?")
 		endif()
 	else()
-		message(WARNING "no git info available, cannot update version string")
+		message(STATUS "Git hash not found")
 	endif()

-	string(TIMESTAMP BUILD_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
+	# Check the revision string, if it matches we fill in the required info.
+	# A trailing '*' on the tag ref marks a working tree reported as dirty.
+	if(REVISION_STRING MATCHES "build-([0-9]+)-g([0-9a-f]+)(-dirty)?")
+		set(BUILD_NUMBER ${CMAKE_MATCH_1})
+		if(CMAKE_MATCH_3)
+			set(REVISION_GIT_TAGREF "${CMAKE_MATCH_2}*")
+		else()
+			set(REVISION_GIT_TAGREF "${CMAKE_MATCH_2}")
+		endif()

-	if(ARGC GREATER 1)
-		set(VAR_PREFIX "${ARGV1}")
+		string(TIMESTAMP REVISION_DATE_TIME "%Y-%m-%dT%H:%M:%SZ" UTC)
+	else()
+		set(REVISION_GIT_TAGREF "")
+		set(BUILD_NUMBER 0)
+		set(REVISION_DATE_TIME "")
 	endif()

-	file(WRITE "${PROJECT_BINARY_DIR}/revision.hpp.in" [[// Generated revision file
+	# With LIB_NAME the generated identifiers are prefixed and the record
+	# registers itself as a library, without it as the main program
+	if(VERSION_STRING_OPTION_LIB_NAME)
+		set(VAR_PREFIX "${VERSION_STRING_OPTION_LIB_NAME}")
+		set(IDENT_PREFIX "${VERSION_STRING_OPTION_LIB_NAME}_")
+		set(BOOL_IS_MAIN "false")
+	else()
+		set(VAR_PREFIX "")
+		set(IDENT_PREFIX "")
+		set(BOOL_IS_MAIN "true")
+	endif()

-#pragma once
-
-#include <ostream>
-
-const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
-const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
-const char k@VAR_PREFIX@VersionGitTag[] = "@BUILD_GIT_TAGREF@";
-const char k@VAR_PREFIX@BuildInfo[] = "@BUILD_VERSION_STRING@";
-const char k@VAR_PREFIX@BuildDate[] = "@BUILD_DATE_TIME@";
-
-inline void write_version_string(std::ostream &os, bool verbose)
-{
-	os << k@VAR_PREFIX@ProjectName << " version " << k@VAR_PREFIX@VersionNumber << std::endl;
-	if (verbose)
-	{
-		os << "build: " << k@VAR_PREFIX@BuildInfo << ' ' << k@VAR_PREFIX@BuildDate << std::endl;
-		if (k@VAR_PREFIX@VersionGitTag[0] != 0)
-			os << "git tag: " << k@VAR_PREFIX@VersionGitTag << std::endl;
-	}
-}
-]])
-	configure_file("${PROJECT_BINARY_DIR}/revision.hpp.in" "${dir}/revision.hpp" @ONLY)
+	# The template lives next to this module; its directory was recorded
+	# when the module was loaded
+	configure_file("${_current_cmake_module_dir}/revision.hpp.in" "${dir}/${file_name}" @ONLY)
 endfunction()

--- a/cmake/cifppConfig.cmake.in
+++ b/cmake/cifppConfig.cmake.in
@@ -1,5 +1,12 @@
@PACKAGE_INIT@

+include("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")
+
+# Note that this set_and_check needs to be executed before
+# find_dependency of Eigen3, otherwise the path is
+# not found.
+set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")
+
 include(CMakeFindDependencyMacro)
 find_dependency(Threads)

@@ -9,8 +16,4 @@ if(MSVC)
 	find_dependency(zeep REQUIRED)
 endif()

-INCLUDE("${CMAKE_CURRENT_LIST_DIR}/cifppTargets.cmake")
-
-set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")
-
 check_required_components(cifpp)
--- a/cmake/revision.hpp.in
+++ b/cmake/revision.hpp.in
@@ -0,0 +1,121 @@
+// This file was generated by VersionString.cmake
+
+#pragma once
+
+#include <ostream>
+
+// Version data for this project. Every @...@ placeholder is substituted by
+// write_version_header in VersionString.cmake; @VAR_PREFIX@ is the LIB_NAME
+// passed to that command, or empty when none was given.
+constexpr const char k@VAR_PREFIX@ProjectName[] = "@PROJECT_NAME@";
+constexpr const char k@VAR_PREFIX@VersionNumber[] = "@PROJECT_VERSION@";
+constexpr int k@VAR_PREFIX@BuildNumber = @BUILD_NUMBER@;
+constexpr const char k@VAR_PREFIX@RevisionGitTag[] = "@REVISION_GIT_TAGREF@";
+constexpr const char k@VAR_PREFIX@RevisionDate[] = "@REVISION_DATE_TIME@";
+
+#ifndef VERSION_INFO_DEFINED
+#define VERSION_INFO_DEFINED 1
+
+namespace version_info_v1_1
+{
+
+// Registry of version records. An object of a derived class registers itself
+// on construction, either as the single 'main' record or as one of the
+// library records, which are kept in a singly linked list.
+class version_info_base
+{
+  public:
+	// Write the registered main record, if any, to os. When verbose is true
+	// every registered library record is written as well, each one preceded
+	// by a line containing a single dash.
+	static void write_version_string(std::ostream &os, bool verbose)
+	{
+		auto s_main = registered_main();
+		if (s_main != nullptr)
+			s_main->write(os, verbose);
+
+		if (verbose)
+		{
+			for (auto lib = registered_libraries(); lib != nullptr; lib = lib->m_next)
+			{
+				os << "-\n";
+				lib->write(os, verbose);
+			}
+		}
+	}
+
+  protected:
+	// Store the (non-owning) pointers and register this object. With is_main
+	// set the object replaces whatever was registered as main before,
+	// otherwise it is prepended to the list of libraries.
+	version_info_base(const char *name, const char *version, int build_number, const char *git_tag, const char *revision_date, bool is_main)
+		: m_name(name)
+		, m_version(version)
+		, m_build_number(build_number)
+		, m_git_tag(git_tag)
+		, m_revision_date(revision_date)
+	{
+		if (is_main)
+			registered_main() = this;
+		else
+		{
+			auto &s_head = registered_libraries();
+			m_next = s_head;
+			s_head = this;
+		}
+	}
+
+	// Write "<name> version <version>". In verbose mode, and only when the
+	// build number is not zero, also write the build number with the
+	// revision date and, if it is not empty, the git tag.
+	void write(std::ostream &os, bool verbose)
+	{
+		os << m_name << " version " << m_version << '\n';
+
+		if (verbose)
+		{
+			if (m_build_number != 0)
+			{
+				os << "build: " << m_build_number << ' ' << m_revision_date << '\n';
+				if (m_git_tag[0] != 0)
+					os << "git tag: " << m_git_tag << '\n';
+			}
+		}
+	}
+
+	using version_info_ptr = version_info_base *;
+
+	// Access to the pointer to the main record; a function-local static, so
+	// it is initialised on first use
+	static version_info_ptr &registered_main()
+	{
+		static version_info_ptr s_main = nullptr;
+		return s_main;
+	}
+
+	// Access to the head of the linked list of library records; a
+	// function-local static as well
+	static version_info_ptr &registered_libraries()
+	{
+		static version_info_ptr s_head = nullptr;
+		return s_head;
+	}
+
+	const char *m_name;
+	const char *m_version;
+	int m_build_number;
+	const char *m_git_tag;
+	const char *m_revision_date;
+	// Next library record in the list, nullptr for the last one
+	version_info_base *m_next = nullptr;
+};
+
+// Thin wrapper that gives every project its own distinct type. T is only
+// exposed as implementation_type; the constructor forwards all its
+// arguments to version_info_base.
+template <typename T>
+class version_info : public version_info_base
+{
+  public:
+	using implementation_type = T;
+
+	version_info(const char *name, const char *version, int build_number, const char *git_tag, const char *revision_date, bool is_main)
+		: version_info_base(name, version, build_number, git_tag, revision_date, is_main)
+	{
+	}
+};
+
+} // namespace version_info_v1_1
+
+// Convenience entry point in the global namespace, forwards to
+// version_info_v1_1::version_info_base::write_version_string.
+inline void write_version_string(std::ostream &os, bool verbose)
+{
+	version_info_v1_1::version_info_base::write_version_string(os, verbose);
+}
+
+#endif
+
+// The record for this project: a class with a single instance whose
+// constructor passes the constants defined at the top of this file to the
+// registry. @BOOL_IS_MAIN@ is substituted with true when no LIB_NAME was
+// passed to write_version_header, with false otherwise.
+const class version_info_@IDENT_PREFIX@impl : public version_info_v1_1::version_info<version_info_@IDENT_PREFIX@impl>
+{
+  public:
+	version_info_@IDENT_PREFIX@impl()
+		: version_info(k@VAR_PREFIX@ProjectName, k@VAR_PREFIX@VersionNumber, k@VAR_PREFIX@BuildNumber, k@VAR_PREFIX@RevisionGitTag, k@VAR_PREFIX@RevisionDate, @BOOL_IS_MAIN@)
+	{
+	}
+} s_version_info_@IDENT_PREFIX@instance;
--- a/cmake/test-rx.cpp
+++ b/cmake/test-rx.cpp
@@ -11,8 +11,8 @@ int main()

 	std::regex_search(s, m, r);

-	std::cout << s.substr(0, 10) << std::endl;
-	std::cout << m.str(1).substr(0, 10) << std::endl;
+	std::cout << s.substr(0, 10) << '\n';
+	std::cout << m.str(1).substr(0, 10) << '\n';

 	return 0;
 }
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -0,0 +1,48 @@
+# Build the documentation in two steps: Doxygen extracts XML from the public
+# headers, Sphinx then renders HTML from the files in this directory and is
+# pointed at that XML through the breathe_projects setting.
+find_package(Doxygen REQUIRED)
+find_package(Sphinx REQUIRED)
+
+# Find all the public headers
+# get_target_property(CIFPP_PUBLIC_HEADER_DIR libCIFPP INTERFACE_INCLUDE_DIRECTORIES)
+set(CIFPP_PUBLIC_HEADER_DIR "${PROJECT_SOURCE_DIR}/include")
+
+# CONFIGURE_DEPENDS makes the build re-check the glob, so that added or
+# removed headers are picked up without a manual re-configure
+file(GLOB_RECURSE CIFPP_PUBLIC_HEADERS CONFIGURE_DEPENDS "${CIFPP_PUBLIC_HEADER_DIR}/*.hpp")
+
+set(DOXYGEN_INPUT_DIR "${CIFPP_PUBLIC_HEADER_DIR}")
+set(DOXYGEN_OUTPUT_DIR "${CMAKE_CURRENT_BINARY_DIR}/xml")
+set(DOXYGEN_INDEX_FILE "${DOXYGEN_OUTPUT_DIR}/index.xml")
+set(DOXYFILE_IN "${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in")
+set(DOXYFILE_OUT "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile")
+
+# Replace variables inside @@ with the current values
+configure_file("${DOXYFILE_IN}" "${DOXYFILE_OUT}" @ONLY)
+
+add_custom_command(
+	OUTPUT "${DOXYGEN_OUTPUT_DIR}"
+	COMMAND "${CMAKE_COMMAND}" -E make_directory "${DOXYGEN_OUTPUT_DIR}"
+	VERBATIM)
+
+# The output directory is produced by the rule above and is therefore only a
+# dependency here: listing it as a BYPRODUCT as well would give it two
+# producers and make this rule depend on its own output.
+# MAIN_DEPENDENCY takes a single file, the template is an ordinary dependency.
+add_custom_command(
+	OUTPUT "${DOXYGEN_INDEX_FILE}"
+	COMMAND "${DOXYGEN_EXECUTABLE}" "${DOXYFILE_OUT}"
+	MAIN_DEPENDENCY "${DOXYFILE_OUT}"
+	DEPENDS "${DOXYGEN_OUTPUT_DIR}" ${CIFPP_PUBLIC_HEADERS} "${DOXYFILE_IN}"
+	COMMENT "Generating docs"
+	VERBATIM)
+
+add_custom_target("Doxygen-${PROJECT_NAME}" ALL DEPENDS "${DOXYGEN_INDEX_FILE}")
+
+# conf.py is written next to conf.py.in, in the source directory, since that
+# directory is the one passed to Sphinx as its source directory below
+configure_file("${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in" "${CMAKE_CURRENT_SOURCE_DIR}/conf.py" @ONLY)
+
+set(SPHINX_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}")
+set(SPHINX_BUILD "${CMAKE_CURRENT_BINARY_DIR}/sphinx")
+
+add_custom_target("Sphinx-${PROJECT_NAME}" ALL
+	COMMAND "${SPHINX_EXECUTABLE}" -b html
+		"-Dbreathe_projects.${PROJECT_NAME}=${DOXYGEN_OUTPUT_DIR}"
+		"${SPHINX_SOURCE}" "${SPHINX_BUILD}"
+	DEPENDS "${DOXYGEN_INDEX_FILE}"
+	BYPRODUCTS "${CMAKE_CURRENT_SOURCE_DIR}/api"
+	WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}"
+	COMMENT "Generating documentation with Sphinx"
+	VERBATIM)
+
+# Both targets depend on the same custom command output. Order them at the
+# target level so a parallel build cannot run the Doxygen rule twice at the
+# same time.
+add_dependencies("Sphinx-${PROJECT_NAME}" "Doxygen-${PROJECT_NAME}")
+
+install(DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/sphinx/"
+	DESTINATION "${CMAKE_INSTALL_DOCDIR}"
+	PATTERN .doctrees EXCLUDE
+	PATTERN .buildinfo EXCLUDE)
--- a/docs/Doxyfile.in
+++ b/docs/Doxyfile.in
@@ -0,0 +1,10 @@
+EXCLUDE_SYMBOLS        = cif::detail::*, std*
+FILE_PATTERNS          = *.hpp
+STRIP_FROM_PATH        = @DOXYGEN_INPUT_DIR@
+RECURSIVE              = YES
+GENERATE_XML           = YES
+GENERATE_LATEX         = NO
+PREDEFINED             += and=&& or=|| not=! CIFPP_EXPORT= HAVE_LIBCLIPPER=1
+GENERATE_HTML          = NO
+GENERATE_TODOLIST      = NO
+INPUT                  = @DOXYGEN_INPUT_DIR@
--- a/docs/_static/.gitignore
+++ b/docs/_static/.gitignore
@@ -0,0 +1,4 @@
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
--- a/docs/basics.rst
+++ b/docs/basics.rst
@@ -0,0 +1,400 @@
+Basic usage
+===========
+
+This library, *libcifpp*, is a generic *CIF* library with some specific additions to work with *mmCIF* files. The main focus of this library is to make sure that files read or written are valid. That is, they are syntactically valid *and* their content is valid with respect to a CIF dictionary, if such a dictionary is available and specified.
+
+Reading a file is as simple as:
+
+.. code-block:: cpp
+
+    #include <cif++.hpp>
+
+    cif::file f("/path/to/file.cif");
+
+The file may also be compressed using *gzip* which is detected automatically.
+
+Writing out the file again is also simple. To write it to the terminal you can do:
+
+.. code-block:: cpp
+
+    std::cout << f;
+
+    // or
+    f.save(std::cout);
+
+    // or write a compressed file using gzip compression:
+    f.save("/tmp/f.cif.gz");
+
+CIF files contain one or more datablocks. To print out the names of all datablocks in our file:
+
+.. code-block:: cpp
+
+    for (auto &db : f)
+        std::cout << db.name() << '\n';
+
+Most often *libcifpp* is used to read in structure files in mmCIF format. These files only contain one datablock and so you can safely use code like this:
+
+.. code-block:: cpp
+
+    // get a reference to the first datablock in f
+    auto &db = f.front();
+
+But if you know the name of the datablock, this also works:
+
+.. code-block:: cpp
+
+    // get a reference to the datablock name '1CBS'
+    auto &db = f["1CBS"];
+
+Now, each datablock contains categories. To print out all their names:
+
+.. code-block:: cpp
+
+    for (auto &cat : db)
+        std::cout << cat.name() << '\n';
+
+But you probably know what category you need to use, so let's fetch it by name:
+
+.. _atom_site-label:
+.. code-block:: cpp
+
+    // get a reference to the atom_site category in db
+    auto &atom_site = db["atom_site"];
+
+    // and make sure there's some data in it:
+    assert(not atom_site.empty());
+
+.. note::
+    
+    Note that we omit the leading underscore in the name of the category here.
+
+Categories contain rows of data and each row has fields or items. Referencing a row in a category results in a :cpp:class:`cif::row_handle` object which you can use to request or manipulate item data.
+
+.. code-block:: cpp
+
+    // Get the first row in atom_site
+    auto rh = atom_site.front();
+
+    // Get the label_atom_id value from this row handle as a std::string
+    std::string atom_id = rh["label_atom_id"].as<std::string>();
+
+    // Get the x, y and z coordinates using structured binding
+    const auto &[x, y, z] = rh.get<float,float,float>("Cartn_x", "Cartn_y", "Cartn_z");
+
+    // Assign a new value to the x coordinate of our atom
+    rh["Cartn_x"] = x + 1;
+
+Querying
+--------
+
+Walking over the rows in a category is often not very useful. More often you are interested in specific rows in a category. The function :cpp:func:`cif::category::find` and friends are here to help.
+
+What these functions have in common is that they return data based on a query implemented by :cpp:class:`cif::condition`. These condition objects are built in code using regular C++ syntax. The most basic example of a query is:
+
+.. code-block:: cpp
+
+    cif::condition c = cif::key("id") == 1;
+
+Here the condition is that all rows returned should have a value of 1 in their item named *id*. Likewise you can use other data types and even combine those. Oh, and I said we use regular C++ syntax for conditions, so you may as well use other operators to compare values:
+
+.. code-block:: cpp
+
+    // condition for C-alpha atoms having an occupancy less than 1.0
+    cif::condition c = cif::key("occupancy") < 1.0f and cif::key("label_atom_id") == "CA";
+
+Using the namespace *cif::literals* that code becomes a little less verbose:
+
+.. code-block:: cpp
+
+    using namespace cif::literals;
+    cif::condition c = "occupancy"_key < 1.0f and "label_atom_id"_key == "CA";
+
+Conditions can also be combined:
+
+.. code-block:: cpp
+
+    cif::condition c = "occupancy"_key < 1.0f and "label_atom_id"_key == "CA";
+
+    // extend the condition by requiring the compound ID to be unequal to PRO
+    c = std::move(c) and "label_comp_id"_key != "PRO";
+
+.. note::
+
+    Note the use of std::move here. 
+
+Using queries constructed in this way is simple:
+
+.. code-block:: cpp
+
+    cif::condition c = ...
+    auto result = atom_site.find(std::move(c));
+
+    // or construct a condition inline:
+    auto result = atom_site.find("label_atom_id"_key == "CA");
+
+In the example above the result is a range of :cpp:class:`cif::row_handle` objects. Often, using individual field values is more useful:
+
+.. code-block:: cpp
+
+    // Requesting a single item:
+    for (auto id : atom_site.find<std::string>("label_atom_id"_key == "CA", "id"))
+        std::cout << "ID for CA: " << id << '\n';
+
+    // Requesting multiple items:
+    for (const auto &[id, x, y, z] : atom_site.find<std::string,float,float,float>("label_atom_id"_key == "CA",
+            "id", "Cartn_x", "Cartn_y", "Cartn_z"))
+    {
+        std::cout << "Atom " << id << " is at [" << x << ", " << y << ", " << z << "]\n";
+    }
+
+Returning a complete set is often not required. If you only want to have the first result you can use :cpp:func:`cif::category::find_first` as shown here:
+
+.. code-block:: cpp
+
+    // return the ID item for the first C-alpha atom
+    std::string v1 = atom_site.find_first<std::string>("label_atom_id"_key == "CA", "id");
+
+    // If you're not sure the row exists, use std::optional
+    auto v2 = atom_site.find_first<std::optional<std::string>>("label_atom_id"_key == "CA", "id");
+    if (v2.has_value())
+        ...
+
+There are cases when you really need exactly one result. The :cpp:func:`cif::category::find1` can be used in that case, it will throw an exception if the query does not result in exactly one row.
+
+NULL and ANY
+------------
+
+Sometimes items may be empty. The trouble is a bit that empty comes in two flavors: unknown and null. Null in *CIF* parlance means the item should not contain a value since it makes no sense in this case, the value stored in the file is a single dot character: ``'.'``. E.g. *atom_site* records may have a NULL value for label_seq_id for atoms that are part of a *non-polymer*.
+
+The other empty value is indicated by a question mark character: ``'?'``. This means the value is simply unknown.
+
+Both these are NULL in *libcifpp* conditions and can be searched for using :cpp:var:`cif::null`.
+
+So you can search for:
+
+.. code-block:: cpp
+
+    cif::condition c = "label_seq_id"_key == cif::null;
+
+You might also want to look for a certain value and don't care in which item it is stored, in that case you can use :cpp:var:`cif::any`.
+
+.. code-block:: cpp
+
+    cif::condition c = cif::any == "foo";
+
+In linked records the linking items either have a value in both parent and child, or they are NULL in both. To handle this, you can request the value returned by find to be of type std::optional and then use that value to build the query. As an example, let's find the location of the atom that is referenced as the first atom in a struct_conn record:
+
+.. code-block:: cpp
+
+    // Take references to the two categories we need
+    auto &struct_conn = db["struct_conn"];
+    auto &atom_site = db["atom_site"];
+
+    // Loop over all rows in struct_conn taking only the values we need
+    // Note that the label_seq_id is returned as a std::optional<int>
+    // That means it may contain an integer or may be empty
+    for (const auto &[asym1, seqid1, authseqid1, atomid1] :
+        struct_conn.rows<std::string,std::optional<int>,std::string,std::string>(
+            "ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id"
+        ))
+    {
+        // Find the location of the first atom
+        cif::point p1 = atom_site.find1<float,float,float>(
+            "label_asym_id"_key == asym1 and "label_seq_id"_key == seqid1 and "auth_seq_id"_key == authseqid1 and "label_atom_id"_key == atomid1,
+            "cartn_x", "cartn_y", "cartn_z");
+    }
+    
+
+Validation
+----------
+
+CIF files can have a dictionary attached. And based on such a dictionary a :cpp:class:`cif::validator` object can be constructed which in turn can be used to validate the content of the file.
+
+A simple case:
+
+.. code-block:: cpp
+
+    #include <cif++.hpp>
+
+    cif::file f("1cbs.cif.gz");
+    f.load_dictionary("mmcif_pdbx");
+
+    if (not f.is_valid())
+        std::cout << "This file is not valid\n";
+
+If you want to know why it is not valid, you should set the global variable :cpp:var:`cif::VERBOSE` to something higher than zero. Depending on the value more or less diagnostic output is sent to std::cerr.
+
+In the case above we load a dictionary based on its name. You can of course also load dictionaries based on a specific file, that's a bit more work:
+
+.. code-block:: cpp
+
+    std::ifstream dictFile("/tmp/my-dictionary.dic");
+    auto &validator = cif::parse_dictionary("my-dictionary", dictFile);
+
+    cif::file f("1cbs.cif.gz");
+
+    // assign the validator
+    f.set_validator(&validator);
+
+    // alternatively, load it by name
+    f.load_dictionary("my-dictionary");
+
+    if (not f.is_valid())
+        std::cout << "This file is not valid\n";
+
+Creating your own dictionary is a lot of work, especially if you are only extending an existing dictionary with a couple of new categories or items. So, what you can do is extend a loaded validator like this (code taken from DSSP):
+
+.. code-block:: cpp
+
+    // db is a cif::datablock reference containing an mmCIF file with DSSP annotations
+    auto &validator = const_cast<cif::validator &>(*db.get_validator());
+    if (validator.get_validator_for_category("dssp_struct_summary") == nullptr)
+    {
+        auto dssp_extension = cif::load_resource("dssp-extension.dic");
+        if (dssp_extension)
+            cif::extend_dictionary(validator, *dssp_extension);
+    }
+
+.. note::
+
+    In the example above we're loading the data using :doc:`/resources`. See the documentation on that for more information.
+
+If a validator has been assigned to a file, assignments to items are checked for valid data. So the following code will throw an exception (see: :ref:`the atom_site example <atom_site-label>`):
+
+.. code-block:: cpp
+    
+    auto rh = atom_site.front();
+    rh["Cartn_x"] = "foo";
+
+Linking
+-------
+
+Based on information recorded in dictionary files (see :ref:`Validation`) you can locate linked records in parent or child categories.
+
+To keep this example from getting too complex, let's assume the following example file:
+
+.. code-block:: cif
+
+    data_test
+    loop_
+    _cat_1.id
+    _cat_1.name
+    _cat_1.desc
+    1 aap  Aap
+    2 noot Noot
+    3 mies Mies
+
+    loop_
+    _cat_2.id
+    _cat_2.name
+    _cat_2.num
+    _cat_2.desc
+    1 aap  1 'Een dier'
+    2 aap  2 'Een andere aap'
+    3 noot 1 'walnoot bijvoorbeeld'
+
+And we have a dictionary containing the following link definition:
+
+.. code-block:: cif
+
+    loop_
+    _pdbx_item_linked_group_list.parent_category_id
+    _pdbx_item_linked_group_list.link_group_id
+    _pdbx_item_linked_group_list.parent_name
+    _pdbx_item_linked_group_list.child_name
+    _pdbx_item_linked_group_list.child_category_id
+    cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
+
+So, there are links between *cat_1* and *cat_2* based on the value in items named *name*. Using this information, we can now locate children and parents:
+
+.. code-block:: cpp
+
+    // Assuming the file was loaded in f:
+    auto &cat1 = f.front()["cat_1"];
+    auto &cat2 = f.front()["cat_2"];
+    auto &cat3 = f.front()["cat_3"];
+
+    // Loop over all children in cat2 of the 'aap' row in cat1
+    for (auto r : cat1.get_children(cat1.find1("name"_key == "aap"), cat2))
+        std::cout << r.get<std::string>("desc") << '\n';
+
+Updating a value in an item in a parent category will update the corresponding value in all related children:
+
+.. code-block:: cpp
+
+    auto r1 = cat1.find1("id"_key == 1);
+    r1["name"] = "aapje";
+
+    auto rs1 = cat2.find("name"_key == "aapje");
+    assert(rs1.size() == 2);
+
+However, changing a value in a child record will not update the parent. This may result in an invalid file since you may then have a child that has no parent:
+
+.. code-block:: cpp
+
+    auto r2 = cat2.find1("id"_key == 3);
+    r2["name"] = "wim";
+
+    assert(f.is_valid() == false);
+
+So you have to fix this yourself by inserting a new item in cat1 with the new value.
+
+.. _splitting-rows:
+
+Another situation is when you change a value in a parent and updating children might introduce a situation where you need to split a child. To give an example, consider this:
+
+.. code-block:: cif
+
+    data_test
+    loop_
+    _cat_1.id
+    _cat_1.name
+    _cat_1.desc
+    1 aap  Aap
+    2 noot Noot
+    3 mies Mies
+
+    loop_
+    _cat_2.id
+    _cat_2.name
+    _cat_2.num
+    _cat_2.desc
+    1 aap  1 'Een dier'
+    2 aap  2 'Een andere aap'
+    3 noot 1 'walnoot bijvoorbeeld'
+
+    loop_
+    _cat_3.id
+    _cat_3.name
+    _cat_3.num
+    1 aap 1
+    2 aap 2
+
+And we have a dictionary containing the following link definition (reversed compared to the previous example):
+
+.. code-block:: cif
+
+    loop_
+    _pdbx_item_linked_group_list.parent_category_id
+    _pdbx_item_linked_group_list.link_group_id
+    _pdbx_item_linked_group_list.parent_name
+    _pdbx_item_linked_group_list.child_name
+    _pdbx_item_linked_group_list.child_category_id
+    cat_2 1 '_cat_2.name' '_cat_1.name' cat_1
+    cat_3 1 '_cat_3.name' '_cat_2.name' cat_2
+    cat_3 1 '_cat_3.num'  '_cat_2.num'  cat_2
+
+So *cat3* is a parent of *cat2* and *cat2* is a parent of *cat1*. Now, if you change the *name* value of the first row of *cat3* to 'aapje', the corresponding row in *cat2* is updated as well. But when you update *cat2* you have to update *cat1* too. And simply changing the name field in row 1 of *cat1* is wrong. The default behaviour in libcifpp is to split the record in *cat1* and have a new child with the new name whereas the other remains as is.
+
+The new *cat1* will thus be like:
+
+.. code-block:: cif
+
+    loop_
+    _cat_1.id
+    _cat_1.name
+    _cat_1.desc
+    1 aapje Aap
+    2 noot  Noot
+    3 mies  Mies
+    5 aap   Aap
+
--- a/docs/bitsandpieces.rst
+++ b/docs/bitsandpieces.rst
@@ -0,0 +1,49 @@
+Bits & Pieces
+=============
+
+The *libcifpp* library offers some extra code that makes the life of developers a bit easier.
+
+gzio
+----
+
+To work with compressed data files a *std::streambuf* implementation was added based on the code in `gxrio <https://github.com/mhekkel/gxrio>`_. This allows you to read and write compressed data streams transparently.
+
+When working with files you can use :cpp:class:`cif::gzio::ifstream` and :cpp:class:`cif::gzio::ofstream`. The selection of whether to use compression or not is based on the file extension. If it is ``.gz`` gzip compression is used:
+
+.. code-block:: cpp
+
+	cif::gzio::ifstream file("my-file.txt.gz");
+
+	std::string line;
+	while (std::getline(file, line))
+		std::cout << line << '\n';
+
+Writing is equally easy:
+
+.. code-block:: cpp
+
+	cif::gzio::ofstream file("/tmp/output.txt.gz");
+	file << "Hello, world!";
+	file.close();
+
+You can also use the :cpp:class:`cif::gzio::istream` and feed it a *std::streambuf* object that may or may not contain compressed data. In that case the first bytes of the input are sniffed and if it is gzip compressed data, decompression will be done.
+
+A progress bar
+--------------
+
+Applications based on *libcifpp* may have a longer run time. To give some feedback to the user running your application in a terminal you can use the :cpp:class:`cif::progress_bar`. This class will display an ASCII progress bar along with optional status messages, but only if output is to a real TTY (terminal).
+
+A progress bar is also only shown if the duration is more than two seconds, to avoid having flashing progress bars for short actions.
+
+The progress bar uses an internal progress counter that starts at zero and ends when the max value has been reached after which it will be removed from the screen. Updating this internal progress counter can be done by adding a number of steps calling :cpp:func:`cif::progress_bar::consumed` or by setting the exact value for the counter by calling :cpp:func:`cif::progress_bar::progress`.
+
+Colouring output
+----------------
+
+It is also nice to emphasise some output in the terminal by using colours. For this you can create output manipulators using :cpp:func:`cif::coloured`. To write a string in white, and bold letters on a red background you can do:
+
+.. code-block:: cpp
+
+	using namespace cif::colour;
+	std::cout << cif::coloured("Hello, world!", white, red, bold) << '\n';
+
--- a/docs/compound.rst
+++ b/docs/compound.rst
@@ -0,0 +1,33 @@
+Chemical Compounds
+==================
+
+The data in *CIF* and *mmCIF* files often describes the structure of some chemical compounds. The structure is recorded in the categories *atom_site* and friends. Records in these categories refer to chemical compounds using a compound ID. This compound ID is the ID field of the *chem_comp* category. For all of the known compounds in the PDB there is an entry in the Chemical Compounds Dictionary or `CCD <https://www.wwpdb.org/data/ccd>`_. If *libcifpp* was properly installed you have a copy of this file somewhere on your disk. And if you have installed the update scripts, a fresh version of this file will be retrieved weekly.
+
+As an alternative to CCD there are the monomer library files from `CCP4 <https://www.ccp4.ac.uk/>`_. These contain somewhat different data but the overlap is good enough for usage in *libcifpp*.
+
+Information about compounds is captured in the :cpp:class:`cif::compound`. An instance of a compound object for a certain compound ID can be obtained by using the singleton :cpp:class:`cif::compound_factory`.
+
+If the compound you want to use is not available in the CCD or in CCP4, you can add that information yourself. For this you can use the method :cpp:func:`cif::compound_factory::push_dictionary`.
+
+So, given that we have CCD, CCP4 monomer library and user defined compound definitions, what will you get when you try to retrieve such a compound by ID? The answer is, the factory has a stack of compound generators. The first thrown on the stack is the one for a CCD file (*components.cif*) if it can be found. Then, if the *CLIBD_MON* environment variable is defined, a generator for monomer library files is added to the stack. And then all generators for files you added using *push_dictionary* are added in order. The generators are searched in the reverse order in which they were added to see if one of them creates a compound object for the ID. If no compound was created at all, nullptr is returned.
+
+Updating CCD
+------------
+
+The CCD data is stored in a single file called *components.cif* and can be downloaded from `CCD <https://www.wwpdb.org/data/ccd>`_. 
+
+As can be read in the section on resources (:doc:`/resources`) files in libcifpp are loaded in a specific order. If the CCD datafile was downloaded during installation, a copy can be found in the directory */usr/share/libcifpp/* (if you installed in */usr*). This is a static file and will not be updated until the next installation of libcifpp.
+
+When configuring libcifpp, you can specify the *CIFPP_INSTALL_UPDATE_SCRIPT* option, as in:
+
+.. code-block:: console
+
+	cmake -S . -B build -DCIFPP_INSTALL_UPDATE_SCRIPT=ON # ... more options?
+
+This will install a script named *update-libcifpp-data* in */etc/cron.weekly* or */etc/periodic/weekly*. This file uses a config file named */etc/libcifpp.conf* which you then need to edit. In this config file the following line needs to be uncommented:
+
+.. code-block:: console
+
+	# update=true
+
+After that, the update script will weekly download the latest components.cif file to */var/cache/libcifpp*.
--- a/docs/conf.py.in
+++ b/docs/conf.py.in
@@ -0,0 +1,66 @@
+project = '@PROJECT_NAME@'
+copyright = '2023, Maarten L. Hekkelman'
+author = 'Maarten L. Hekkelman'
+release = '@PROJECT_VERSION@'
+
+# -- General configuration ---------------------------------------------------
+
+extensions = [
+    "breathe",
+    "exhale",
+    "myst_parser"
+]
+
+breathe_projects = {
+	"@PROJECT_NAME@": "../build/docs/xml"
+}
+
+myst_enable_extensions = [ "colon_fence" ]
+breathe_default_project = "@PROJECT_NAME@"
+
+# Setup the exhale extension
+exhale_args = {
+    # These arguments are required
+    "containmentFolder":     "./api",
+    "rootFileName":          "library_root.rst",
+    "doxygenStripFromPath":  "../include/",
+    # Heavily encouraged optional argument (see docs)
+    "rootFileTitle":         "API Reference",
+    # Suggested optional arguments
+    # "createTreeView":        True,
+    # TIP: if using the sphinx-bootstrap-theme, you need
+    # "treeViewIsBootstrap": True,
+    "exhaleExecutesDoxygen": False,
+    "contentsDirectives" : False,
+    
+    "verboseBuild": False
+}
+
+# Tell sphinx what the primary language being documented is.
+primary_domain = 'cpp'
+
+# Tell sphinx what the pygments highlight language should be.
+highlight_language = 'cpp'
+
+templates_path = ['_templates']
+exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
+
+# -- Options for HTML output -------------------------------------------------
+
+# The theme to use for HTML and HTML Help pages.  See the documentation for
+# a list of builtin themes.
+#
+html_theme = 'sphinx_rtd_theme'
+
+# Add any paths that contain custom static files (such as style sheets) here,
+# relative to this directory. They are copied after the builtin static files,
+# so a file named "default.css" will overwrite the builtin "default.css".
+html_static_path = ['_static']
+
+html_theme_options = {
+}
+
+cpp_index_common_prefix = [
+	'cif::'
+]
+
--- a/docs/genindex.rst
+++ b/docs/genindex.rst
@@ -0,0 +1,2 @@
+Index
+=====
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -0,0 +1,46 @@
+Introduction
+============
+
+Information on 3D structures of proteins originally came formatted in `PDB <http://www.wwpdb.org/documentation/file-format-content/format33/v3.3.html>`_ files. Although the specification for this format had some real restrictions like a mandatory HEADER and CRYST line, many programs implemented this very poorly often writing out only ATOM records. And users became used to this.
+
+The legacy PDB format has some severe limitations rendering it useless for all but very small protein structures. A new format called `mmCIF <https://mmcif.wwpdb.org/>`_ has been around for decades and now is the default format for the Protein Data Bank.
+
+The software developed in the `PDB-REDO <https://pdb-redo.eu/>`_ project aims at improving 3D models based on original experimental data. For this, the tools need to be able to work with both legacy PDB and mmCIF files. A decision was made to make mmCIF leading internally in all programs and convert legacy PDB directly into mmCIF before processing the data. A robust conversion had to be developed to make this possible since, as noted above, files can come with more or less information, sometimes making it necessary to do a sequence alignment to find out the exact residue numbers.
+
+And so libcif++ came to life, a library to work with mmCIF files. Work on this library started early 2017 and has developed quite a bit since then. To reduce dependency on other libraries, some functionality was added that is not strictly related to reading and writing mmCIF files but may be useful nonetheless. This is mostly code that is used in 3D calculations and symmetry operations.
+
+Design
+------
+
+The main part of the library is a set of classes that work with mmCIF files. They are:
+
+* :cpp:class:`cif::file`
+* :cpp:class:`cif::datablock`
+* :cpp:class:`cif::category`
+
+The :cpp:class:`cif::file` class encapsulates the contents of a mmCIF file. In such a file there are one or more :cpp:class:`cif::datablock` objects and each datablock contains one or more :cpp:class:`cif::category` objects.
+
+Synopsis
+--------
+
+Using *libcifpp* is easy, if you are familiar with modern C++:
+
+.. literalinclude:: ../README.md
+	:language: c++
+	:start-after: ```c++
+	:end-before: ```
+
+.. toctree::
+   :maxdepth: 2
+   :caption: Contents
+
+   self
+   basics.rst
+   compound.rst
+   model.rst
+   resources.rst
+   symmetry.rst
+   bitsandpieces.rst
+   api/library_root.rst
+   genindex.rst
+
--- a/docs/model.rst
+++ b/docs/model.rst
@@ -0,0 +1,36 @@
+Molecular Model
+===============
+
+Theoretically it is possible to get along with only the classes *cif::file*, *cif::datablock* and *cif::category*. But to keep your data complete and valid you then have to update lots of categories for all but the simplest manipulations. For this *libcifpp* comes with a higher level API modelling atoms, residues, monomers, polymers and complete structures in their respective classes.
+
+Note that these classes only work properly if you are using *mmCIF* files and have an mmcif_pdbx dictionary available, either compiled in using `mrc <https://github.com/mhekkel/mrc.git>`_ or installed in the proper location.
+
+.. note::
+
+	This part of *libcifpp* is the least developed part. What is available should work but functionality should eventually be extended.
+
+Atom
+----
+
+The :cpp:class:`cif::mm::atom` is a lightweight proxy class giving access to the data stored in *atom_site* and *atom_site_anisotrop*. It only caches the most often used item data and every modification is directly written back into the *mmCIF* categories.
+
+Atoms can be copied by value with low cost. The atom class only contains a pointer to an implementation that is reference counted.
+
+Residue, Monomer and Polymer
+----------------------------
+
+The :cpp:class:`cif::mm::residue`, :cpp:class:`cif::mm::monomer` and :cpp:class:`cif::mm::polymer` implement what you'd expect. A monomer is a residue that is part of a polymer and thus has a sequence number and siblings.
+
+Sugars & Branches
+-----------------
+
+There are also classes for modelling sugars and sugar branches. You can create sugar branches
+
+Structure
+---------
+
+The :cpp:class:`cif::mm::structure` can be used to load one of the models from an *mmCIF* file. By default the first model is loaded. (Multiple models are often only available in files containing structures defined using NMR).
+
+A structure holds a reference to a *cif::datablock* and retrieves its data from this datablock and writes any modification back into that datablock.
+
+One of the most useful parts of the structure class is the ability to create and modify residues. This updates related *chem_comp* and *entity* categories as well.
--- a/docs/requirements.in
+++ b/docs/requirements.in
@@ -0,0 +1,5 @@
+sphinx<5
+exhale==0.3.6
+myst-parser
+breathe
+sphinx_rtd_theme==1.3.0
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -0,0 +1,93 @@
+#
+# This file is autogenerated by pip-compile with Python 3.10
+# by the following command:
+#
+#    pip-compile --output-file=requirements.txt requirements.in
+#
+alabaster==0.7.13
+    # via sphinx
+babel==2.12.1
+    # via sphinx
+beautifulsoup4==4.12.2
+    # via exhale
+breathe==4.35.0
+    # via
+    #   -r requirements.in
+    #   exhale
+certifi==2023.7.22
+    # via requests
+charset-normalizer==3.2.0
+    # via requests
+docutils==0.17.1
+    # via
+    #   breathe
+    #   exhale
+    #   myst-parser
+    #   sphinx
+    #   sphinx-rtd-theme
+exhale==0.3.6
+    # via -r requirements.in
+idna==3.4
+    # via requests
+imagesize==1.4.1
+    # via sphinx
+jinja2==3.1.2
+    # via
+    #   myst-parser
+    #   sphinx
+lxml==4.9.3
+    # via exhale
+markdown-it-py==2.2.0
+    # via
+    #   mdit-py-plugins
+    #   myst-parser
+markupsafe==2.1.3
+    # via jinja2
+mdit-py-plugins==0.3.5
+    # via myst-parser
+mdurl==0.1.2
+    # via markdown-it-py
+myst-parser==0.18.1
+    # via -r requirements.in
+packaging==23.1
+    # via sphinx
+pygments==2.16.1
+    # via sphinx
+pyyaml==6.0.1
+    # via myst-parser
+requests==2.31.0
+    # via sphinx
+six==1.16.0
+    # via exhale
+snowballstemmer==2.2.0
+    # via sphinx
+soupsieve==2.4.1
+    # via beautifulsoup4
+sphinx==4.5.0
+    # via
+    #   -r requirements.in
+    #   breathe
+    #   exhale
+    #   myst-parser
+    #   sphinx-rtd-theme
+    #   sphinxcontrib-jquery
+sphinx-rtd-theme==1.3.0
+    # via -r requirements.in
+sphinxcontrib-applehelp==1.0.4
+    # via sphinx
+sphinxcontrib-devhelp==1.0.2
+    # via sphinx
+sphinxcontrib-htmlhelp==2.0.1
+    # via sphinx
+sphinxcontrib-jquery==4.1
+    # via sphinx-rtd-theme
+sphinxcontrib-jsmath==1.0.1
+    # via sphinx
+sphinxcontrib-qthelp==1.0.3
+    # via sphinx
+sphinxcontrib-serializinghtml==1.1.5
+    # via sphinx
+typing-extensions==4.7.1
+    # via myst-parser
+urllib3==2.0.4
+    # via requests
--- a/docs/resources.rst
+++ b/docs/resources.rst
@@ -0,0 +1,47 @@
+Resources
+=========
+
+Programs using libcifpp often need access to common data files. E.g. CIF dictionary files, CCP4 monomer restraints files or the CCD data file. In libcifpp these files are called resources. These files are often also based on external sources that are updated on a regular basis.
+
+Resources can be compiled into the executable so that the resulting
+application can be made portable to other machines. For this you
+need to use `mrc <https://github.com/mhekkel/mrc.git>`_ which only works
+on Un*x like systems using the ELF executable format or on MS Windows.
+
+But resources may also be located as files on the filesystem at
+specific locations. And you can specify your own location for
+files (a directory) or even override named resources with your
+own data.
+
+Loading Resources
+-----------------
+
+No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*. 
+
+The order in which resources are searched for is:
+
+* Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
+  for this name.
+
+* Search the paths specified by :cpp:func:`cif::add_data_directory`, last one
+  added is searched first
+
+* Search the so-called *CACHE_DIR*. This location is defined
+  at compile time and based on the installation directory of
+  libcifpp. Usually it is */var/cache/libcifpp*.
+  It is in this directory where the cron job for libcifpp will
+  put the updated files weekly.
+
+* If the *CCP4* environment is available, the
+  *$ENV{CCP4}/share/libcifpp* is searched.
+
+* If the environment variable *LIBCIFPP_DATA_DIR* is set it
+  is searched
+
+* The *DATA_DIR* is searched, this is also a variable defined
+  at compile time, also based on the installation directory
+  of libcifpp. It usually is */usr/share/libcifpp*
+
+* As a last resort an attempt is made to load the data from
+  resources compiled by `mrc <https://github.com/mhekkel/mrc.git>`_.
+
--- a/docs/symmetry.rst
+++ b/docs/symmetry.rst
@@ -0,0 +1,108 @@
+Symmetry & Geometry
+===================
+
+Although not really a core *CIF* functionality, when working with *mmCIF* files you often need to work with symmetry information. And symmetry works on points in a certain space and thus geometry calculations are also something you need often. Former versions of *libcifpp* used to use `clipper <http://www.ysbl.york.ac.uk/~cowtan/clipper/doc/index.html>`_ to do many of these calculations, but that introduces a dependency and besides, the way clipper numbers symmetry operations is not completely compatible with the way this is done in the PDB.
+
+Points
+------
+
+The most basic type in use is :cpp:type:`cif::point`. It can be thought of as a point in space with three coordinates, but it is also often used as a vector in 3d space. To keep the interface simple there's no separate vector type.
+
+Many functions are available in :ref:`file_cif++_point.hpp` that work on points. There are functions to calculate the :cpp:func:`cif::distance` between two points and also functions to calculate dot products, cross products and dihedral angles between sets of points.
+
+Quaternions
+-----------
+
+All operations inside *libcifpp* that perform some kind of rotation use :cpp:type:`cif::quaternion`. The reason to use Quaternions is not only that they are cool, they are faster than multiplying with a matrix and the results also suffer less from numerical instability.
+
+Matrix
+------
+
+Although Quaternions are the preferred way of doing rotations, not every manipulation is a rotation and thus we need a matrix class as well. Matrices and their operations are encoded as matrix_expressions in *libcifpp* allowing the compiler to generate very fast code. See the :ref:`file_cif++_matrix.hpp` for what is on offer.
+
+Crystals
+--------
+
+The *CIF* and *mmCIF* formats were initially developed to store crystallographic information on structures. Apart from coordinates and the chemical information the crystallographic information is important. This information can be split into two parts, a unit cell and a set of :ref:`symmetry-ops` making up a spacegroup. The spacegroup number and name are stored in the *symmetry* category. The corresponding symmetry operations can be obtained in *libcifpp* by using the :cpp:class:`cif::spacegroup`. The cell is stored in the category *cell* and likewise can be loaded using the :cpp:class:`cif::cell`. Together these two classes make up a crystal and so we have a :cpp:class:`cif::crystal` which contains both. You can easily create such a crystal object by passing the datablock containing the data to the constructor. As in:
+
+.. code:: cpp
+
+    // Load the file
+    cif::file f("1cbs.cif.gz");
+
+    auto &db = f.front();
+    cif::crystal c(db);
+
+.. _symmetry-ops:
+
+Symmetry operations
+-------------------
+
+Each basic symmetry operation in the crystallographic world consists of a matrix multiplication followed by a translation. To apply such an operation on a Cartesian coordinate you first have to convert the point into a fractional coordinate with respect to the unit cell of the crystal, then apply the matrix and translation operations and then convert the result back into Cartesian coordinates. This is all done by the proper routines in *libcifpp*.
+
+Symmetry operations are encoded as a string in *mmCIF* PDBx files. The format is a string with the rotational number followed by an underscore and then the encoded translation in each direction where 5 means no translation. So, the identity operator is ``1_555`` meaning that we have rotational number 1 (which is always the identity rotation, point multiplied with the identity matrix) and a translation of zero in each direction.
+
+To give an idea how this works, here's a piece of code copied from one of the unit tests in *libcifpp*. It takes the *struct_conn* records in a certain PDB file and checks whether the distances in each row correspond to what we can calculate.
+
+.. code:: cpp
+
+    using namespace cif::literals;
+
+    // Load the file
+    cif::file f(gTestDir / "2bi3.cif.gz");
+
+    // Simply assume we can use the first datablock
+    auto &db = f.front();
+
+    // Load the crystal information
+    cif::crystal c(db);
+
+    // Take references to the two categories we need
+    auto struct_conn = db["struct_conn"];
+    auto atom_site = db["atom_site"];
+
+    // Loop over all rows in struct_conn taking only the values we need
+    for (const auto &[
+            asym1, seqid1, authseqid1, atomid1, symm1,
+            asym2, seqid2, authseqid2, atomid2, symm2,
+            dist] : struct_conn.find<
+                std::string,std::optional<int>,std::string,std::string,std::string,
+                std::string,std::optional<int>,std::string,std::string,std::string,
+                float>(
+            cif::key("ptnr1_symmetry") != "1_555" or cif::key("ptnr2_symmetry") != "1_555",
+            "ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id", "ptnr1_symmetry", 
+            "ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_auth_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry", 
+            "pdbx_dist_value"
+        ))
+    {
+        // Find the location of the first atom
+        cif::point p1 = atom_site.find1<float,float,float>(
+            "label_asym_id"_key == asym1 and "label_seq_id"_key == seqid1 and "auth_seq_id"_key == authseqid1 and "label_atom_id"_key == atomid1,
+            "cartn_x", "cartn_y", "cartn_z");
+
+        // Find the location of the second atom
+        cif::point p2 = atom_site.find1<float,float,float>(
+            "label_asym_id"_key == asym2 and "label_seq_id"_key == seqid2 and "auth_seq_id"_key == authseqid2 and "label_atom_id"_key == atomid2,
+            "cartn_x", "cartn_y", "cartn_z");
+
+        // Calculate the position of the first atom using the symmetry operator defined in struct_conn
+        auto sa1 = c.symmetry_copy(p1, cif::sym_op(symm1));
+
+        // Calculate the position of the second atom using the symmetry operator defined in struct_conn
+        auto sa2 = c.symmetry_copy(p2, cif::sym_op(symm2));
+
+        // The distance between these symmetry atoms should be equal to the distance in the struct_conn record
+        assert(cif::distance(sa1, sa2) == dist);
+
+        // And to show how you can obtain the closest symmetry copy of an atom near another one:
+        // here we request the symmetry copy of p2 that lies closest to p1
+        const auto &[d, p, so] = c.closest_symmetry_copy(p1, p2);
+
+        // And that should of course be equal to the location in struct_conn for p2
+        assert(p.m_x == sa2.m_x);
+        assert(p.m_y == sa2.m_y);
+        assert(p.m_z == sa2.m_z);
+
+        // Distance and symmetry operator string should also be the same
+        assert(d == dist);
+        assert(so.string() == symm2);
+    }
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -0,0 +1,7 @@
+cmake_minimum_required(VERSION 3.15)
+project(cifpp_example LANGUAGES CXX)
+
+find_package(cifpp REQUIRED) # the example below links the cifpp::cifpp target from this package
+
+add_executable(example example.cpp)
+target_link_libraries(example PRIVATE cifpp::cifpp) # PRIVATE: explicit keyword signature; an executable has no consumers
--- a/examples/example.cpp
+++ b/examples/example.cpp
@@ -1,26 +1,37 @@
-#include <iostream>
 #include <filesystem>
+#include <iostream>

 #include <cif++.hpp>

 namespace fs = std::filesystem;

-int main()
+int main(int argc, char *argv[])
 {
-	cif::file file;
-	file.load("1cbs.cif.gz");
+	if (argc != 2)
+	{
+		std::cerr << "Usage: example <inputfile>\n";
+		exit(1);
+	}

-	auto& db = file.front();
+	cif::file file = cif::pdb::read(argv[1]);
+
+	if (file.empty())
+	{
+		std::cerr << "Empty file\n";
+		exit(1);
+	}
+
+	auto &db = file.front();
 	auto &atom_site = db["atom_site"];
 	auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();

-	std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
-		<< "residues with an OXT are:" << std::endl;
-	
-	for (const auto& [asym, comp, seqnr]: atom_site.find<std::string,std::string,int>(
-			cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
+	std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT\n"
+			  << "residues with an OXT are:\n";
+
+	for (const auto &[asym, comp, seqnr] : atom_site.find<std::string, std::string, int>(
+			 cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
 	{
-		std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
+		std::cout << asym << ' ' << comp << ' ' << seqnr << '\n';
 	}

 	return 0;
--- a/examples/makefile
+++ b/examples/makefile
@@ -1,8 +0,0 @@
-CXX = c++ -std=c++17
-CXXFLAGS = $(shell pkg-config --cflags libcifpp)
-LIBS = $(shell pkg-config --libs libcifpp)
-
-all: example
-
-example: example.cpp
-	$(CXX) -o $@ $? $(CXXFLAGS) $(LIBS)
--- a/include/cif++.hpp
+++ b/include/cif++.hpp
@@ -26,16 +26,16 @@

 #pragma once

-#include <cif++/utilities.hpp>
-#include <cif++/file.hpp>
-#include <cif++/parser.hpp>
-#include <cif++/format.hpp>
+#include "cif++/utilities.hpp"
+#include "cif++/file.hpp"
+#include "cif++/parser.hpp"
+#include "cif++/format.hpp"

-#include <cif++/compound.hpp>
-#include <cif++/point.hpp>
-#include <cif++/symmetry.hpp>
+#include "cif++/compound.hpp"
+#include "cif++/point.hpp"
+#include "cif++/symmetry.hpp"

-#include <cif++/model.hpp>
+#include "cif++/model.hpp"

-#include <cif++/pdb/io.hpp>
-#include <cif++/gzio.hpp>
+#include "cif++/pdb.hpp"
+#include "cif++/gzio.hpp"
--- a/include/cif++/atom_type.hpp
+++ b/include/cif++/atom_type.hpp
@@ -24,207 +24,254 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-// Lib for working with structures as contained in mmCIF and PDB files
+/** \file atom_type.hpp
+ * 
+ * This file contains information about all known elements
+ */

 #pragma once

+#include "cif++/exports.hpp"
+
 #include <cstdint>
+#include <limits>
 #include <stdexcept>
 #include <string>

 namespace cif
 {

+/** Atom type as an integer. All known elements are available as a constant. */
+
 enum atom_type : uint8_t
 {
-	Nn = 0, // Unknown
+	Nn = 0, ///< Unknown

-	H = 1,  // Hydrogen
-	He = 2, // Helium
+	H = 1,  ///< Hydrogen
+	He = 2, ///< Helium

-	Li = 3,  // Lithium
-	Be = 4,  // Beryllium
-	B = 5,   // Boron
-	C = 6,   // Carbon
-	N = 7,   // Nitrogen
-	O = 8,   // Oxygen
-	F = 9,   // Fluorine
-	Ne = 10, // Neon
+	Li = 3,  ///< Lithium
+	Be = 4,  ///< Beryllium
+	B = 5,   ///< Boron
+	C = 6,   ///< Carbon
+	N = 7,   ///< Nitrogen
+	O = 8,   ///< Oxygen
+	F = 9,   ///< Fluorine
+	Ne = 10, ///< Neon

-	Na = 11, // Sodium
-	Mg = 12, // Magnesium
-	Al = 13, // Aluminium
-	Si = 14, // Silicon
-	P = 15,  // Phosphorus
-	S = 16,  // Sulfur
-	Cl = 17, // Chlorine
-	Ar = 18, // Argon
+	Na = 11, ///< Sodium
+	Mg = 12, ///< Magnesium
+	Al = 13, ///< Aluminium
+	Si = 14, ///< Silicon
+	P = 15,  ///< Phosphorus
+	S = 16,  ///< Sulfur
+	Cl = 17, ///< Chlorine
+	Ar = 18, ///< Argon

-	K = 19,  // Potassium
-	Ca = 20, // Calcium
-	Sc = 21, // Scandium
-	Ti = 22, // Titanium
-	V = 23,  // Vanadium
-	Cr = 24, // Chromium
-	Mn = 25, // Manganese
-	Fe = 26, // Iron
-	Co = 27, // Cobalt
-	Ni = 28, // Nickel
-	Cu = 29, // Copper
-	Zn = 30, // Zinc
-	Ga = 31, // Gallium
-	Ge = 32, // Germanium
-	As = 33, // Arsenic
-	Se = 34, // Selenium
-	Br = 35, // Bromine
-	Kr = 36, // Krypton
+	K = 19,  ///< Potassium
+	Ca = 20, ///< Calcium
+	Sc = 21, ///< Scandium
+	Ti = 22, ///< Titanium
+	V = 23,  ///< Vanadium
+	Cr = 24, ///< Chromium
+	Mn = 25, ///< Manganese
+	Fe = 26, ///< Iron
+	Co = 27, ///< Cobalt
+	Ni = 28, ///< Nickel
+	Cu = 29, ///< Copper
+	Zn = 30, ///< Zinc
+	Ga = 31, ///< Gallium
+	Ge = 32, ///< Germanium
+	As = 33, ///< Arsenic
+	Se = 34, ///< Selenium
+	Br = 35, ///< Bromine
+	Kr = 36, ///< Krypton

-	Rb = 37, // Rubidium
-	Sr = 38, // Strontium
-	Y = 39,  // Yttrium
-	Zr = 40, // Zirconium
-	Nb = 41, // Niobium
-	Mo = 42, // Molybdenum
-	Tc = 43, // Technetium
-	Ru = 44, // Ruthenium
-	Rh = 45, // Rhodium
-	Pd = 46, // Palladium
-	Ag = 47, // Silver
-	Cd = 48, // Cadmium
-	In = 49, // Indium
-	Sn = 50, // Tin
-	Sb = 51, // Antimony
-	Te = 52, // Tellurium
-	I = 53,  // Iodine
-	Xe = 54, // Xenon
-	Cs = 55, // Caesium
-	Ba = 56, // Barium
-	La = 57, // Lanthanum
+	Rb = 37, ///< Rubidium
+	Sr = 38, ///< Strontium
+	Y = 39,  ///< Yttrium
+	Zr = 40, ///< Zirconium
+	Nb = 41, ///< Niobium
+	Mo = 42, ///< Molybdenum
+	Tc = 43, ///< Technetium
+	Ru = 44, ///< Ruthenium
+	Rh = 45, ///< Rhodium
+	Pd = 46, ///< Palladium
+	Ag = 47, ///< Silver
+	Cd = 48, ///< Cadmium
+	In = 49, ///< Indium
+	Sn = 50, ///< Tin
+	Sb = 51, ///< Antimony
+	Te = 52, ///< Tellurium
+	I = 53,  ///< Iodine
+	Xe = 54, ///< Xenon
+	Cs = 55, ///< Caesium
+	Ba = 56, ///< Barium
+	La = 57, ///< Lanthanum

-	Hf = 72, // Hafnium
-	Ta = 73, // Tantalum
-	W = 74,  // Tungsten
-	Re = 75, // Rhenium
-	Os = 76, // Osmium
-	Ir = 77, // Iridium
-	Pt = 78, // Platinum
-	Au = 79, // Gold
-	Hg = 80, // Mercury
-	Tl = 81, // Thallium
-	Pb = 82, // Lead
-	Bi = 83, // Bismuth
-	Po = 84, // Polonium
-	At = 85, // Astatine
-	Rn = 86, // Radon
-	Fr = 87, // Francium
-	Ra = 88, // Radium
-	Ac = 89, // Actinium
+	Hf = 72, ///< Hafnium
+	Ta = 73, ///< Tantalum
+	W = 74,  ///< Tungsten
+	Re = 75, ///< Rhenium
+	Os = 76, ///< Osmium
+	Ir = 77, ///< Iridium
+	Pt = 78, ///< Platinum
+	Au = 79, ///< Gold
+	Hg = 80, ///< Mercury
+	Tl = 81, ///< Thallium
+	Pb = 82, ///< Lead
+	Bi = 83, ///< Bismuth
+	Po = 84, ///< Polonium
+	At = 85, ///< Astatine
+	Rn = 86, ///< Radon
+	Fr = 87, ///< Francium
+	Ra = 88, ///< Radium
+	Ac = 89, ///< Actinium

-	Rf = 104, // Rutherfordium
-	Db = 105, // Dubnium
-	Sg = 106, // Seaborgium
-	Bh = 107, // Bohrium
-	Hs = 108, // Hassium
-	Mt = 109, // Meitnerium
-	Ds = 110, // Darmstadtium
-	Rg = 111, // Roentgenium
-	Cn = 112, // Copernicium
-	Nh = 113, // Nihonium
-	Fl = 114, // Flerovium
-	Mc = 115, // Moscovium
-	Lv = 116, // Livermorium
-	Ts = 117, // Tennessine
-	Og = 118, // Oganesson
+	Rf = 104, ///< Rutherfordium
+	Db = 105, ///< Dubnium
+	Sg = 106, ///< Seaborgium
+	Bh = 107, ///< Bohrium
+	Hs = 108, ///< Hassium
+	Mt = 109, ///< Meitnerium
+	Ds = 110, ///< Darmstadtium
+	Rg = 111, ///< Roentgenium
+	Cn = 112, ///< Copernicium
+	Nh = 113, ///< Nihonium
+	Fl = 114, ///< Flerovium
+	Mc = 115, ///< Moscovium
+	Lv = 116, ///< Livermorium
+	Ts = 117, ///< Tennessine
+	Og = 118, ///< Oganesson

-	Ce = 58, // Cerium
-	Pr = 59, // Praseodymium
-	Nd = 60, // Neodymium
-	Pm = 61, // Promethium
-	Sm = 62, // Samarium
-	Eu = 63, // Europium
-	Gd = 64, // Gadolinium
-	Tb = 65, // Terbium
-	Dy = 66, // Dysprosium
-	Ho = 67, // Holmium
-	Er = 68, // Erbium
-	Tm = 69, // Thulium
-	Yb = 70, // Ytterbium
-	Lu = 71, // Lutetium
+	Ce = 58, ///< Cerium
+	Pr = 59, ///< Praseodymium
+	Nd = 60, ///< Neodymium
+	Pm = 61, ///< Promethium
+	Sm = 62, ///< Samarium
+	Eu = 63, ///< Europium
+	Gd = 64, ///< Gadolinium
+	Tb = 65, ///< Terbium
+	Dy = 66, ///< Dysprosium
+	Ho = 67, ///< Holmium
+	Er = 68, ///< Erbium
+	Tm = 69, ///< Thulium
+	Yb = 70, ///< Ytterbium
+	Lu = 71, ///< Lutetium

-	Th = 90,  // Thorium
-	Pa = 91,  // Protactinium
-	U = 92,   // Uranium
-	Np = 93,  // Neptunium
-	Pu = 94,  // Plutonium
-	Am = 95,  // Americium
-	Cm = 96,  // Curium
-	Bk = 97,  // Berkelium
-	Cf = 98,  // Californium
-	Es = 99,  // Einsteinium
-	Fm = 100, // Fermium
-	Md = 101, // Mendelevium
-	No = 102, // Nobelium
-	Lr = 103, // Lawrencium
+	Th = 90,  ///< Thorium
+	Pa = 91,  ///< Protactinium
+	U = 92,   ///< Uranium
+	Np = 93,  ///< Neptunium
+	Pu = 94,  ///< Plutonium
+	Am = 95,  ///< Americium
+	Cm = 96,  ///< Curium
+	Bk = 97,  ///< Berkelium
+	Cf = 98,  ///< Californium
+	Es = 99,  ///< Einsteinium
+	Fm = 100, ///< Fermium
+	Md = 101, ///< Mendelevium
+	No = 102, ///< Nobelium
+	Lr = 103, ///< Lawrencium

-	D = 119, // Deuterium
+	D = 119, ///< Deuterium
 };

 // --------------------------------------------------------------------
-// atom_type_info
+
+/// An enum used to select the desired radius for an atom.
+/// All values are collected from the wikipedia pages on atom radii

 enum class radius_type
 {
-	calculated,
-	empirical,
+	calculated, ///< Calculated radius from theoretical models
+	empirical,  ///< Empirically measured covalent radii
+
+	/// @deprecated It is a bit unclear where these values came from. So, better not use them
 	covalent_empirical,

-	single_bond,
-	double_bond,
-	triple_bond,
+	single_bond, ///< Bond length for a single covalent bond calculated using statistical analysis
+	double_bond, ///< Bond length for a double covalent bond calculated using statistical analysis
+	triple_bond, ///< Bond length for a triple covalent bond calculated using statistical analysis

-	van_der_waals,
+	van_der_waals, ///< Radius of an imaginary hard sphere representing the distance of closest approach for another atom

-	type_count
+	type_count ///< Number of radii
 };

+/// @brief The number of radii per element which can be requested from atom_type_info
 constexpr size_t kRadiusTypeCount = static_cast<size_t>(radius_type::type_count);

+/// An enum used to select either the effective or the crystal radius of an ion.
+/// See explanation on Wikipedia: https://en.wikipedia.org/wiki/Ionic_radius
+
 enum class ionic_radius_type
 {
-	effective, crystal
+	effective, ///< Based on distance between ions in a crystal structure as determined by X-ray crystallography
+	crystal    ///< Calculated ion radius based on a function of ionic charge and spin
 };

+/// Requests for an unknown radius value return kNA
+constexpr float kNA = std::numeric_limits<float>::quiet_NaN();
+
+/// A struct holding the known information for all elements defined in atom_type
+
 struct atom_type_info
 {
+	/// The type as an atom_type
 	atom_type type;
+
+	/// The official name for this element
 	std::string name;
+
+	/// The official symbol for this element
 	std::string symbol;
+
+	/// The weight of this element
 	float weight;
+
+	/// A flag indicating whether the element is a metal
 	bool metal;
+
+	/// Array containing all known radii for this element. A value of kNA is
+	/// stored for unknown values
 	float radii[kRadiusTypeCount];
 };

+/// Array of atom_type_info struct for each of the defined elements in atom_type
+
 extern CIFPP_EXPORT const atom_type_info kKnownAtoms[];

 // --------------------------------------------------------------------
 // AtomTypeTraits

+/// A traits class to access information for known elements
+
 class atom_type_traits
 {
  public:
+	/// Constructor taking an atom_type \a a
 	atom_type_traits(atom_type a);
+
+	/// Constructor based on the element as a string in \a symbol
 	atom_type_traits(const std::string &symbol);

-	atom_type type() const { return m_info->type; }
-	std::string name() const { return m_info->name; }
-	std::string symbol() const { return m_info->symbol; }
-	float weight() const { return m_info->weight; }
+	atom_type type() const { return m_info->type; }       ///< Returns the atom_type
+	std::string name() const { return m_info->name; }     ///< Returns the name of the element
+	std::string symbol() const { return m_info->symbol; } ///< Returns the symbol of the element
+	float weight() const { return m_info->weight; }       ///< Returns the average weight of the element

-	bool is_metal() const { return m_info->metal; }
+	bool is_metal() const { return m_info->metal; } ///< Returns true if the element is a metal

+	/// Return true if the symbol in \a symbol actually exists in the list of known elements in atom_type
 	static bool is_element(const std::string &symbol);
+
+	/// Return true if the symbol in \a symbol exists and is a metal
 	static bool is_metal(const std::string &symbol);

+	/// @brief Return the radius for the element, use \a type to select which radius to return
+	/// @param type The selector for which radius to return
+	/// @return The requested radius or kNA if not known (or applicable)
 	float radius(radius_type type = radius_type::single_bond) const
 	{
 		if (type >= radius_type::type_count)
@@ -247,31 +294,46 @@ class atom_type_traits
 	/// \brief Return the radius for a charged version of this atom, returns the effective radius by default
 	///
 	/// \param charge  The charge of the ion
+	/// \param type    The requested ion radius type
 	/// \return        The radius of the ion
 	float ionic_radius(int charge, ionic_radius_type type = ionic_radius_type::effective) const
 	{
 		return type == ionic_radius_type::effective ? effective_ionic_radius(charge) : crystal_ionic_radius(charge);
 	}

-	// data type encapsulating the Waasmaier & Kirfel scattering factors
-	// in a simplified form (only a and b).
-	// Added the electrion scattering factors as well
+	/**
+	 * @brief data type encapsulating the scattering factors
+	 * in a simplified form (only a and b).
+	 */
 	struct SFData
 	{
+		/** @cond */
 		double a[6], b[6];
+		/** @endcond */
 	};

-	// to get the Cval and Siva values, use this constant as charge:
-	enum
-	{
-		kWKSFVal = -99
-	};
+	/// @brief to get the Cval and Siva scattering factor values, use this constant as charge:
+	static constexpr int kWKSFVal = -99;

+	/// @brief Return the Waasmaier & Kirfel scattering factor values for the element
+	///
+	/// The coefficients from Waasmaier & Kirfel (1995), Acta Cryst. A51, 416-431.
+	///
+	/// @param charge The charge for which the structure values should be returned, use kWKSFVal to return the *Cval* and *Siva* values
+	/// @return The scattering factors as a SFData struct
 	const SFData &wksf(int charge = 0) const;
+
+	/// @brief Return the electron scattering factor values for the element
+	///
+	/// @return The scattering factors as a SFData struct
 	const SFData &elsf() const;

+	/// Clipper doesn't like atoms with charges that do not have a scattering factor. And
+	/// rightly so, but we need to know in advance if this is the case
+	bool has_sf(int charge) const;
+
  private:
 	const struct atom_type_info *m_info;
 };

-} // namespace pdbx
+} // namespace cif
--- a/include/cif++/category.hpp
+++ b/include/cif++/category.hpp
@@ -26,37 +26,58 @@

 #pragma once

-#include <cif++/forward_decl.hpp>
+#include "cif++/forward_decl.hpp"

-#include <cif++/condition.hpp>
-#include <cif++/iterator.hpp>
-#include <cif++/row.hpp>
-#include <cif++/validate.hpp>
+#include "cif++/condition.hpp"
+#include "cif++/iterator.hpp"
+#include "cif++/row.hpp"
+#include "cif++/validate.hpp"
+#include "cif++/text.hpp"

 #include <array>

-// TODO: implement all of:
-// https://en.cppreference.com/w/cpp/named_req/Container
-// https://en.cppreference.com/w/cpp/named_req/SequenceContainer
-// and more?
+/** \file category.hpp
+  * Documentation for the cif::category class
+  *
+  * The category class should meet the requirements of Container and
+  * SequenceContainer.
+  * 
+  * TODO: implement all of:
+  * https://en.cppreference.com/w/cpp/named_req/Container
+  * https://en.cppreference.com/w/cpp/named_req/SequenceContainer
+  * and more?
+  */

 namespace cif
 {

 // --------------------------------------------------------------------
-// special exception
+// special exceptions
+
+/// @brief A duplicate_key_error is thrown when an attempt is made
+/// to insert a row with values that would introduce a duplicate key
+/// in the index. Of course, this can only happen if a @ref category_validator
+/// has been defined for this category.
 class duplicate_key_error : public std::runtime_error
 {
  public:
+    /**
+     * @brief Construct a new duplicate key error object
+     */
 	duplicate_key_error(const std::string &msg)
 		: std::runtime_error(msg)
 	{
 	}
 };

+/// @brief A multiple_results_error is thrown when you request a single
+/// row using a query but the query does not match exactly one row.
 class multiple_results_error : public std::runtime_error
 {
  public:
+    /**
+     * @brief Construct a new multiple results error object
+     */
 	multiple_results_error()
 		: std::runtime_error("query should have returned exactly one row")
 	{
@@ -64,10 +85,29 @@ class multiple_results_error : public std::runtime_error
 };

 // --------------------------------------------------------------------
+// These should be moved elsewhere, one day.
+
+/// \cond
+template <typename _Tp>
+inline constexpr bool is_optional_v = false;
+template <typename _Tp>
+inline constexpr bool is_optional_v<std::optional<_Tp>> = true;
+/// \endcond
+
+// --------------------------------------------------------------------
+
+/// The class category is a sequence container for rows of data values.
+/// You could think of it as a std::vector<cif::row_handle> like class.
+///
+/// A @ref category_validator can be assigned to an object of category
+/// after which this class can validate contained data and use an
+/// index to keep key values unique.

 class category
 {
  public:
+	/// \cond
+
 	friend class row_handle;

 	template <typename, typename...>
@@ -79,38 +119,68 @@ class category
 	using iterator = iterator_impl<category>;
 	using const_iterator = iterator_impl<const category>;

-	category() = default;
+	/// \endcond

-	category(std::string_view name);
-
-	category(const category &rhs);
-
-	category(category &&rhs);
-
-	category &operator=(const category &rhs);
-
-	category &operator=(category &&rhs);
+	category() = default;                     ///< Default constructor
+	category(std::string_view name);          ///< Constructor taking a \a name
+	category(const category &rhs);            ///< Copy constructor
+	category(category &&rhs);                 ///< Move constructor
+	category &operator=(const category &rhs); ///< Copy assignment operator
+	category &operator=(category &&rhs);      ///< Move assignment operator

+	/// @brief Destructor
+	/// @note Please note that the destructor is not virtual. It is assumed that
+	/// you will not derive from this class.
 	~category();

 	// --------------------------------------------------------------------

-	const std::string &name() const { return m_name; }
-
-	iset key_fields() const;
-
-	std::set<uint16_t> key_field_indices() const;
+	const std::string &name() const { return m_name; } ///< Returns the name of the category
+	iset key_fields() const; ///< Returns the cif::iset of key field names. Retrieved from the @ref category_validator for this category
+	std::set<uint16_t> key_field_indices() const; ///< Returns a set of indices for the key fields.

+	/// @brief Set the validator for this category to @a v
+	/// @param v The validator to assign. A nullptr value is allowed.
+	/// @param db The enclosing @ref datablock
 	void set_validator(const validator *v, datablock &db);
+
+	/// @brief Update the links in this category
+	/// @param db The enclosing @ref datablock
 	void update_links(datablock &db);

+	/// @brief Return the global @ref validator for the data
+	/// @return The @ref validator or nullptr if not assigned
 	const validator *get_validator() const { return m_validator; }
+
+	/// @brief Return the category validator for this category
+	/// @return The @ref category_validator or nullptr if not assigned
 	const category_validator *get_cat_validator() const { return m_cat_validator; }

+	/// @brief Validate the data stored using the assigned @ref category_validator
+	/// @return Returns true if all validations pass
 	bool is_valid() const;
+
+	/// @brief Validate links, that means, values in this category should have an
+	/// accompanying value in parent categories.
+	/// 
+	/// @note
+	/// The code makes one exception when validating missing links and that's between
+	/// *atom_site* and a parent *pdbx_poly_seq_scheme* or *entity_poly_seq*.
+	/// This particular case should be skipped because it is wrong:
+	/// there are atoms that are not part of a polymer, and thus will have no
+	/// parent in those categories.
+	///
+	/// @return Returns true if all validations pass
 	bool validate_links() const;

+	/// @brief Equality operator, returns true if @a rhs is equal to this
+	/// @param rhs The object to compare with
+	/// @return True if the data contained is equal
 	bool operator==(const category &rhs) const;
+
+	/// @brief Inequality operator, returns true if @a rhs is not equal to this
+	/// @param rhs The object to compare with
+	/// @return True if the data contained is not equal
 	bool operator!=(const category &rhs) const
 	{
 		return not operator==(rhs);
@@ -118,61 +188,87 @@ class category

 	// --------------------------------------------------------------------

+	/// @brief Return a reference to the first row in this category.
+	/// @return Reference to the first row in this category. The result is undefined if
+	/// the category is empty.
 	reference front()
 	{
 		return { *this, *m_head };
 	}

+	/// @brief Return a const reference to the first row in this category.
+	/// @return const reference to the first row in this category. The result is undefined if
+	/// the category is empty.
 	const_reference front() const
 	{
 		return { const_cast<category &>(*this), const_cast<row &>(*m_head) };
 	}

+	/// @brief Return a reference to the last row in this category.
+	/// @return Reference to the last row in this category. The result is undefined if
+	/// the category is empty.
 	reference back()
 	{
 		return { *this, *m_tail };
 	}

+	/// @brief Return a const reference to the last row in this category.
+	/// @return const reference to the last row in this category. The result is undefined if
+	/// the category is empty.
 	const_reference back() const
 	{
 		return { const_cast<category &>(*this), const_cast<row &>(*m_tail) };
 	}

+	/// Return an iterator to the first row
 	iterator begin()
 	{
 		return { *this, m_head };
 	}

+	/// Return an iterator pointing past the last row
 	iterator end()
 	{
 		return { *this, nullptr };
 	}

+	/// Return a const iterator to the first row
 	const_iterator begin() const
 	{
 		return { *this, m_head };
 	}

+	/// Return a const iterator pointing past the last row
 	const_iterator end() const
 	{
 		return { *this, nullptr };
 	}

+	/// Return a const iterator to the first row
 	const_iterator cbegin() const
 	{
 		return { *this, m_head };
 	}

+	/// Return a const iterator pointing past the last row
 	const_iterator cend() const
 	{
 		return { *this, nullptr };
 	}

+	/// Return a count of the rows in this container
 	size_t size() const
 	{
 		return std::distance(cbegin(), cend());
 	}

+	/// Return the theoretical maximum number of rows that can be stored
+	size_t max_size() const
+	{
+		return std::numeric_limits<size_t>::max();	// this is a bit optimistic, I guess
+	}
+
+	/// Return true if the category is empty
 	bool empty() const
 	{
 		return m_head == nullptr;
@@ -189,6 +285,9 @@ class category
 	/// @return The row found in the index, or an undefined row_handle
 	row_handle operator[](const key_type &key);

+	/// @brief Return a const row_handle for the row specified by \a key
+	/// @param key The value for the key, fields specified in the dictionary should have a value
+	/// @return The row found in the index, or an undefined row_handle
 	const row_handle operator[](const key_type &key) const
 	{
 		return const_cast<category *>(this)->operator[](key);
@@ -196,6 +295,17 @@ class category

 	// --------------------------------------------------------------------

+	/// @brief Return a special const iterator for all rows in this category.
+	/// This iterator can be used in a structured binding context. E.g.:
+	///
+	/// @code{.cpp}
+	/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
+	///   std::cout << name << ": " << value << '\n';
+	/// @endcode 
+	///
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+
 	template <typename... Ts, typename... Ns>
 	iterator_proxy<const category, Ts...> rows(Ns... names) const
 	{
@@ -203,6 +313,22 @@ class category
 		return iterator_proxy<const category, Ts...>(*this, begin(), { names... });
 	}

+	/// @brief Return a special iterator for all rows in this category.
+	/// This iterator can be used in a structured binding context. E.g.:
+	///
+	/// @code{.cpp}
+	/// for (const auto &[name, value] : cat.rows<std::string,int>("item_name", "item_value"))
+	///   std::cout << name << ": " << value << '\n';
+	///
+	/// // or in case we only need one column:
+	///
+	/// for (int id : cat.rows<int>("id"))
+	///   std::cout << id << '\n';
+	/// @endcode 
+	///
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+
 	template <typename... Ts, typename... Ns>
 	iterator_proxy<category, Ts...> rows(Ns... names)
 	{
@@ -212,26 +338,72 @@ class category

 	// --------------------------------------------------------------------

+	/// @brief Return a special iterator to loop over all rows that conform to @a cond
+	///
+	/// @code{.cpp}
+	/// for (row_handle rh : cat.find(cif::key("first_name") == "John" and cif::key("last_name") == "Doe"))
+	///    .. // do something with rh
+	/// @endcode 
+	///
+	/// @param cond The condition for the query
+	/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
+	/// to a @ref row_handle
+
 	conditional_iterator_proxy<category> find(condition &&cond)
 	{
 		return find(begin(), std::move(cond));
 	}

+	/// @brief Return a special iterator to loop over all rows that conform to @a cond
+	/// starting at @a pos
+	///
+	/// @param pos Where to start searching
+	/// @param cond The condition for the query
+	/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
+	/// to a @ref row_handle
+
 	conditional_iterator_proxy<category> find(iterator pos, condition &&cond)
 	{
 		return { *this, pos, std::move(cond) };
 	}

+	/// @brief Return a special const iterator to loop over all rows that conform to @a cond
+	///
+	/// @param cond The condition for the query
+	/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
+	/// to a const @ref row_handle
+
 	conditional_iterator_proxy<const category> find(condition &&cond) const
 	{
 		return find(cbegin(), std::move(cond));
 	}

+	/// @brief Return a special const iterator to loop over all rows that conform to @a cond
+	/// starting at @a pos
+	///
+	/// @param pos Where to start searching
+	/// @param cond The condition for the query
+	/// @return A special iterator that loops over all elements that match. The iterator can be dereferenced
+	/// to a const @ref row_handle
+
 	conditional_iterator_proxy<const category> find(const_iterator pos, condition &&cond) const
 	{
 		return conditional_iterator_proxy<const category>{ *this, pos, std::move(cond) };
 	}

+	/// @brief Return a special iterator to loop over all rows that conform to @a cond. The resulting
+	/// iterator can be used in a structured binding context.
+	///
+	/// @code{.cpp}
+	/// for (const auto &[name, value] : cat.find<std::string,int>(cif::key("item_value") > 10, "item_name", "item_value"))
+	///    std::cout << name << ": " << value << '\n';
+	/// @endcode 
+	///
+	/// @param cond The condition for the query
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+	/// @return A special iterator that loops over all elements that match.
+
 	template <typename... Ts, typename... Ns>
 	conditional_iterator_proxy<category, Ts...> find(condition &&cond, Ns... names)
 	{
@@ -239,6 +411,14 @@ class category
 		return find<Ts...>(cbegin(), std::move(cond), std::forward<Ns>(names)...);
 	}

+	/// @brief Return a special const iterator to loop over all rows that conform to @a cond. The resulting
+	/// iterator can be used in a structured binding context.
+	///
+	/// @param cond The condition for the query
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+	/// @return A special iterator that loops over all elements that match.
+
 	template <typename... Ts, typename... Ns>
 	conditional_iterator_proxy<const category, Ts...> find(condition &&cond, Ns... names) const
 	{
@@ -246,6 +426,15 @@ class category
 		return find<Ts...>(cbegin(), std::move(cond), std::forward<Ns>(names)...);
 	}

+	/// @brief Return a special iterator to loop over all rows that conform to @a cond starting at @a pos.
+	/// The resulting iterator can be used in a structured binding context.
+	///
+	/// @param pos Iterator pointing to the location where to start
+	/// @param cond The condition for the query
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+	/// @return A special iterator that loops over all elements that match.
+
 	template <typename... Ts, typename... Ns>
 	conditional_iterator_proxy<category, Ts...> find(const_iterator pos, condition &&cond, Ns... names)
 	{
@@ -253,6 +442,15 @@ class category
 		return { *this, pos, std::move(cond), std::forward<Ns>(names)... };
 	}

+	/// @brief Return a special const iterator to loop over all rows that conform to @a cond starting at @a pos.
+	/// The resulting iterator can be used in a structured binding context.
+	///
+	/// @param pos Iterator pointing to the location where to start
+	/// @param cond The condition for the query
+	/// @tparam Ts The types for the columns requested
+	/// @param names The names for the columns requested
+	/// @return A special iterator that loops over all elements that match.
+
 	template <typename... Ts, typename... Ns>
 	conditional_iterator_proxy<const category, Ts...> find(const_iterator pos, condition &&cond, Ns... names) const
 	{
@@ -263,11 +461,20 @@ class category
 	// --------------------------------------------------------------------
 	// if you only expect a single row

+	/// @brief Return the row handle for the row that matches @a cond. Throws @a multiple_results_error if
+	/// there is not exactly one row matching @a cond
+	/// @param cond The condition to search for
+	/// @return Row handle to the row found
 	row_handle find1(condition &&cond)
 	{
 		return find1(begin(), std::move(cond));
 	}

+	/// @brief Return the row handle for the row that matches @a cond starting at @a pos.
+	/// Throws @a multiple_results_error if there is not exactly one row matching @a cond
+	/// @param pos The position to start the search
+	/// @param cond The condition to search for
+	/// @return Row handle to the row found
 	row_handle find1(iterator pos, condition &&cond)
 	{
 		auto h = find(pos, std::move(cond));
@@ -278,11 +485,20 @@ class category
 		return *h.begin();
 	}

+	/// @brief Return the const row handle for the row that matches @a cond. Throws @a multiple_results_error if
+	/// there is not exactly one row matching @a cond
+	/// @param cond The condition to search for
+	/// @return Row handle to the row found
 	const row_handle find1(condition &&cond) const
 	{
 		return find1(cbegin(), std::move(cond));
 	}

+	/// @brief Return the const row handle for the row that matches @a cond starting at @a pos.
+	/// Throws @a multiple_results_error if there is not exactly one row matching @a cond
+	/// @param pos The position to start the search
+	/// @param cond The condition to search for
+	/// @return Row handle to the row found
 	const row_handle find1(const_iterator pos, condition &&cond) const
 	{
 		auto h = find(pos, std::move(cond));
@@ -293,13 +509,27 @@ class category
 		return *h.begin();
 	}

+	/// @brief Return the value for the column named @a column for the single row that
+	/// matches @a cond. Throws @a multiple_results_error if there is not exactly one row
+	/// @tparam T The type to use for the result
+	/// @param cond The condition to search for
+	/// @param column The name of the column to return the value for
+	/// @return The value found
 	template <typename T>
 	T find1(condition &&cond, const char *column) const
 	{
 		return find1<T>(cbegin(), std::move(cond), column);
 	}

-	template <typename T>
+	/// @brief Return the value for the column named @a column for the single row that
+	/// matches @a cond when starting to search at @a pos.
+	/// Throws @a multiple_results_error if there is not exactly one row
+	/// @tparam T The type to use for the result
+	/// @param pos The location to start the search
+	/// @param cond The condition to search for
+	/// @param column The name of the column to return the value for
+	/// @return The value found
+	template <typename T, std::enable_if_t<not is_optional_v<T>, int> = 0>
 	T find1(const_iterator pos, condition &&cond, const char *column) const
 	{
 		auto h = find<T>(pos, std::move(cond), column);
@@ -310,6 +540,35 @@ class category
 		return *h.begin();
 	}

+	/// @brief Return a value of type std::optional<T> for the column named @a column for the single row that
+	/// matches @a cond when starting to search at @a pos.
+	/// If the row was not found, an empty value is returned.
+	/// @tparam T The type to use for the result, a specialisation of std::optional
+	/// @param pos The location to start the search
+	/// @param cond The condition to search for
+	/// @param column The name of the column to return the value for
+	/// @return The value found, can be empty if no row matches the condition
+	template <typename T, std::enable_if_t<is_optional_v<T>, int> = 0>
+	T find1(const_iterator pos, condition &&cond, const char *column) const
+	{
+		auto h = find<typename T::value_type>(pos, std::move(cond), column);
+
+		if (h.size() > 1)
+			throw multiple_results_error();
+
+		if (h.empty())
+			return {};
+
+		return *h.begin();
+	}
+
+	/// @brief Return a std::tuple for the values for the columns named in @a columns
+	/// for the single row that matches @a cond
+	/// Throws @a multiple_results_error if there is not exactly one row
+	/// @tparam Ts The types to use for the resulting tuple
+	/// @param cond The condition to search for
+	/// @param columns The names of the columns to return the value for
+	/// @return The values found as a single tuple of type std::tuple<Ts...>
 	template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
 	std::tuple<Ts...> find1(condition &&cond, Cs... columns) const
 	{
@@ -318,6 +577,14 @@ class category
 		return find1<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(columns)...);
 	}

+	/// @brief Return a std::tuple for the values for the columns named in @a columns
+	/// for the single row that matches @a cond when starting to search at @a pos
+	/// Throws @a multiple_results_error if there is not exactly one row
+	/// @tparam Ts The types to use for the resulting tuple
+	/// @param pos The location to start the search
+	/// @param cond The condition to search for
+	/// @param columns The names of the columns to return the value for
+	/// @return The values found as a single tuple of type std::tuple<Ts...>
 	template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
 	std::tuple<Ts...> find1(const_iterator pos, condition &&cond, Cs... columns) const
 	{
@@ -333,11 +600,18 @@ class category
 	// --------------------------------------------------------------------
 	// if you want only a first hit

+	/// @brief Return a row handle to the first row that matches @a cond
+	/// @param cond The condition to search for
+	/// @return The handle to the row that matches or an empty row_handle
 	row_handle find_first(condition &&cond)
 	{
 		return find_first(begin(), std::move(cond));
 	}

+	/// @brief Return a row handle to the first row that matches @a cond starting at @a pos
+	/// @param pos The location to start searching
+	/// @param cond The condition to search for
+	/// @return The handle to the row that matches or an empty row_handle
 	row_handle find_first(iterator pos, condition &&cond)
 	{
 		auto h = find(pos, std::move(cond));
@@ -345,11 +619,18 @@ class category
 		return h.empty() ? row_handle{} : *h.begin();
 	}

+	/// @brief Return a const row handle to the first row that matches @a cond
+	/// @param cond The condition to search for
+	/// @return The const handle to the row that matches or an empty row_handle
 	const row_handle find_first(condition &&cond) const
 	{
 		return find_first(cbegin(), std::move(cond));
 	}

+	/// @brief Return a const row handle to the first row that matches @a cond starting at @a pos
+	/// @param pos The location to start searching
+	/// @param cond The condition to search for
+	/// @return The const handle to the row that matches or an empty row_handle
 	const row_handle find_first(const_iterator pos, condition &&cond) const
 	{
 		auto h = find(pos, std::move(cond));
@@ -357,12 +638,24 @@ class category
 		return h.empty() ? row_handle{} : *h.begin();
 	}

+	/// @brief Return the value for column @a column for the first row that matches condition @a cond
+	/// @tparam T The type of the value to return
+	/// @param cond The condition to search for
+	/// @param column The column for which the value should be returned
+	/// @return The value found or a default constructed value if not found
 	template <typename T>
 	T find_first(condition &&cond, const char *column) const
 	{
 		return find_first<T>(cbegin(), std::move(cond), column);
 	}

+	/// @brief Return the value for column @a column for the first row that matches condition @a cond
+	/// when starting the search at @a pos
+	/// @tparam T The type of the value to return
+	/// @param pos The location to start searching
+	/// @param cond The condition to search for
+	/// @param column The column for which the value should be returned
+	/// @return The value found or a default constructed value if not found
 	template <typename T>
 	T find_first(const_iterator pos, condition &&cond, const char *column) const
 	{
@@ -371,6 +664,11 @@ class category
 		return h.empty() ? T{} : *h.begin();
 	}

+	/// @brief Return a tuple containing the values for the columns @a columns for the first row that matches condition @a cond
+	/// @tparam Ts The types of the values to return
+	/// @param cond The condition to search for
+	/// @param columns The columns for which the values should be returned
+	/// @return The values found or default constructed values if not found
 	template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
 	std::tuple<Ts...> find_first(condition &&cond, Cs... columns) const
 	{
@@ -379,6 +677,13 @@ class category
 		return find_first<Ts...>(cbegin(), std::move(cond), std::forward<Cs>(columns)...);
 	}

+	/// @brief Return a tuple containing the values for the columns @a columns for the first row that matches condition @a cond
+	/// when starting the search at @a pos
+	/// @tparam Ts The types of the values to return
+	/// @param pos The location to start searching
+	/// @param cond The condition to search for
+	/// @param columns The columns for which the values should be returned
+	/// @return The values found or default constructed values if not found
 	template <typename... Ts, typename... Cs, typename U = std::enable_if_t<sizeof...(Ts) != 1>>
 	std::tuple<Ts...> find_first(const_iterator pos, condition &&cond, Cs... columns) const
 	{
@@ -390,6 +695,11 @@ class category

 	// --------------------------------------------------------------------

+	/// @brief Return the maximum value for column @a column for all rows that match condition @a cond
+	/// @tparam T The type of the value to return
+	/// @param column The column to use for the value
+	/// @param cond The condition to search for
+	/// @return The value found or the minimal value for the type
 	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
 	T find_max(const char *column, condition &&cond) const
 	{
@@ -404,12 +714,21 @@ class category
 		return result;
 	}

+	/// @brief Return the maximum value for column @a column for all rows
+	/// @tparam T The type of the value to return
+	/// @param column The column to use for the value
+	/// @return The value found or the minimal value for the type
 	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
 	T find_max(const char *column) const
 	{
 		return find_max<T>(column, all());
 	}

+	/// @brief Return the minimum value for column @a column for all rows that match condition @a cond
+	/// @tparam T The type of the value to return
+	/// @param column The column to use for the value
+	/// @param cond The condition to search for
+	/// @return The value found or the maximum value for the type
 	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
 	T find_min(const char *column, condition &&cond) const
 	{
@@ -424,12 +743,19 @@ class category
 		return result;
 	}

+	/// @brief Return the minimum value for column @a column for all rows
+	/// @tparam T The type of the value to return
+	/// @param column The column to use for the value
+	/// @return The value found or the maximum value for the type
 	template <typename T, std::enable_if_t<std::is_arithmetic_v<T>, int> = 0>
 	T find_min(const char *column) const
 	{
 		return find_min<T>(column, all());
 	}

+	/// @brief Return whether a row exists that matches condition @a cond
+	/// @param cond The condition to match
+	/// @return True if a row exists
 	bool exists(condition &&cond) const
 	{
 		bool result = false;
@@ -458,6 +784,9 @@ class category
 		return result;
 	}

+	/// @brief Return the total number of rows that match condition @a cond
+	/// @param cond The condition to match
+	/// @return The count
 	size_t count(condition &&cond) const
 	{
 		size_t result = 0;
@@ -485,11 +814,24 @@ class category

 	// --------------------------------------------------------------------

+	/// Using the relations defined in the validator, return whether the row
+	/// in @a r has any children in other categories
 	bool has_children(row_handle r) const;
+
+	/// Using the relations defined in the validator, return whether the row
+	/// in @a r has any parents in other categories
 	bool has_parents(row_handle r) const;

+	/// Using the relations defined in the validator, return the row handles
+	/// for all rows in @a childCat that are linked to row @a r
 	std::vector<row_handle> get_children(row_handle r, const category &childCat) const;
+
+	/// Using the relations defined in the validator, return the row handles
+	/// for all rows in @a parentCat that are linked to row @a r
 	std::vector<row_handle> get_parents(row_handle r, const category &parentCat) const;
+
+	/// Using the relations defined in the validator, return the row handles
+	/// for all rows in @a cat that are in any way linked to row @a r
 	std::vector<row_handle> get_linked(row_handle r, const category &cat) const;

 	// --------------------------------------------------------------------
@@ -504,20 +846,40 @@ class category
 	// 	insert_impl(pos, std::move(row));
 	// }

+	/// Erase the row pointed to by @a pos and return the iterator to the 
+	/// row following pos.
 	iterator erase(iterator pos);
+
+	/// Erase row @a rh
 	void erase(row_handle rh)
 	{
 		erase(iterator(*this, rh.m_row));
 	}

+	/// @brief Erase all rows that match condition @a cond
+	/// @param cond The condition
+	/// @return The number of rows that have been erased
 	size_t erase(condition &&cond);
+
+	/// @brief Erase all rows that match condition @a cond calling
+	/// the visitor function @a visit for each before actually erasing it.
+	/// @param cond The condition
+	/// @param visit The visitor function
+	/// @return The number of rows that have been erased
 	size_t erase(condition &&cond, std::function<void(row_handle)> &&visit);

+	/// @brief Emplace the values in @a ri in a new row
+	/// @param ri An object containing the values to insert
+	/// @return iterator to the newly created row
 	iterator emplace(row_initializer &&ri)
 	{
 		return this->emplace(ri.begin(), ri.end());
 	}

+	/// @brief Create a new row and emplace the values in the range @a b to @a e in it
+	/// @param b Iterator to the beginning of the range of @ref item_value
+	/// @param e Iterator to the end of the range of @ref item_value
+	/// @return iterator to the newly created row
 	template <typename ItemIter>
 	iterator emplace(ItemIter b, ItemIter e)
 	{
@@ -541,6 +903,7 @@ class category
 		return insert_impl(cend(), r);
 	}

+	/// @brief Completely erase all rows contained in this category
 	void clear();

 	// --------------------------------------------------------------------
@@ -548,6 +911,11 @@ class category
 	/// based on a sequence number. This function will be called until the
 	/// result is unique in the context of this category
 	std::string get_unique_id(std::function<std::string(int)> generator = cif::cif_id_for_number);
+
+
+	/// @brief Generate a new, unique ID based on a string prefix followed by a number
+	/// @param prefix The string prefix
+	/// @return a new unique ID
 	std::string get_unique_id(const std::string &prefix)
 	{
 		return get_unique_id([prefix](int nr)
@@ -556,7 +924,7 @@ class category

 	// --------------------------------------------------------------------

-	/// \brief Rename a single column in the rows that match \a cond to value \a value
+	/// \brief Update a single column named @a tag in the rows that match \a cond to value \a value
 	/// making sure the linked categories are updated according to the link.
 	/// That means, child categories are updated if the links are absolute
 	/// and unique. If they are not, the child category rows are split.
@@ -569,6 +937,11 @@ class category
 		update_value(rows, tag, value);
 	}

+	/// \brief Update a single column named @a tag in @a rows to value \a value
+	/// making sure the linked categories are updated according to the link.
+	/// That means, child categories are updated if the links are absolute
+	/// and unique. If they are not, the child category rows are split.
+
 	void update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value);

 	// --------------------------------------------------------------------
@@ -588,12 +961,15 @@ class category
 		{
 			auto iv = m_cat_validator->get_validator_for_item(column_name);
 			if (iv == nullptr)
-				std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << std::endl;
+				std::cerr << "Invalid name used '" << column_name << "' is not a known column in " + m_name << '\n';
 		}

 		return result;
 	}

+	/// @brief Return the name for column with index @a ix
+	/// @param ix The index number
+	/// @return The name of the column
 	std::string_view get_column_name(uint16_t ix) const
 	{
 		if (ix >= m_columns.size())
@@ -602,6 +978,9 @@ class category
 		return m_columns[ix].m_name;
 	}

+	/// @brief Make sure a column with name @a column_name is known and return its index number
+	/// @param column_name The name of the column
+	/// @return The index number of the column
 	uint16_t add_column(std::string_view column_name)
 	{
 		using namespace std::literals;
@@ -625,29 +1004,55 @@ class category
 		return result;
 	}

+	/// @brief Return whether a column with name @a name exists in this category
+	/// @param name The name of the column
+	/// @return True if the column exists
 	bool has_column(std::string_view name) const
 	{
 		return get_column_ix(name) < m_columns.size();
 	}

+	/// @brief Return the cif::iset of columns in this category
 	iset get_columns() const;

 	// --------------------------------------------------------------------

+	/// @brief Sort the rows using comparator function @a f
+	/// @param f The comparator function taking two row_handles and returning
+	/// an int indicating whether the first is smaller, equal or larger than
+	/// the second. ( respectively a value <0, 0, or >0 )
 	void sort(std::function<int(row_handle, row_handle)> f);
+
+	/// @brief Reorder the rows in the category using the index defined by
+	/// the @ref category_validator
 	void reorder_by_index();

 	// --------------------------------------------------------------------

+	/// This function effectively returns the list of fully qualified column
+	/// names, that is category_name + '.' + column_name for each column
 	std::vector<std::string> get_tag_order() const;

+	/// Write the contents of the category to the std::ostream @a os
 	void write(std::ostream &os) const;
+
+	/// @brief Write the contents of the category to the std::ostream @a os and
+	/// use @a order as the order of the columns. If @a addMissingColumns is
+	/// false, columns that do not contain any value will be suppressed
+	/// @param os The std::ostream to write to
+	/// @param order The order in which the columns should appear
+	/// @param addMissingColumns When false, empty columns are suppressed from the output
 	void write(std::ostream &os, const std::vector<std::string> &order, bool addMissingColumns = true);

  private:
 	void write(std::ostream &os, const std::vector<uint16_t> &order, bool includeEmptyColumns) const;

  public:
+
+	/// friend function to make it possible to do:
+	/// @code {.cpp}
+	/// std::cout << my_category;
+	/// @endcode 
 	friend std::ostream &operator<<(std::ostream &os, const category &cat)
 	{
 		cat.write(os);
@@ -657,7 +1062,6 @@ class category
  private:
 	void update_value(row *row, uint16_t column, std::string_view value, bool updateLinked, bool validate = true);

-  private:
 	void erase_orphans(condition &&cond, category &parent);

 	using allocator_type = std::allocator<void>;
--- a/include/cif++/compound.hpp
+++ b/include/cif++/compound.hpp
@@ -26,18 +26,32 @@

 #pragma once

-/// \file This file contains the definition for the class compound, encapsulating
-/// the information found for compounds in the CCD.
-
-#include <cif++.hpp>
-#include <cif++/atom_type.hpp>
-#include <cif++/point.hpp>
+#include "cif++/atom_type.hpp"
+#include "cif++/datablock.hpp"
+#include "cif++/exports.hpp"
+#include "cif++/point.hpp"
+#include "cif++/utilities.hpp"

 #include <map>
 #include <set>
 #include <tuple>
 #include <vector>

+/// \file compound.hpp
+/// This file contains the definition for the class compound, encapsulating
+/// the information found for compounds in the CCD.
+///
+/// The data is loaded by default from a file called `components.cif`. This file
+/// is located using load_resource. (See documentation on cif::load_resource for more information)
+///
+/// But if the CCP4 environment is available at runtime, the compound information
+/// may also be generated from the CCP4 monomer library.
+///
+/// Note that the information in CCP4 and CCD is not equal.
+///
+
+/// See also :doc:`/compound` for more information.
+
 namespace cif
 {

@@ -47,21 +61,47 @@ class compound;
 struct compound_atom;
 class compound_factory_impl;

-/// \brief The bond type as defined in the CCD, possible values taken from the mmcif_pdbx file
+/// \brief The bond type or bond order as defined in the CCD, possible values taken from the mmcif_pdbx file
 enum class bond_type
 {
-	sing, // 'single bond'
-	doub, // 'double bond'
-	trip, // 'triple bond'
-	quad, // 'quadruple bond'
-	arom, // 'aromatic bond'
-	poly, // 'polymeric bond'
-	delo, // 'delocalized double bond'
-	pi,   // 'pi bond'
+	sing, ///< single bond
+	doub, ///< double bond
+	trip, ///< triple bond
+	quad, ///< quadruple bond
+	arom, ///< aromatic bond
+	poly, ///< polymeric bond
+	delo, ///< delocalized double bond
+	pi,   ///< pi bond
 };

-std::string to_string(bond_type bondType);
-bond_type from_string(const std::string &bondType);
+/// @brief return the string representation of @a bondType
+std::string bond_type_to_string(bond_type bondType);
+
+/// @brief return the cif::bond_type for the string representation @a bondType
+bond_type parse_bond_type_from_string(const std::string &bondType);
+
+/// \brief The possible stereo config values for a compound_atom.
+///
+/// As the site https://psiberg.com/r-s-nomenclature/ states:
+///
+/// > RS nomenclature is currently the preferred system for assigning absolute
+/// > configuration to chiral molecules. The letters R and S come from the Latin
+/// > words ‘Rectus‘ and ‘Sinister‘ meaning ‘right’ and ‘left’. Molecules that
+/// > rotate the plane of polarized light to right are referred to as ‘R isomers’
+/// > and the molecules that rotate the plane of polarized light to left are
+/// > referred to ‘S isomers’.
+enum class stereo_config_type : uint8_t
+{
+	N = 'N', ///< Not polarizing
+	R = 'R', ///< Rectus
+	S = 'S'  ///< Sinister
+};
+
+/// @brief return the string representation of @a stereo_config
+std::string to_string(stereo_config_type stereo_config);
+
+/// @brief return the cif::stereo_config_type for the string representation @a stereo_config
+stereo_config_type parse_stereo_config_from_string(const std::string &stereo_config);

 /// --------------------------------------------------------------------
 /// \brief struct containing information about an atom in a chemical compound.
@@ -69,14 +109,17 @@ bond_type from_string(const std::string &bondType);

 struct compound_atom
 {
-	std::string id;
-	atom_type type_symbol;
-	int charge = 0;
-	bool aromatic = false;
-	bool leaving_atom = false;
-	bool stereo_config = false;
-	float x, y, z;
+	std::string id;                                           ///< Identifier for each atom in the chemical component
+	atom_type type_symbol;                                    ///< The element type for each atom in the chemical component.
+	int charge = 0;                                           ///< The formal charge assigned to each atom in the chemical component.
+	bool aromatic = false;                                    ///< Defines atoms in an aromatic moiety
+	bool leaving_atom = false;                                ///< Flags atoms with "leaving" capability
+	stereo_config_type stereo_config = stereo_config_type::N; ///< Defines the stereochemical configuration of the chiral center atom.
+	float x,                                                  ///< The x component of the coordinates for each atom specified as orthogonal angstroms.
+		y,                                                    ///< The y component of the coordinates for each atom specified as orthogonal angstroms.
+		z;                                                    ///< The z component of the coordinates for each atom specified as orthogonal angstroms.

+	/// Return the location of the atom as a point
 	point get_location() const
 	{
 		return { x, y, z };
@@ -88,9 +131,10 @@ struct compound_atom

 struct compound_bond
 {
-	std::string atom_id[2];
-	bond_type type;
-	bool aromatic = false, stereo_config = false;
+	std::string atom_id[2];    ///< The ID's of the two atoms that define the bond.
+	bond_type type;            ///< The bond order of the chemical bond associated with the specified atoms.
+	bool aromatic = false,     ///< Defines aromatic bonds.
+		stereo_config = false; ///< Defines stereochemical bonds.
 };

 /// --------------------------------------------------------------------
@@ -106,23 +150,26 @@ class compound
  public:
 	// accessors

-	std::string id() const { return m_id; }
-	std::string name() const { return m_name; }
-	std::string type() const { return m_type; }
+	std::string id() const { return m_id; }                   ///< Return the alphanumeric code for the chemical component.
+	std::string name() const { return m_name; }               ///< Return the name of the chemical component.
+	std::string type() const { return m_type; }               ///< Return the type of monomer.
+	std::string formula() const { return m_formula; }         ///< Return the chemical formula of the chemical component.
+	float formula_weight() const { return m_formula_weight; } ///< Return the formula mass of the chemical component in Daltons.
+	int formal_charge() const { return m_formal_charge; }     ///< Return the formal charge on the chemical component.
+
+	/// The group record is only available in CCP4 monomer library files.
+	/// For CCD entries this value will always contain 'non-polymer'
 	std::string group() const { return m_group; }
-	std::string formula() const { return m_formula; }
-	float formula_weight() const { return m_formula_weight; }
-	int formal_charge() const { return m_formal_charge; }

-	const std::vector<compound_atom> &atoms() const { return m_atoms; }
-	const std::vector<compound_bond> &bonds() const { return m_bonds; }
+	const std::vector<compound_atom> &atoms() const { return m_atoms; } ///< Return the list of atoms for this compound
+	const std::vector<compound_bond> &bonds() const { return m_bonds; } ///< Return the list of bonds for this compound

-	compound_atom get_atom_by_atom_id(const std::string &atom_id) const;
+	compound_atom get_atom_by_atom_id(const std::string &atom_id) const; ///< Return the atom with id @a atom_id

-	bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const;
-	float bond_length(const std::string &atomId_1, const std::string &atomId_2) const;
+	bool atoms_bonded(const std::string &atomId_1, const std::string &atomId_2) const; ///< Return true if @a atomId_1 is bonded to @a atomId_2
+	float bond_length(const std::string &atomId_1, const std::string &atomId_2) const; ///< Return the bond length between @a atomId_1 and @a atomId_2

-	bool is_water() const
+	bool is_water() const ///< Return if the compound is actually a water
 	{
 		return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
 	}
@@ -149,6 +196,8 @@ class compound
 // --------------------------------------------------------------------
 // Factory class for compound and Link objects

+/// Use the compound_factory singleton instance to create compound objects
+
 class compound_factory
 {
  public:
@@ -160,14 +209,28 @@ class compound_factory
 	/// flag to true.

 	static void init(bool useThreadLocalInstanceOnly);
+
+	/// Return the singleton instance. If initialized with local threads, this is the
+	/// instance for the current thread.
 	static compound_factory &instance();
+
+	/// Delete and reset the singleton instance. If initialized with local threads, this is the
+	/// instance for the current thread.
 	static void clear();

+	/// Set the default dictionary file to @a inDictFile
 	void set_default_dictionary(const std::filesystem::path &inDictFile);
+
+	/// Override any previously loaded dictionary with @a inDictFile
 	void push_dictionary(const std::filesystem::path &inDictFile);
+
+	/// Remove the last pushed dictionary
 	void pop_dictionary();

+	/// Return whether @a res_name is a valid and known peptide
 	bool is_known_peptide(const std::string &res_name) const;
+
+	/// Return whether @a res_name is a valid and known base
 	bool is_known_base(const std::string &res_name) const;

 	/// \brief Create the compound object for \a id
@@ -180,7 +243,8 @@ class compound_factory

 	~compound_factory();

-	static CIFPP_EXPORT const std::map<std::string, char> kAAMap, kBaseMap;
+	CIFPP_EXPORT static const std::map<std::string, char> kAAMap, ///< Globally accessible static list of the default amino acids
+		kBaseMap;                                                 ///< Globally accessible static list of the default bases

  private:
 	compound_factory();
--- a/include/cif++/condition.hpp
+++ b/include/cif++/condition.hpp
@@ -26,7 +26,7 @@

 #pragma once

-#include <cif++/row.hpp>
+#include "cif++/row.hpp"

 #include <cassert>
 #include <functional>
@@ -34,14 +34,100 @@
 #include <regex>
 #include <utility>

+/** \file condition.hpp
+ * This file contains code to create conditions: objects encapsulating a
+ * query you can use to find rows in a @ref cif::category
+ *
+ * Conditions are created as standard C++ expressions. That means
+ * you can use the standard comparison operators to compare field
+ * contents with a value and boolean operators to chain everything
+ * together.
+ *
+ * To create a query that simply compares one field with one value:
+ *
+ * @code {.cpp}
+ * cif::condition c = cif::key("id") == 1;
+ * @endcode
+ * 
+ * That will find rows where the ID field contains the number 1. If
+ * using cif::key is a bit too much typing, you can also write:
+ * 
+ * @code{.cpp}
+ * using namespace cif::literals;
+ * 
+ * cif::condition c2 = "id"_key == 1;
+ * @endcode
+ * 
+ * Now if you want both ID = 1 and ID = 2 in the result:
+ * 
+ * @code{.cpp}
+ * auto c3 = "id"_key == 1 or "id"_key == 2;
+ * @endcode
+ * 
+ * There are some special values you can use. To find rows with fields that
+ * do not have a value:
+ * 
+ * @code{.cpp}
+ * auto c4 = "type"_key == cif::null;
+ * @endcode 
+ * 
+ * Or if it should not be NULL:
+ * 
+ * @code{.cpp}
+ * auto c5 = "type"_key != cif::null;
+ * @endcode 
+ * 
+ * There's even a way to find all records:
+ * 
+ * @code{.cpp}
+ * auto c6 = cif::all;
+ * @endcode
+ * 
+ * And when you want to search for any column containing the value 'foo':
+ * 
+ * @code{.cpp}
+ * auto c7 = cif::any == "foo";
+ * @endcode 
+ * 
+ * All these conditions can be chained together again:
+ * 
+ * @code{.cpp}
+ * auto c8 = std::move(c3) and std::move(c5);
+ * @endcode
+ */
+
 namespace cif
 {

 // --------------------------------------------------------------------
-// let's make life easier
+/// let's make life easier, since @ref cif::category is not known yet,
+/// we declare a function to access its contents

+/**
+ * @brief Get the fields that can be used as key in conditions for a category
+ * 
+ * @param cat The category whose fields to return
+ * @return iset The set of key field names
+ */
 iset get_category_fields(const category &cat);
+
+/**
+ * @brief Get the column index for column @a col in category @a cat
+ * 
+ * @param cat The category
+ * @param col The name of the column
+ * @return uint16_t The index
+ */
 uint16_t get_column_ix(const category &cat, std::string_view col);
+
+/**
+ * @brief Return whether the column @a col in category @a cat has a primitive type of *uchar*
+ * 
+ * @param cat The category
+ * @param col The column name
+ * @return true If the primitive type is of type *uchar*
+ * @return false If the primitive type is not of type *uchar*
+ */
 bool is_column_type_uchar(const category &cat, std::string_view col);

 // --------------------------------------------------------------------
@@ -58,7 +144,7 @@ namespace detail
 		virtual void str(std::ostream &) const = 0;
 		virtual std::optional<row_handle> single() const { return {}; };

-		virtual bool equals(const condition_impl *rhs) const { return false; }
+		virtual bool equals([[maybe_unused]] const condition_impl *rhs) const { return false; }
 	};

 	struct all_condition_impl : public condition_impl
@@ -72,16 +158,32 @@ namespace detail
 	struct not_condition_impl;
 } // namespace detail

+/**
+ * @brief The interface class for conditions. This uses the bridge pattern,
+ * which means the implementation is in the member m_impl
+ */
 class condition
 {
  public:
-	using condition_impl = detail::condition_impl;

+	/** @cond */
+	using condition_impl = detail::condition_impl;
+	/** @endcond */
+
+	/**
+	 * @brief Construct a new, empty condition object
+	 * 
+	 */
 	condition()
 		: m_impl(nullptr)
 	{
 	}

+	/**
+	 * @brief Construct a new condition object with implementation @a impl
+	 * 
+	 * @param impl The implementation to use
+	 */
 	explicit condition(condition_impl *impl)
 		: m_impl(impl)
 	{
@@ -89,6 +191,9 @@ class condition

 	condition(const condition &) = delete;

+	/**
+	 * @brief Construct a new condition object moving the data from @a rhs
+	 */
 	condition(condition &&rhs) noexcept
 		: m_impl(nullptr)
 	{
@@ -97,6 +202,9 @@ class condition

 	condition &operator=(const condition &) = delete;

+	/**
+	 * @brief Assignment operator moving the data from @a rhs
+	 */
 	condition &operator=(condition &&rhs) noexcept
 	{
 		std::swap(m_impl, rhs.m_impl);
@@ -109,8 +217,22 @@ class condition
 		m_impl = nullptr;
 	}

+	/**
+	 * @brief Prepare the condition to be used on category @a c. This will,
+	 * for example, take care of setting the correct indices for fields.
+	 * 
+	 * @param c The category this query should act upon
+	 */
 	void prepare(const category &c);

+	/**
+	 * @brief This operator returns true if the row referenced by @a r is 
+	 * a match for this condition.
+	 * 
+	 * @param r The reference to a row.
+	 * @return true If there is a match
+	 * @return false If there is no match
+	 */
 	bool operator()(row_handle r) const
 	{
 		assert(this->m_impl != nullptr);
@@ -118,27 +240,53 @@ class condition
 		return m_impl ? m_impl->test(r) : false;
 	}

+	/**
+	 * @brief Return true if the condition is not empty
+	 */
 	explicit operator bool() { return not empty(); }
+
+	/**
+	 * @brief Return true if the condition is empty, has no condition
+	 */
 	bool empty() const { return m_impl == nullptr; }

+	/**
+	 * @brief If the prepare step found out there is only one hit
+	 * this single hit can be returned by this method.
+	 * 
+	 * @return std::optional<row_handle> The result will contain
+	 * a row reference if there is a single hit, it will be empty otherwise
+	 */
 	std::optional<row_handle> single() const
 	{
 		return m_impl ? m_impl->single() : std::optional<row_handle>();
 	}

-	friend condition operator||(condition &&a, condition &&b);
-	friend condition operator&&(condition &&a, condition &&b);
+	friend condition operator||(condition &&a, condition &&b); /**< Return a condition which is the logical OR of conditions @a a and @a b */
+	friend condition operator&&(condition &&a, condition &&b); /**< Return a condition which is the logical AND of conditions @a a and @a b */

+	/// @cond
 	friend struct detail::or_condition_impl;
 	friend struct detail::and_condition_impl;
 	friend struct detail::not_condition_impl;
+	/// @endcond

+	/**
+	 * @brief Swap two conditions
+	 */
 	void swap(condition &rhs)
 	{
 		std::swap(m_impl, rhs.m_impl);
 		std::swap(m_prepared, rhs.m_prepared);
 	}

+	/**
+	 * @brief Operator to use to write out a condition to @a os, for debugging purposes
+	 * 
+	 * @param os The std::ostream to write to
+	 * @param cond The condition to write
+	 * @return std::ostream& The same as @a os
+	 */
 	friend std::ostream &operator<<(std::ostream &os, const condition &cond)
 	{
 		if (cond.m_impl)
@@ -691,6 +839,9 @@ namespace detail

 } // namespace detail

+/**
+ * @brief Create a condition containing the logical AND of conditions @a a and @a b
+ */
 inline condition operator and(condition &&a, condition &&b)
 {
 	if (a.m_impl and b.m_impl)
@@ -700,6 +851,9 @@ inline condition operator and(condition &&a, condition &&b)
 	return condition(std::move(b));
 }

+/**
+ * @brief Create a condition containing the logical OR of conditions @a a and @a b
+ */
 inline condition operator or(condition &&a, condition &&b)
 {
 	if (a.m_impl and b.m_impl)
@@ -732,21 +886,49 @@ inline condition operator or(condition &&a, condition &&b)
 	return condition(std::move(b));
 }

+/**
+ * @brief A helper class to make it possible to search for empty fields (NULL)
+ * 
+ * @code{.cpp}
+ * "id"_key == cif::empty_type();
+ * @endcode
+ */
+
 struct empty_type
 {
 };

-/// \brief A helper to make it possible to have conditions like ("id"_key == cif::null)
+/**
+ * @brief A helper to make it possible to have conditions like
+ * 
+ * @code{.cpp}
+ * "id"_key == cif::null;
+ * @endcode
+ */

 inline constexpr empty_type null = empty_type();

+/**
+ * @brief Class to use in creating conditions, creates a reference to a field or column
+ * 
+ */
 struct key
 {
+	/**
+	 * @brief Construct a new key object using @a itemTag as name
+	 * 
+	 * @param itemTag The column name
+	 */
 	explicit key(const std::string &itemTag)
 		: m_item_tag(itemTag)
 	{
 	}

+	/**
+	 * @brief Construct a new key object using @a itemTag as name
+	 * 
+	 * @param itemTag The column name
+	 */
 	explicit key(const char *itemTag)
 		: m_item_tag(itemTag)
 	{
@@ -755,44 +937,49 @@ struct key
 	key(const key &) = delete;
 	key &operator=(const key &) = delete;

-	std::string m_item_tag;
+	std::string m_item_tag; ///< The column name
 };

+/**
+ * @brief Operator to create an equals condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator==(const key &key, const T &v)
 {
 	return condition(new detail::key_equals_condition_impl({ key.m_item_tag, v }));
 }

-inline condition operator==(const key &key, const char *value)
+/**
+ * @brief Operator to create an equals condition based on a key @a key and a value @a value
+ */
+inline condition operator==(const key &key, std::string_view value)
 {
-	if (value != nullptr and *value != 0)
+	if (not value.empty())
 		return condition(new detail::key_equals_condition_impl({ key.m_item_tag, value }));
 	else
 		return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
 }

-// inline condition_t operator==(const key& key, const detail::ItemReference& v)
-// {
-// 	if (v.empty())
-// 		return condition_t(new detail::key_is_empty_condition_impl(key.m_item_tag));
-// 	else
-// 		return condition_t(new detail::key_compare_condition_impl(key.m_item_tag, [tag = key.m_item_tag, v](const category& c, const row& r, bool icase)
-// 			{ return r[tag].template compare<(v, icase) == 0; }));
-// }
-
+/**
+ * @brief Operator to create a not equals condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator!=(const key &key, const T &v)
 {
 	return condition(new detail::not_condition_impl(operator==(key, v)));
 }

-inline condition operator!=(const key &key, const char *v)
+/**
+ * @brief Operator to create a not equals condition based on a key @a key and a value @a value
+ */
+inline condition operator!=(const key &key, std::string_view value)
 {
-	std::string value(v ? v : "");
 	return condition(new detail::not_condition_impl(operator==(key, value)));
 }

+/**
+ * @brief Operator to create a greater than condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator>(const key &key, const T &v)
 {
@@ -805,6 +992,9 @@ condition operator>(const key &key, const T &v)
 		s.str()));
 }

+/**
+ * @brief Operator to create a greater than or equals condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator>=(const key &key, const T &v)
 {
@@ -817,6 +1007,9 @@ condition operator>=(const key &key, const T &v)
 		s.str()));
 }

+/**
+ * @brief Operator to create a less than condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator<(const key &key, const T &v)
 {
@@ -829,6 +1022,9 @@ condition operator<(const key &key, const T &v)
 		s.str()));
 }

+/**
+ * @brief Operator to create a less than or equals condition based on a key @a key and a value @a v
+ */
 template <typename T>
 condition operator<=(const key &key, const T &v)
 {
@@ -841,43 +1037,82 @@ condition operator<=(const key &key, const T &v)
 		s.str()));
 }

+/**
+ * @brief Operator to create a condition based on a key @a key and a regular expression @a rx
+ */
 inline condition operator==(const key &key, const std::regex &rx)
 {
 	return condition(new detail::key_matches_condition_impl(key.m_item_tag, rx));
 }

+/**
+ * @brief Operator to create a condition based on a key @a key which should be empty/null
+ */
 inline condition operator==(const key &key, const empty_type &)
 {
 	return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
 }

+/**
+ * @brief Operator to create a condition based on a key @a key which should be not empty/null
+ */
 inline condition operator!=(const key &key, const empty_type &)
 {
 	return condition(new detail::key_is_not_empty_condition_impl(key.m_item_tag));
 }

+/**
+ * @brief Create a condition comparing the column @a key with the value in @a v if
+ * @a v contains a value, or testing that column for empty/null if it does not.
+ */
+template <typename T>
+condition operator==(const key &key, const std::optional<T> &v)
+{
+	if (v.has_value())
+		return condition(new detail::key_equals_condition_impl({ key.m_item_tag, *v }));
+	else
+		return condition(new detail::key_is_empty_condition_impl(key.m_item_tag));
+}
+
+/**
+ * @brief Operator to create a boolean opposite of the condition in @a rhs
+ */
 inline condition operator not(condition &&rhs)
 {
 	return condition(new detail::not_condition_impl(std::move(rhs)));
 }

+/** @cond */
 struct any_type
 {
 };
+/** @endcond */

+/**
+ * @brief A helper for any field constructs
+ */
 inline constexpr any_type any = any_type{};

+/**
+ * @brief Create a condition to search any column for a value @a v
+ */
 template <typename T>
 condition operator==(const any_type &, const T &v)
 {
 	return condition(new detail::any_is_condition_impl<T>(v));
 }

+/**
+ * @brief Create a condition to search any column for a regular expression @a rx
+ */
 inline condition operator==(const any_type &, const std::regex &rx)
 {
 	return condition(new detail::any_matches_condition_impl(rx));
 }

+/**
+ * @brief Create a condition to return all rows
+ */
 inline condition all()
 {
 	return condition(new detail::all_condition_impl());
@@ -885,6 +1120,13 @@ inline condition all()

 namespace literals
 {
+	/**
+	 * @brief Return a cif::key for the column name @a text
+	 * 
+	 * @param text The name of the column
+	 * @param length The length of @a text
+	 * @return key The cif::key created
+	 */
 	inline key operator""_key(const char *text, size_t length)
 	{
 		return key(std::string(text, length));
--- a/include/cif++/datablock.hpp
+++ b/include/cif++/datablock.hpp
@@ -26,60 +26,164 @@

 #pragma once

-#include <cif++/category.hpp>
-#include <cif++/forward_decl.hpp>
+#include "cif++/category.hpp"
+#include "cif++/forward_decl.hpp"
+
+/** \file datablock.hpp
+ * Each valid mmCIF file contains at least one @ref cif::datablock.
+ * A datablock has a name and can contain one or more @ref cif::category "categories"
+ */

 namespace cif
 {

 // --------------------------------------------------------------------

+/**
+ * @brief A datablock is a list of category objects with some additional features
+ * 
+ */
+
 class datablock : public std::list<category>
 {
  public:
 	datablock() = default;

+	/**
+	 * @brief Construct a new datablock object with name @a name
+	 * 
+	 * @param name The name for the new datablock
+	 */
 	datablock(std::string_view name)
 		: m_name(name)
 	{
 	}

+	/** @cond */
 	datablock(const datablock &);
 	datablock(datablock &&) = default;

 	datablock &operator=(const datablock &);
 	datablock &operator=(datablock &&) = default;
+	/** @endcond */

 	// --------------------------------------------------------------------

+	/**
+	 * @brief Return the name of this datablock
+	 */
 	const std::string &name() const { return m_name; }

+	/**
+	 * @brief Set the name of this datablock to @a name
+	 * 
+	 * @param name The new name
+	 */
 	void set_name(std::string_view name)
 	{
 		m_name = name;
 	}

+	/**
+	 * @brief Set the validator object to @a v
+	 * 
+	 * @param v The new validator object, may be null
+	 */
 	void set_validator(const validator *v);

+	/**
+	 * @brief Get the validator object
+	 * 
+	 * @return const validator* The validator or nullptr if there is none
+	 */
 	const validator *get_validator() const;

+	/**
+	 * @brief Validates the content of this datablock and all its content
+	 * 
+	 * @return true If the content is valid
+	 * @return false If the content is not valid
+	 */
 	bool is_valid() const;
+
+	/**
+	 * @brief Validates all contained data for valid links between parents and children
+	 * as defined in the validator
+	 * 
+	 * @return true If all links are valid
+	 * @return false If not all links are valid
+	 */
 	bool validate_links() const;

 	// --------------------------------------------------------------------

+	/**
+	 * @brief Return the category named @a name, will create a new and empty
+	 * category named @a name if it does not exist.
+	 * 
+	 * @param name The name of the category to return
+	 * @return category& Reference to the named category
+	 */
 	category &operator[](std::string_view name);
+
+	/**
+	 * @brief Return the const category named @a name, will return a reference
+	 * to a static empty category if it was not found.
+	 * 
+	 * @param name The name of the category to return
+	 * @return const category& Reference to the named category
+	 */
 	const category &operator[](std::string_view name) const;

+	/**
+	 * @brief Return a pointer to the category named @a name or nullptr if
+	 * it does not exist.
+	 * 
+	 * @param name The name of the category
+	 * @return category* Pointer to the category found or nullptr
+	 */
 	category *get(std::string_view name);
+
+	/**
+	 * @brief Return a pointer to the category named @a name or nullptr if
+	 * it does not exist.
+	 * 
+	 * @param name The name of the category
+	 * @return const category* Pointer to the category found or nullptr
+	 */
 	const category *get(std::string_view name) const;

+	/**
+	 * @brief Tries to find a category with name @a name and will create a
+	 * new one if it is not found. The result is a tuple of an iterator
+	 * pointing to the category and a boolean indicating whether the category
+	 * was created or not.
+	 * 
+	 * @param name The name for the category
+	 * @return std::tuple<iterator, bool> A tuple containing an iterator pointing
+	 * at the category and a boolean indicating whether the category was newly
+	 * created.
+	 */
 	std::tuple<iterator, bool> emplace(std::string_view name);

+	/**
+	 * @brief Get the preferred order of the categories when writing them
+	 */
 	std::vector<std::string> get_tag_order() const;
+
+	/**
+	 * @brief Write out the contents to @a os
+	 */
 	void write(std::ostream &os) const;
+
+	/**
+	 * @brief Write out the contents to @a os using the order defined in @a tag_order
+	 */
 	void write(std::ostream &os, const std::vector<std::string> &tag_order);

+	/**
+	 * @brief Friend operator<< to write datablock @a db to std::ostream @a os
+	 */
 	friend std::ostream &operator<<(std::ostream &os, const datablock &db)
 	{
 		db.write(os);
@@ -88,6 +192,9 @@ class datablock : public std::list<category>

 	// --------------------------------------------------------------------

+	/**
+	 * @brief Comparison operator to compare two datablock for equal content
+	 */
 	bool operator==(const datablock &rhs) const;

  private:
--- a/include/cif++/dictionary_parser.hpp
+++ b/include/cif++/dictionary_parser.hpp
@@ -26,11 +26,25 @@

 #pragma once

-#include <cif++/validate.hpp>
+#include "cif++/validate.hpp"
+
+/**
+ * @file dictionary_parser.hpp
+ * 
+ * Functions to create and manipulate validator objects 
+ */

 namespace cif
 {

+/**
+ * @brief Parse the contents of @a is and create a new validator object with name @a name
+ */
 validator parse_dictionary(std::string_view name, std::istream &is);

+/**
+ * @brief Extend the definitions in validator @a v with the contents of stream @a is
+ */
+void extend_dictionary(validator &v, std::istream &is);
+
 } // namespace cif
--- a/include/cif++/file.hpp
+++ b/include/cif++/file.hpp
@@ -28,30 +28,63 @@

 #include <list>

-#include <cif++/exports.hpp>
-#include <cif++/datablock.hpp>
-#include <cif++/parser.hpp>
+#include "cif++/datablock.hpp"
+#include "cif++/parser.hpp"
+
+/** \file file.hpp
+ * 
+ * The file class defined here encapsulates the contents of an mmCIF file
+ * It is mainly a list of @ref cif::datablock objects
+ * 
+ * The class file has methods to load dictionaries. These dictionaries are
+ * loaded from resources (if available) or from disk from several locations.
+ * 
+ * See the documentation on load_resource() in file: utilities.hpp for more
+ * information on how data is loaded. 
+ */

 namespace cif
 {

 // --------------------------------------------------------------------

+/**
+ * @brief The class file is actually a list of datablock objects
+ * 
+ */
+
 class file : public std::list<datablock>
 {
  public:
 	file() = default;

+	/**
+	 * @brief Construct a new file object using the data in the file @a p as content
+	 * 
+	 * @param p Path to a file containing the data to load
+	 */
 	explicit file(const std::filesystem::path &p)
 	{
 		load(p);
 	}

+	/**
+	 * @brief Construct a new file object using the data in the std::istream @a is
+	 * 
+	 * @param is The istream containing the data to load
+	 */
 	explicit file(std::istream &is)
 	{
 		load(is);
 	}

+	/**
+	 * @brief Construct a new file object with data in the constant string defined
+	 * by @a data and @a length
+	 * 
+	 * @param data The pointer to the character string with data to load
+	 * @param length The length of the data
+	 */
 	explicit file(const char *data, size_t length)
 	{
 		struct membuf : public std::streambuf
@@ -66,50 +99,136 @@ class file : public std::list<datablock>
 		load(is);
 	}

+	/** @cond */
 	file(const file &) = default;
 	file(file &&) = default;
 	file &operator=(const file &) = default;
 	file &operator=(file &&) = default;
+	/** @endcond */

+	/**
+	 * @brief Set the validator object to @a v
+	 */
 	void set_validator(const validator *v);

+	/**
+	 * @brief Get the validator object
+	 */
 	const validator *get_validator() const
 	{
 		return m_validator;
 	}

+	/**
+	 * @brief Validate the content and return true if everything was valid.
+	 * 
+	 * Will throw an exception if there is no validator defined.
+	 * 
+	 * If each category was valid, validate_links will also be called.
+	 * 
+	 * @return true If the content is valid
+	 * @return false If the content is not valid
+	 */
 	bool is_valid() const;
+
+	/**
+	 * @brief Validate the content and return true if everything was valid.
+	 * 
+	 * Will attempt to load the referenced dictionary if none was specified.
+	 * 
+	 * If each category was valid, validate_links will also be called.
+	 * 
+	 * @return true If the content is valid
+	 * @return false If the content is not valid
+	 */
 	bool is_valid();
+
+	/**
+	 * @brief Validate the links for all datablocks contained.
+	 * 
+	 * Will throw an exception if no validator was specified.
+	 * 
+	 * @return true If all links were valid
+	 * @return false If not all links were valid
+	 */
 	bool validate_links() const;

+	/**
+	 * @brief Attempt to load a dictionary (validator) based on
+	 * the contents of the *audit_conform* category, if available.
+	 */
 	void load_dictionary();
+
+
+	/**
+	 * @brief Attempt to load the named dictionary @a name and
+	 * create a validator based on it.
+	 * 
+	 * @param name The name of the dictionary to load
+	 */
 	void load_dictionary(std::string_view name);

+	/**
+	 * @brief Return true if a datablock with the name @a name is part of this file
+	 */
 	bool contains(std::string_view name) const;

+	/**
+	 * @brief return a reference to the first datablock in the file
+	 */
 	datablock &front()
 	{
 		assert(not empty());
 		return std::list<datablock>::front();
 	}

+	/**
+	 * @brief return a const reference to the first datablock in the file
+	 */
 	const datablock &front() const
 	{
 		assert(not empty());
 		return std::list<datablock>::front();
 	}

+	/**
+	 * @brief return a reference to the datablock named @a name
+	 */
 	datablock &operator[](std::string_view name);
+
+	/**
+	 * @brief return a const reference to the datablock named @a name
+	 */
 	const datablock &operator[](std::string_view name) const;

+	/**
+	 * @brief Tries to find a datablock with name @a name and will create a
+	 * new one if it is not found. The result is a tuple of an iterator
+	 * pointing to the datablock and a boolean indicating whether the datablock
+	 * was created or not.
+	 * 
+	 * @param name The name for the datablock
+	 * @return std::tuple<iterator, bool> A tuple containing an iterator pointing
+	 * at the datablock and a boolean indicating whether the datablock was newly
+	 * created.
+	 */
 	std::tuple<iterator, bool> emplace(std::string_view name);

+	/** Load the data from the file specified by @a p */
 	void load(const std::filesystem::path &p);
+
+	/** Load the data from @a is */
 	void load(std::istream &is);

+	/** Save the data to the file specified by @a p */
 	void save(const std::filesystem::path &p) const;
+
+	/** Save the data to @a os */
 	void save(std::ostream &os) const;

+	/**
+	 * @brief Friend operator<< to write file @a f to std::ostream @a os
+	 */
 	friend std::ostream &operator<<(std::ostream &os, const file &f)
 	{
 		f.save(os);
--- a/include/cif++/format.hpp
+++ b/include/cif++/format.hpp
@@ -28,10 +28,12 @@

 #include <string>

-/// \file format.hpp
-/// File containing a basic reimplementation of boost::format
-/// but then a bit more simplistic. Still this allowed me to move my code
-/// from using boost::format to something without external dependency easily.
+/**  \file format.hpp
+ * 
+ * File containing a basic reimplementation of boost::format
+ * but then a bit more simplistic. Still this allowed me to move my code
+ * from using boost::format to something without external dependency easily.
+ */

 namespace cif
 {
@@ -85,6 +87,8 @@ namespace detail

 } // namespace

+/** @cond */
+
 template <typename... Args>
 class format_plus_arg
 {
@@ -132,6 +136,23 @@ class format_plus_arg
 	vargs_vector_type m_vargs;
 };

+/** @endcond */
+
+/**
+ * @brief A simplistic reimplementation of boost::format, in fact it is
+ * actually a way to call the C function snprintf to format the arguments
+ * in @a args into the format string @a fmt
+ * 
+ * The string in @a fmt should thus be a C style format string.
+ * 
+ * TODO: Move to C++23 style of printing.
+ * 
+ * @tparam Args The types of the arguments
+ * @param fmt The format string
+ * @param args The arguments
+ * @return An object that can be written out to a std::ostream using operator<<
+ */
+
 template <typename... Args>
 constexpr auto format(std::string_view fmt, Args... args)
 {
@@ -144,11 +165,20 @@ constexpr auto format(std::string_view fmt, Args... args)
 class fill_out_streambuf : public std::streambuf
 {
  public:
+
+	/** @cond */
+
 	using base_type = std::streambuf;
 	using int_type = base_type::int_type;
 	using char_type = base_type::char_type;
 	using traits_type = base_type::traits_type;

+	/** @endcond */
+
+	/**
+	 * @brief Construct a new fill out streambuf object based on ostream @a os,
+	 * filling out each line to a width of @a width
+	 */
 	fill_out_streambuf(std::ostream &os, int width = 80)
 		: m_os(os)
 		, m_upstream(os.rdbuf())
@@ -156,11 +186,21 @@ class fill_out_streambuf : public std::streambuf
 	{
 	}

+	/** @cond */
+
 	~fill_out_streambuf()
 	{
 		m_os.rdbuf(m_upstream);
 	}

+	/** @endcond */
+
+	/**
+	 * @brief The magic happens here. Write out a couple of spaces when
+	 * the last character to write is a newline to make the line as
+	 * wide as the requested width.
+	 */
+	
 	virtual int_type
 	overflow(int_type ic = traits_type::eof())
 	{
@@ -191,8 +231,10 @@ class fill_out_streambuf : public std::streambuf
 		return result;
 	}

+	/** Return the upstream streambuf */
 	std::streambuf *get_upstream() const { return m_upstream; }

+	/** Return how many lines have been written */
 	int get_line_count() const { return m_line_count; }

  private:
--- a/include/cif++/forward_decl.hpp
+++ b/include/cif++/forward_decl.hpp
@@ -26,11 +26,16 @@

 #pragma once

-#include <cif++/exports.hpp>
-
 #include <string>
 #include <vector>

+/**
+ * @file forward_decl.hpp
+ * 
+ * File containing only forward declarations
+ * 
+ */
+
 namespace cif
 {

--- a/include/cif++/gzio.hpp
+++ b/include/cif++/gzio.hpp
@@ -13,27 +13,29 @@

 #include <zlib.h>

-/// \file gzio.hpp
-///
-/// Single header file for the implementation of stream classes
-/// that can transparently read and write compressed files.
-///
-/// The gzio::istream_buf class sniffs the input and decides whether to use
-/// a decompressor if a signature was recognized.
-///
-/// There's also an ifstream and ofstream class here that can
-/// read and write compressed files. In this case the decission
-/// whether to use a compressions/decompression algorithm is
-/// based on the extension of the \a filename argument.
-
-// This is a stripped down version of the gxrio library from
-// https://github.com/mhekkel/gxrio.git
-// Most notably, the lzma support has been removed since getting
-// that to work in Windows proved to be too much work.
+/** \file gzio.hpp
+ * 
+ * Single header file for the implementation of stream classes
+ * that can transparently read and write compressed files.
+ * 
+ * The gzio::istream_buf class sniffs the input and decides whether to use
+ * a decompressor if a signature was recognized.
+ * 
+ * There's also an ifstream and ofstream class here that can
+ * read and write compressed files. In this case the decision
+ * whether to use a compression/decompression algorithm is
+ * based on the extension of the \a filename argument.
+ *
+ * This is a stripped down version of the gxrio library from
+ * https://github.com/mhekkel/gxrio.git
+ * Most notably, the lzma support has been removed since getting
+ * that to work in Windows proved to be too much work.
+ */

 namespace cif::gzio
 {

+/** The default buffer size to use */
 const size_t kDefaultBufferSize = 256;

 // --------------------------------------------------------------------
@@ -50,6 +52,8 @@ template <typename CharT, typename Traits>
 class basic_streambuf : public std::basic_streambuf<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	using char_type = CharT;
 	using traits_type = Traits;

@@ -77,14 +81,21 @@ class basic_streambuf : public std::basic_streambuf<CharT, Traits>
 		return *this;
 	}

+	/** @endcond */
+
+	/** Set the upstream streambuf to @a upstream */
 	void set_upstream(streambuf_type *upstream)
 	{
 		m_upstream = upstream;
 	}

+	/** @cond */
+
 	virtual basic_streambuf *init(streambuf_type *sb) = 0;
 	virtual basic_streambuf *close() = 0;

+	/** @endcond */
+
  protected:
 	/// \brief The upstream streambuf object, usually this is a basic_filebuf
 	streambuf_type *m_upstream = nullptr;
@@ -105,6 +116,8 @@ template <typename CharT, typename Traits, size_t BufferSize = kDefaultBufferSiz
 class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	static_assert(sizeof(CharT) == 1, "Unfortunately, support for wide characters is not implemented yet.");

 	using char_type = CharT;
@@ -121,6 +134,8 @@ class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>

 	basic_igzip_streambuf(const basic_igzip_streambuf &) = delete;

+	/** @endcond */
+
 	/// \brief Move constructor
 	basic_igzip_streambuf(basic_igzip_streambuf &&rhs)
 		: base_type(std::move(rhs))
@@ -141,6 +156,8 @@ class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>
 		}
 	}

+	/** @cond */
+
 	basic_igzip_streambuf &operator=(const basic_igzip_streambuf &) = delete;

 	/// \brief Move operator= implementation
@@ -171,6 +188,8 @@ class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>
 		close();
 	}

+	/** @endcond */
+
 	/// \brief This closes the zlib stream and sets the get pointers to null.
 	base_type *close() override
 	{
@@ -246,10 +265,13 @@ class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>
 					zstream.avail_in = static_cast<uInt>(this->m_upstream->sgetn(m_in_buffer.data(), m_in_buffer.size()));
 				}

+				if (zstream.avail_in == 0)
+					break;
+
 				int err = ::inflate(&zstream, Z_SYNC_FLUSH);
 				std::streamsize n = kBufferByteSize - zstream.avail_out;

-				if (err == Z_STREAM_END or (err == Z_OK and n > 0))
+				if (n > 0)
 				{
 					this->setg(
 						m_out_buffer.data(),
@@ -258,6 +280,9 @@ class basic_igzip_streambuf : public basic_streambuf<CharT, Traits>
 					break;
 				}

+				if (err == Z_STREAM_END and zstream.avail_in > 0)
+					err = ::inflateReset2(&zstream, 47);
+
 				if (err < Z_OK)
 					break;
 			}
@@ -296,6 +321,8 @@ template <typename CharT, typename Traits, size_t BufferSize = kDefaultBufferSiz
 class basic_ogzip_streambuf : public basic_streambuf<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	static_assert(sizeof(CharT) == 1, "Unfortunately, support for wide characters is not implemented yet.");

 	using char_type = CharT;
@@ -326,6 +353,8 @@ class basic_ogzip_streambuf : public basic_streambuf<CharT, Traits>

 	basic_ogzip_streambuf &operator=(const basic_ogzip_streambuf &) = delete;

+	/** @endcond */
+
 	/// \brief Move operator=
 	basic_ogzip_streambuf &operator=(basic_ogzip_streambuf &&rhs)
 	{
@@ -481,6 +510,8 @@ template <typename CharT, typename Traits>
 class basic_istream : public std::basic_istream<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	using base_type = std::basic_istream<CharT, Traits>;

 	using traits_type = Traits;
@@ -492,6 +523,8 @@ class basic_istream : public std::basic_istream<CharT, Traits>

 	using gzip_streambuf_type = basic_igzip_streambuf<char_type, traits_type>;

+	/** @endcond */
+
 	/// \brief Regular move constructor
 	basic_istream(basic_istream &&rhs)
 		: base_type(std::move(rhs))
@@ -585,6 +618,8 @@ template <typename CharT, typename Traits>
 class basic_ifstream : public basic_istream<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	using base_type = basic_istream<CharT, Traits>;

 	using char_type = CharT;
@@ -602,6 +637,8 @@ class basic_ifstream : public basic_istream<CharT, Traits>
 		close();
 	}

+	/** @endcond */
+
 	/// \brief Construct an ifstream
 	/// \param filename Null terminated string specifying the file to open
 	/// \param mode The mode in which to open the file
@@ -641,10 +678,13 @@ class basic_ifstream : public basic_istream<CharT, Traits>
 			this->rdbuf(&m_filebuf);
 	}

+	/** @cond */
 	basic_ifstream(const basic_ifstream &) = delete;

 	basic_ifstream &operator=(const basic_ifstream &) = delete;

+	/** @endcond */
+
 	/// \brief Move version of operator=
 	basic_ifstream &operator=(basic_ifstream &&rhs)
 	{
@@ -771,6 +811,8 @@ template <typename CharT, typename Traits>
 class basic_ostream : public std::basic_ostream<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	using base_type = std::basic_ostream<CharT, Traits>;

 	using char_type = CharT;
@@ -779,6 +821,8 @@ class basic_ostream : public std::basic_ostream<CharT, Traits>
 	using z_streambuf_type = basic_streambuf<char_type, traits_type>;
 	using upstreambuf_type = std::basic_streambuf<char_type, traits_type>;

+	/** @endcond */
+
 	/// \brief Regular move constructor
 	basic_ostream(basic_ostream &&rhs)
 		: base_type(std::move(rhs))
@@ -840,6 +884,8 @@ template <typename CharT, typename Traits>
 class basic_ofstream : public basic_ostream<CharT, Traits>
 {
  public:
+	/** @cond */
+
 	using base_type = basic_ostream<CharT, Traits>;

 	using char_type = CharT;
@@ -855,6 +901,8 @@ class basic_ofstream : public basic_ostream<CharT, Traits>
 		close();
 	}

+	/** @endcond */
+
 	/// \brief Construct an ofstream
 	/// \param filename Null terminated string specifying the file to open
 	/// \param mode The mode in which to open the file
@@ -893,10 +941,14 @@ class basic_ofstream : public basic_ostream<CharT, Traits>
 			this->rdbuf(&m_filebuf);
 	}

+	/** @cond */
+
 	basic_ofstream(const basic_ofstream &) = delete;

 	basic_ofstream &operator=(const basic_ofstream &) = delete;

+	/** @endcond */
+
 	/// \brief Move operator=
 	basic_ofstream &operator=(basic_ofstream &&rhs)
 	{
@@ -1026,11 +1078,13 @@ class basic_ofstream : public basic_ostream<CharT, Traits>

 // --------------------------------------------------------------------

-/// \brief Convenience typedefs
+/// \brief Convenience typedef for a regular istream
 using istream = basic_istream<char, std::char_traits<char>>;
+
+/// \brief Convenience typedef for a file ifstream
 using ifstream = basic_ifstream<char, std::char_traits<char>>;

-// using ostream = basic_ostream<char, std::char_traits<char>>;
+/// \brief Convenience typedef for a file ofstream
 using ofstream = basic_ofstream<char, std::char_traits<char>>;

 } // namespace gzio
--- a/include/cif++/item.hpp
+++ b/include/cif++/item.hpp
@@ -26,10 +26,10 @@

 #pragma once

-#include <cif++/exports.hpp>
-#include <cif++/forward_decl.hpp>
-#include <cif++/text.hpp>
-#include <cif++/utilities.hpp>
+#include "cif++/exports.hpp"
+#include "cif++/forward_decl.hpp"
+#include "cif++/text.hpp"
+#include "cif++/utilities.hpp"

 #include <cassert>
 #include <charconv>
@@ -41,22 +41,61 @@
 #include <optional>
 #include <utility>

-/// \file item.hpp
-/// This file contains the declaration of item but also the item_value and item_handle
-/// These handle the storage of and access to the data for a single data field. 
+/** \file item.hpp
+ *
+ * This file contains the declaration of item but also the item_value and item_handle
+ * These handle the storage of and access to the data for a single data field.
+ */

 namespace cif
 {

 // --------------------------------------------------------------------
-/// \brief item is a transient class that is used to pass data into rows
-///        but it also takes care of formatting data. 
+/** @brief item is a transient class that is used to pass data into rows
+ * but it also takes care of formatting data.
+ * 
+ * 
+ * 
+ * The class cif::item is often used implicitly when creating a row in a category
+ * using the emplace function.
+ * 
+ * @code{.cpp}
+ * cif::category cat("my-cat");
+ * cat.emplace({
+ *   { "item-1", 1 },                             // <- stores an item with value 1
+ *   { "item-2", 1.0, 2 },                        // <- stores an item with value 1.00
+ *   { "item-3", std::optional<int>() },          // <- stores an item with value ?
+ *   { "item-4", std::make_optional<int>(42) },   // <- stores an item with value 42
+ *   { "item-5" }                                 // <- stores an item with value .
+ * });
+ * 
+ * std::cout << cat << '\n';
+ * @endcode
+ * 
+ * Will result in:
+ * 
+ * @code{.txt}
+ * _my-cat.item-1 1
+ * _my-cat.item-2 1.00
+ * _my-cat.item-3 ?
+ * _my-cat.item-4 42
+ * _my-cat.item-5 .
+ * @endcode
+ */
 class item
 {
  public:
 	/// \brief Default constructor, empty item
 	item() = default;

+	/// \brief constructor for an item with name \a name and as
+	/// content the character '.', i.e. an inapplicable value.
+	item(std::string_view name)
+		: m_name(name)
+		, m_value({ '.' })
+	{
+	}
+
 	/// \brief constructor for an item with name \a name and as
 	/// content a single character string with content \a value
 	item(std::string_view name, char value)
@@ -66,7 +105,7 @@ class item
 	}

 	/// \brief constructor for an item with name \a name and as
-	/// content a the formatted floating point value \a value with
+	/// content the formatted floating point value \a value with
 	/// precision \a precision
 	template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
 	item(std::string_view name, const T &value, int precision)
@@ -108,8 +147,8 @@ class item
 	}

 	/// \brief constructor for an item with name \a name and as
-	/// content a the formatted integral value \a value
-	template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T,bool>, int> = 0>
+	/// content the formatted integral value \a value
+	template <typename T, std::enable_if_t<std::is_integral_v<T> and not std::is_same_v<T, bool>, int> = 0>
 	item(const std::string_view name, const T &value)
 		: m_name(name)
 	{
@@ -125,8 +164,8 @@ class item
 	}

 	/// \brief constructor for an item with name \a name and as
-	/// content a the formatted boolean value \a value
-	template <typename T, std::enable_if_t<std::is_same_v<T,bool>, int> = 0>
+	/// content the formatted boolean value \a value
+	template <typename T, std::enable_if_t<std::is_same_v<T, bool>, int> = 0>
 	item(const std::string_view name, const T &value)
 		: m_name(name)
 	{
@@ -141,16 +180,46 @@ class item
 	{
 	}

+	/// \brief constructor for an item with name \a name and as
+	/// content the optional value \a value
+	template <typename T>
+	item(const std::string_view name, const std::optional<T> &value)
+		: m_name(name)
+	{
+		if (value.has_value())
+		{
+			item tmp(name, *value);
+			std::swap(tmp.m_value, m_value);
+		}
+		else
+			m_value.assign("?");
+	}
+
+	/// \brief constructor for an item with name \a name and as
+	/// content the formatted floating point value \a value with
+	/// precision \a precision
+	template <typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
+	item(std::string_view name, const std::optional<T> &value, int precision)
+		: m_name(name)
+	{
+		if (value.has_value())
+		{
+			item tmp(name, *value, precision);
+			std::swap(tmp.m_value, m_value);
+		}
+		else
+			m_value.assign("?");
+	}
+
+	/** @cond */
 	item(const item &rhs) = default;
-
 	item(item &&rhs) noexcept = default;
-
 	item &operator=(const item &rhs) = default;
-
 	item &operator=(item &&rhs) noexcept = default;
+	/** @endcond */

-	std::string_view name() const { return m_name; }
-	std::string_view value() const { return m_value; }
+	std::string_view name() const { return m_name; }   ///< Return the name of the item
+	std::string_view value() const { return m_value; } ///< Return the value of the item

 	/// \brief replace the content of the stored value with \a v
 	void value(std::string_view v) { m_value = v; }
@@ -168,11 +237,13 @@ class item
 	size_t length() const { return m_value.length(); }

 	/// \brief support for structured binding
-	template<size_t N>
+	template <size_t N>
 	decltype(auto) get() const
 	{
-		     if constexpr (N == 0) return name();
-		else if constexpr (N == 1) return value();
+		if constexpr (N == 0)
+			return name();
+		else if constexpr (N == 1)
+			return value();
 	}

  private:
@@ -190,7 +261,9 @@ class item

 struct item_value
 {
+	/** @cond */
 	item_value() = default;
+	/** @endcond */

 	/// \brief constructor
 	item_value(std::string_view text)
@@ -210,6 +283,7 @@ struct item_value
 		}
 	}

+	/** @cond */
 	item_value(item_value &&rhs)
 		: m_length(std::exchange(rhs.m_length, 0))
 		, m_storage(std::exchange(rhs.m_storage, 0))
@@ -236,24 +310,29 @@ struct item_value

 	item_value(const item_value &) = delete;
 	item_value &operator=(const item_value &) = delete;
+	/** @endcond */

+	/** operator bool, allows easy checking for empty items */
 	explicit operator bool() const
 	{
 		return m_length != 0;
 	}

-	size_t m_length = 0;
+	size_t m_length = 0; ///< Length of the data
 	union
 	{
-		char m_local_data[8];
-		char *m_data;
-		uint64_t m_storage;
+		char m_local_data[8]; ///< Storage area for small strings (strings smaller than kBufferSize)
+		char *m_data;         ///< Pointer to a string stored in the heap
+		uint64_t m_storage;   ///< Alternative storage of the data, used in move operations
 	};

+	/** The maximum length of locally stored strings */
 	static constexpr size_t kBufferSize = sizeof(m_local_data);

 	// By using std::string_view instead of c_str we obain a
 	// nice performance gain since we avoid many calls to strlen.
+
+	/** Return the content of the item as a std::string_view */
 	constexpr inline std::string_view text() const
 	{
 		return { m_length >= kBufferSize ? m_data : m_local_data, m_length };
@@ -268,10 +347,19 @@ struct item_value
 struct item_handle
 {
  public:
+	/** @cond */
 	// conversion helper class
 	template <typename T, typename = void>
 	struct item_value_as;
+	/** @endcond */

+	/**
+	 * @brief Assign value @a value to the item referenced
+	 *
+	 * @tparam T Type of the value
+	 * @param value The value
+	 * @return reference to this item_handle
+	 */
 	template <typename T>
 	item_handle &operator=(const T &value)
 	{
@@ -280,6 +368,20 @@ struct item_handle
 		return *this;
 	}

+	/**
+	 * @brief A method with a variable number of arguments that will be concatenated and
+	 * assigned as a string. Use it like this:
+	 *
+	 * @code{.cpp}
+	 * cif::item_handle ih;
+	 * ih.os("The result of ", 1, " * ", 42, " is of course ", 42);
+	 * @endcode
+	 *
+	 * And the content will then be `The result of 1 * 42 is of course 42`.
+	 *
+	 * @tparam Ts Types of the parameters
+	 * @param v The parameters to concatenate
+	 */
 	template <typename... Ts>
 	void os(const Ts &...v)
 	{
@@ -288,8 +390,10 @@ struct item_handle
 		this->operator=(ss.str());
 	}

+	/** Swap contents of this and @a b */
 	void swap(item_handle &b);

+	/** Return the contents of this item converted to type @a T */
 	template <typename T = std::string>
 	auto as() const -> T
 	{
@@ -297,18 +401,36 @@ struct item_handle
 		return item_value_as<value_type>::convert(*this);
 	}

+	/** Return the contents of this item converted to type @a T or, if not
+	 * set, use @a dv as the default value.
+	 */
 	template <typename T>
 	auto value_or(const T &dv) const
 	{
 		return empty() ? dv : this->as<T>();
 	}

+	/**
+	 * @brief Compare the contents of this item with value @a value
+	 * optionally ignoring character case, if @a icase is true.
+	 * Returns 0 if both are equal, -1 if this sorts before @a value
+	 * and 1 if this sorts after @a value
+	 *
+	 * @tparam T Type of the value @a value
+	 * @param value The value to compare with
+	 * @param icase Flag indicating if character case should be ignored when comparing
+	 * @return -1, 0 or 1
+	 */
 	template <typename T>
 	int compare(const T &value, bool icase = true) const
 	{
 		return item_value_as<T>::compare(*this, value, icase);
 	}

+	/**
+	 * @brief Compare the value contained with the value @a value and
+	 * return true if both are equal.
+	 */
 	template <typename T>
 	bool operator==(const T &value) const
 	{
@@ -317,45 +439,64 @@ struct item_handle
 	}

 	// We may not have C++20 yet...
+
+	/**
+	 * @brief Compare the value contained with the value @a value and
+	 * return true if both are not equal.
+	 */
 	template <typename T>
 	bool operator!=(const T &value) const
 	{
 		return not operator==(value);
 	}

-	// empty means either null or unknown
+	/**
+	 * @brief Returns true if the content string is empty or
+	 * only contains '.' meaning null or '?' meaning unknown
+	 * in a mmCIF context
+	 */
 	bool empty() const
 	{
 		auto txt = text();
 		return txt.empty() or (txt.length() == 1 and (txt.front() == '.' or txt.front() == '?'));
 	}

+	/** Easy way to test for a non-empty item, returns true if the item is not empty */
 	explicit operator bool() const { return not empty(); }

-	// is_null means the field contains '.'
+	/// is_null returns true if the field contains '.'
 	bool is_null() const
 	{
 		auto txt = text();
 		return txt.length() == 1 and txt.front() == '.';
 	}

-	// is_unknown means the field contains '?'
+	/// is_unknown returns true if the field contains '?'
 	bool is_unknown() const
 	{
 		auto txt = text();
 		return txt.length() == 1 and txt.front() == '?';
 	}

+	/** Return a std::string_view for the contents */
 	std::string_view text() const;

+	/**
+	 * @brief Construct a new item handle object
+	 *
+	 * @param column Column index
+	 * @param row Reference to the row
+	 */
 	item_handle(uint16_t column, row_handle &row)
 		: m_column(column)
 		, m_row_handle(row)
 	{
 	}

-	static CIFPP_EXPORT const item_handle s_null_item;
+	/** A variable holding an empty item */
+	CIFPP_EXPORT static const item_handle s_null_item;

+	/** friend to swap two item handles */
 	friend void swap(item_handle a, item_handle b)
 	{
 		a.swap(b);
@@ -372,6 +513,7 @@ struct item_handle

 // So sad that older gcc implementations of from_chars did not support floats yet...

+/** @cond */
 template <typename T>
 struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> and not std::is_same_v<T, bool>>>
 {
@@ -385,17 +527,22 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
 		{
 			auto txt = ref.text();

-			std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), result);
+			auto b = txt.data();
+			auto e = txt.data() + txt.size();

-			if (r.ec != std::errc())
+			std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(static_cast<unsigned char>(b[1]))) ? selected_charconv<value_type>::from_chars(b + 1, e, result) : selected_charconv<value_type>::from_chars(b, e, result); // skip a leading '+'; cast since std::isdigit is undefined for negative char values
+
+			if (r.ec != std::errc() or r.ptr != e)
 			{
 				result = {};
 				if (cif::VERBOSE)
 				{
 					if (r.ec == std::errc::invalid_argument)
-						std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
+						std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number\n";
 					else if (r.ec == std::errc::result_out_of_range)
-						std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
+						std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small\n";
+					else
+						std::cerr << "Not a valid number " << std::quoted(txt) << '\n';
 				}
 			}
 		}
@@ -415,16 +562,21 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
 		{
 			value_type v = {};

-			std::from_chars_result r = selected_charconv<value_type>::from_chars(txt.data(), txt.data() + txt.size(), v);
+			auto b = txt.data();
+			auto e = txt.data() + txt.size();

-			if (r.ec != std::errc())
+			std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(static_cast<unsigned char>(b[1]))) ? selected_charconv<value_type>::from_chars(b + 1, e, v) : selected_charconv<value_type>::from_chars(b, e, v); // skip a leading '+'; cast since std::isdigit is undefined for negative char values
+
+			if (r.ec != std::errc() or r.ptr != e)
 			{
 				if (cif::VERBOSE)
 				{
 					if (r.ec == std::errc::invalid_argument)
-						std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number" << std::endl;
+						std::cerr << "Attempt to convert " << std::quoted(txt) << " into a number\n";
 					else if (r.ec == std::errc::result_out_of_range)
-						std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small" << std::endl;
+						std::cerr << "Conversion of " << std::quoted(txt) << " into a type that is too small\n";
+					else
+						std::cerr << "Not a valid number " << std::quoted(txt) << '\n';
 				}
 				result = 1;
 			}
@@ -546,22 +698,33 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_same_v<T, std::str
 	}
 };

+/** @endcond */
+
 } // namespace cif

 namespace std
 {

-template<> struct tuple_size<::cif::item>
-            : public std::integral_constant<std::size_t, 2> {};
+/** @cond */

-template<> struct tuple_element<0, ::cif::item>
+template <>
+struct tuple_size<::cif::item>
+	: public std::integral_constant<std::size_t, 2>
+{
+};
+
+template <>
+struct tuple_element<0, ::cif::item>
 {
 	using type = decltype(std::declval<::cif::item>().name());
 };

-template<> struct tuple_element<1, ::cif::item>
+template <>
+struct tuple_element<1, ::cif::item>
 {
 	using type = decltype(std::declval<::cif::item>().value());
 };

-}
+/** @endcond */
+
+} // namespace std
--- a/include/cif++/iterator.hpp
+++ b/include/cif++/iterator.hpp
@@ -26,26 +26,50 @@

 #pragma once

-#include <cif++/row.hpp>
+#include "cif++/row.hpp"

 #include <array>

+/**
+ * @file iterator.hpp
+ *
+ * This file contains several implementations of generic iterators.
+ *
+ * Using partial specialization we can have implementation for
+ * iterators that return row_handles, a single value or tuples of
+ * multiple values.
+ *
+ */
+
 namespace cif
 {

 // --------------------------------------------------------------------

+/**
+ * @brief Implementation of an iterator that can return
+ * multiple values in a tuple. Of course, that tuple can
+ * then be used in structured binding to receive the values
+ * in a for loop e.g.
+ *
+ * @tparam Category The category for this iterator
+ * @tparam Ts The types this iterator can be dereferenced to
+ */
 template <typename Category, typename... Ts>
 class iterator_impl
 {
  public:
+	/** @cond */
 	template <typename, typename...>
 	friend class iterator_impl;

 	friend class category;
+	/** @endcond */

+	/** variable that contains the number of elements in the tuple */
 	static constexpr size_t N = sizeof...(Ts);

+	/** @cond */
 	using category_type = std::remove_cv_t<Category>;
 	using row_type = std::conditional_t<std::is_const_v<Category>, const row, row>;

@@ -152,14 +176,16 @@ class iterator_impl
 		return m_current != rhs.m_current;
 	}

+	/** @endcond */
+
  private:
 	template <size_t... Is>
 	tuple_type get(std::index_sequence<Is...>) const
 	{
 		if (m_current != nullptr)
 		{
-			row_handle rh{*m_category, *m_current};
-			return tuple_type{rh[m_column_ix[Is]].template as<Ts>()...};
+			row_handle rh{ *m_category, *m_current };
+			return tuple_type{ rh[m_column_ix[Is]].template as<Ts>()... };
 		}

 		return {};
@@ -171,10 +197,18 @@ class iterator_impl
 	std::array<uint16_t, N> m_column_ix;
 };

-template<typename Category>
+/**
+ * @brief Implementation of an iterator that returns
+ * only row_handles
+ *
+ * @tparam Category The category for this iterator
+ */
+template <typename Category>
 class iterator_impl<Category>
 {
  public:
+	/** @cond */
+
 	template <typename, typename...>
 	friend class iterator_impl;

@@ -195,7 +229,7 @@ class iterator_impl<Category>
 	template <typename C2>
 	iterator_impl(const iterator_impl<C2> &rhs)
 		: m_category(rhs.m_category)
-		, m_current(const_cast<row_type*>(rhs.m_current))
+		, m_current(const_cast<row_type *>(rhs.m_current))
 	{
 	}

@@ -223,7 +257,7 @@ class iterator_impl<Category>

 	reference operator*()
 	{
-		return {*m_category, *m_current};
+		return { *m_category, *m_current };
 	}

 	pointer operator->()
@@ -271,16 +305,26 @@ class iterator_impl<Category>
 		return m_current != rhs.m_current;
 	}

+	/** @endcond */
+
  private:
 	category_type *m_category = nullptr;
 	row_type *m_current = nullptr;
 };

+/**
+ * @brief Implementation of an iterator that can return
+ * a single value.
+ *
+ * @tparam Category The category for this iterator
+ * @tparam T The type this iterator can be dereferenced to
+ */

-template<typename Category, typename T>
+template <typename Category, typename T>
 class iterator_impl<Category, T>
 {
  public:
+	/** @cond */
 	template <typename, typename...>
 	friend class iterator_impl;

@@ -390,12 +434,14 @@ class iterator_impl<Category, T>
 		return m_current != rhs.m_current;
 	}

+	/** @endcond */
+
  private:
 	value_type get() const
 	{
 		if (m_current != nullptr)
 		{
-			row_handle rh{*m_category, *m_current};
+			row_handle rh{ *m_category, *m_current };
 			return rh[m_column_ix].template as<T>();
 		}

@@ -411,10 +457,23 @@ class iterator_impl<Category, T>
 // --------------------------------------------------------------------
 // iterator proxy

+/**
+ * @brief An iterator_proxy is used as a result type for methods that
+ * return a range of values you want to iterate over.
+ *
+ * E.g. the class cif::category contains the method cif::category::rows()
+ * that returns an iterator_proxy that allows you to iterate over
+ * all the rows in the category.
+ *
+ * @tparam Category The category for the iterators
+ * @tparam Ts The types the iterators return. See class: iterator
+ */
+
 template <typename Category, typename... Ts>
 class iterator_proxy
 {
  public:
+	/** @cond */
 	static constexpr const size_t N = sizeof...(Ts);

 	using category_type = Category;
@@ -431,21 +490,21 @@ class iterator_proxy

 	iterator_proxy(const iterator_proxy &) = delete;
 	iterator_proxy &operator=(const iterator_proxy &) = delete;
+	/** @endcond */

-	iterator begin() const { return iterator(m_begin, m_column_ix); }
-	iterator end() const { return iterator(m_end, m_column_ix); }
+	iterator begin() const { return iterator(m_begin, m_column_ix); } ///< Return the iterator pointing to the first row
+	iterator end() const { return iterator(m_end, m_column_ix); }     ///< Return the iterator pointing past the last row

-	bool empty() const { return m_begin == m_end; }
-
-	explicit operator bool() const { return not empty(); }
-
-	size_t size() const { return std::distance(begin(), end()); }
+	bool empty() const { return m_begin == m_end; }               ///< Return true if the range is empty
+	explicit operator bool() const { return not empty(); }        ///< Easy way to detect if the range is empty
+	size_t size() const { return std::distance(begin(), end()); } ///< Return size of the range

 	// row front() { return *begin(); }
 	// row back() { return *(std::prev(end())); }

-	category_type &category() const { return *m_category; }
+	category_type &category() const { return *m_category; } ///< Return the category the iterator belong to

+	/** swap */
 	void swap(iterator_proxy &rhs)
 	{
 		std::swap(m_category, rhs.m_category);
@@ -463,10 +522,20 @@ class iterator_proxy
 // --------------------------------------------------------------------
 // conditional iterator proxy

+/**
+ * @brief A conditional iterator proxy is similar to an iterator_proxy
+ * in that it can be used to return a range of rows you can iterate over.
+ * In the case of a conditional_iterator_proxy a cif::condition is used
+ * to filter out only those rows that match the condition.
+ *
+ * @tparam CategoryType The category the iterators belong to
+ * @tparam Ts The types to which the iterators can be dereferenced
+ */
 template <typename CategoryType, typename... Ts>
 class conditional_iterator_proxy
 {
  public:
+	/** @cond */
 	static constexpr const size_t N = sizeof...(Ts);

 	using category_type = std::remove_cv_t<CategoryType>;
@@ -507,7 +576,7 @@ class conditional_iterator_proxy
 			{
 				if (++mBegin == mEnd)
 					break;
-
+				
 				if (m_condition->operator()(mBegin))
 					break;
 			}
@@ -549,20 +618,21 @@ class conditional_iterator_proxy
 	conditional_iterator_proxy(const conditional_iterator_proxy &) = delete;
 	conditional_iterator_proxy &operator=(const conditional_iterator_proxy &) = delete;

-	iterator begin() const;
-	iterator end() const;
+	/** @endcond */

-	bool empty() const;
+	iterator begin() const; ///< Return the iterator pointing to the first row
+	iterator end() const;   ///< Return the iterator pointing past the last row

-	explicit operator bool() const { return not empty(); }
+	bool empty() const;                                           ///< Return true if the range is empty
+	explicit operator bool() const { return not empty(); }        ///< Easy way to detect if the range is empty
+	size_t size() const { return std::distance(begin(), end()); } ///< Return size of the range

-	size_t size() const { return std::distance(begin(), end()); }
-
-	row_handle front() { return *begin(); }
+	row_handle front() { return *begin(); } ///< Return reference to the first row
 	// row_handle back() { return *begin(); }

-	CategoryType &category() const { return *m_cat; }
+	CategoryType &category() const { return *m_cat; } ///< Category the iterators belong to

+	/** swap */
 	void swap(conditional_iterator_proxy &rhs);

  private:
@@ -574,6 +644,7 @@ class conditional_iterator_proxy

 // --------------------------------------------------------------------

+/** @cond */
 template <typename Category, typename... Ts>
 iterator_proxy<Category, Ts...>::iterator_proxy(Category &cat, row_iterator pos, char const *const columns[N])
 	: m_category(&cat)
@@ -607,6 +678,8 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_impl::conditio
 	, mEnd(cat.end(), cix)
 	, m_condition(&cond)
 {
+	if (m_condition == nullptr or m_condition->empty())
+		mBegin = mEnd;
 }

 template <typename Category, typename... Ts>
@@ -631,10 +704,15 @@ conditional_iterator_proxy<Category, Ts...>::conditional_iterator_proxy(Category
 {
 	static_assert(sizeof...(Ts) == sizeof...(Ns), "Number of column names should be equal to number of requested value types");

-	m_condition.prepare(cat);
+	if (m_condition)
+	{
+		m_condition.prepare(cat);

-	while (mCBegin != mCEnd and not m_condition(*mCBegin))
-		++mCBegin;
+		while (mCBegin != mCEnd and not m_condition(*mCBegin))
+			++mCBegin;
+	}
+	else
+		mCBegin = mCEnd; // no usable condition: make the range empty (was a no-op comparison)

 	uint16_t i = 0;
 	((mCix[i++] = m_cat->get_column_ix(names)), ...);
@@ -675,4 +753,6 @@ void conditional_iterator_proxy<Category, Ts...>::swap(conditional_iterator_prox
 	std::swap(mCix, rhs.mCix);
 }

+/** @endcond */
+
 } // namespace cif
--- a/include/cif++/matrix.hpp
+++ b/include/cif++/matrix.hpp
@@ -0,0 +1,689 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include <array>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
+#include <ostream>
+#include <tuple>
+#include <type_traits>
+#include <vector>
+
+/**
+ * @file matrix.hpp
+ * 
+ * Some basic matrix operations and classes to hold matrices.
+ * 
+ * We're using expression templates for optimal performance.
+ * 
+ */
+
+namespace cif
+{
+// --------------------------------------------------------------------
+// We're using expression templates here
+
+/**
+ * @brief Base for the matrix expression templates
+ * This all uses the Curiously recurring template pattern
+ * 
+ * @tparam M The type of the derived class
+ */
+template <typename M>
+class matrix_expression
+{
+  public:
+	constexpr size_t dim_m() const { return static_cast<const M &>(*this).dim_m(); } ///< Return the size (dimension) in direction m, forwarded to the derived class
+	constexpr size_t dim_n() const { return static_cast<const M &>(*this).dim_n(); } ///< Return the size (dimension) in direction n, forwarded to the derived class
+
+	constexpr bool empty() const { return dim_m() == 0 or dim_n() == 0; } ///< Convenient way to test for empty matrices
+
+	/** Return a reference to element [ @a i, @a j ], forwarded to the derived class */
+	constexpr auto &operator()(size_t i, size_t j)
+	{
+		return static_cast<M &>(*this).operator()(i, j);
+	}
+
+	/** Return the value of element [ @a i, @a j ], forwarded to the derived class */
+	constexpr auto operator()(size_t i, size_t j) const
+	{
+		return static_cast<const M &>(*this).operator()(i, j);
+	}
+
+	/** Swap the contents of rows @a r1 and @a r2, the second index runs over 0 <= c < dim_n() */
+	void swap_row(size_t r1, size_t r2)
+	{
+		for (size_t c = 0; c < dim_n(); ++c)
+		{
+			auto v = operator()(r1, c);
+			operator()(r1, c) = operator()(r2, c);
+			operator()(r2, c) = v;
+		}
+	}
+
+	/** Swap the contents of columns @a c1 and @a c2, the first index runs over 0 <= r < dim_m() */
+	void swap_col(size_t c1, size_t c2)
+	{
+		for (size_t r = 0; r < dim_m(); ++r)
+		{
+			auto &a = operator()(r, c1);
+			auto &b = operator()(r, c2);
+			std::swap(a, b);
+		}
+	}
+
+	/** write the matrix @a m to std::ostream @a os in the form [[a, b], [c, d]] */
+	friend std::ostream &operator<<(std::ostream &os, const matrix_expression &m)
+	{
+		os << '[';
+
+		for (size_t i = 0; i < m.dim_m(); ++i)
+		{
+			os << '[';
+
+			for (size_t j = 0; j < m.dim_n(); ++j)
+			{
+				os << m(i, j);
+				if (j + 1 < m.dim_n())
+					os << ", ";
+			}
+
+			// close this row first, the separator belongs between two rows
+			os << ']';
+			if (i + 1 < m.dim_m())
+				os << ", ";
+		}
+
+		os << ']';
+
+		return os;
+	}
+};
+
+// --------------------------------------------------------------------
+
+/**
+ * @brief Storage class implementation of matrix_expression.
+ * 
+ * @tparam F The type of the stored values
+ *  
+ * matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
+ * element m i,j is mapped to [i * n + j] and thus storage is row major
+ */
+
+template <typename F = float>
+class matrix : public matrix_expression<matrix<F>>
+{
+  public:
+	/** The value type */
+	using value_type = F;
+
+	/**
+	 * @brief Copy construct a new matrix object using @a m
+	 * 
+	 * @tparam M2 Type of @a m
+	 * @param m The matrix expression to copy values from
+	 */
+	template <typename M2>
+	matrix(const matrix_expression<M2> &m)
+		: m_m(m.dim_m())
+		, m_n(m.dim_n())
+		, m_data(m_m * m_n)
+	{
+		for (size_t i = 0; i < m_m; ++i)
+		{
+			for (size_t j = 0; j < m_n; ++j)
+				operator()(i, j) = m(i, j);
+		}
+	}
+
+	/**
+	 * @brief Construct a new matrix object with dimension @a m and @a n
+	 * setting the values to @a v
+	 * 
+	 * @param m Requested dimension M
+	 * @param n Requested dimension N
+	 * @param v Value to store in each element
+	 */
+	matrix(size_t m, size_t n, value_type v = 0)
+		: m_m(m)
+		, m_n(n)
+		, m_data(m_m * m_n, v)
+	{
+		// the vector's fill constructor already stored v in every element
+	}
+
+	/** @cond */
+	matrix() = default;
+	matrix(matrix &&m) = default;
+	matrix(const matrix &m) = default;
+	matrix &operator=(matrix &&m) = default;
+	matrix &operator=(const matrix &m) = default;
+	/** @endcond */
+
+	constexpr size_t dim_m() const { return m_m; } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_n; } ///< Return dimension n
+
+	/** Return the value of element [ @a i, @a j ], stored at index i * n + j */
+	constexpr value_type operator()(size_t i, size_t j) const
+	{
+		assert(i < m_m);
+		assert(j < m_n);
+		return m_data[i * m_n + j];
+	}
+
+	/** Return a reference to element [ @a i, @a j ], stored at index i * n + j */
+	constexpr value_type &operator()(size_t i, size_t j)
+	{
+		assert(i < m_m);
+		assert(j < m_n);
+		return m_data[i * m_n + j];
+	}
+
+  private:
+	size_t m_m = 0, m_n = 0;        // dimensions, declared before m_data since m_data is sized from them
+	std::vector<value_type> m_data; // m_m * m_n elements
+};
+
+// --------------------------------------------------------------------
+// special case, matrices with a compile time fixed size (e.g. 3x3)
+
+/**
+ * @brief Storage class implementation of matrix_expression
+ * with compile time fixed size.
+ * 
+ * @tparam F The type of the stored values
+ *  
+ * matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
+ * element m i,j is mapped to [i * n + j] and thus storage is row major
+ */
+
+template <typename F, size_t M, size_t N>
+class matrix_fixed : public matrix_expression<matrix_fixed<F, M, N>>
+{
+  public:
+	/** The value type */
+	using value_type = F;
+
+	/** The storage size */
+	static constexpr size_t kSize = M * N;
+
+	/** Converting constructor, copies the elements of matrix @a m; disabled for arithmetic types so scalars select the fill constructor */
+	template <typename M2, std::enable_if_t<not std::is_arithmetic_v<M2>, int> = 0>
+	matrix_fixed(const M2 &m)
+	{
+		assert(M == m.dim_m() and N == m.dim_n());
+		for (size_t i = 0; i < M; ++i)
+		{
+			for (size_t j = 0; j < N; ++j)
+				operator()(i, j) = m(i, j);
+		}
+	}
+
+	/** default constructor, stores value @a v in every element */
+	matrix_fixed(value_type v = 0)
+	{
+		m_data.fill(v);
+	}
+
+	/** Alternate constructor taking an array of kSize values to store, in storage order */
+	matrix_fixed(const F (&v)[kSize])
+	{
+		fill(v, std::make_index_sequence<kSize>{});
+	}
+
+	/** @cond */
+	matrix_fixed(matrix_fixed &&m) = default;
+	matrix_fixed(const matrix_fixed &m) = default;
+	matrix_fixed &operator=(matrix_fixed &&m) = default;
+	matrix_fixed &operator=(const matrix_fixed &m) = default;
+	/** @endcond */
+
+	/** Store the values in @a a in the matrix, the index sequence expands to one initialiser per element */
+	template <size_t... Ixs>
+	matrix_fixed &fill(const F (&a)[kSize], std::index_sequence<Ixs...>)
+	{
+		m_data = { a[Ixs]... };
+		return *this;
+	}
+
+	constexpr size_t dim_m() const { return M; } ///< Return dimension m
+	constexpr size_t dim_n() const { return N; } ///< Return dimension n
+
+	/** Return the value of element [ @a i, @a j ], stored at index i * N + j */
+	constexpr value_type operator()(size_t i, size_t j) const
+	{
+		assert(i < M);
+		assert(j < N);
+		return m_data[i * N + j];
+	}
+
+	/** Return a reference to element [ @a i, @a j ], stored at index i * N + j */
+	constexpr value_type &operator()(size_t i, size_t j)
+	{
+		assert(i < M);
+		assert(j < N);
+		return m_data[i * N + j];
+	}
+
+  private:
+	std::array<value_type, M * N> m_data;
+};
+
+/** typedef of a fixed matrix of size 3x3 */
+template <typename F>
+using matrix3x3 = matrix_fixed<F, 3, 3>;
+
+/** typedef of a fixed matrix of size 4x4 */
+template <typename F>
+using matrix4x4 = matrix_fixed<F, 4, 4>;
+
+// --------------------------------------------------------------------
+
+/**
+ * @brief Storage class implementation of symmetric matrix_expression
+ *
+ * @tparam F The type of the stored values
+ *
+ * matrix is n x n, addressing i,j is 0 <= i < n and 0 <= j < n
+ * only one triangle is stored: element i,j with i <= j is mapped to [j * (j + 1) / 2 + i]
+ */
+template <typename F = float>
+class symmetric_matrix : public matrix_expression<symmetric_matrix<F>>
+{
+  public:
+	/** The value type */
+	using value_type = F;
+
+	/** constructor for a matrix of size @a n x @a n elements with value @a v */
+	symmetric_matrix(size_t n, value_type v = 0)
+		: m_n(n)
+		, m_data((m_n * (m_n + 1)) / 2, v)
+	{
+		// the vector's fill constructor already stored v in every element
+	}
+
+	/** @cond */
+	symmetric_matrix() = default;
+	symmetric_matrix(symmetric_matrix &&m) = default;
+	symmetric_matrix(const symmetric_matrix &m) = default;
+	symmetric_matrix &operator=(symmetric_matrix &&m) = default;
+	symmetric_matrix &operator=(const symmetric_matrix &m) = default;
+	/** @endcond */
+
+	constexpr size_t dim_m() const { return m_n; } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_n; } ///< Return dimension n
+
+	/** Return the value of element [ @a i, @a j ], [ j, i ] yields the same element */
+	constexpr value_type operator()(size_t i, size_t j) const
+	{
+		return i < j
+		           ? m_data[(j * (j + 1)) / 2 + i]
+		           : m_data[(i * (i + 1)) / 2 + j];
+	}
+
+	/** Return a reference to element [ @a i, @a j ], [ j, i ] yields the same element */
+	constexpr value_type &operator()(size_t i, size_t j)
+	{
+		if (i > j)
+			std::swap(i, j);
+		assert(j < m_n);
+		return m_data[(j * (j + 1)) / 2 + i];
+	}
+
+  private:
+	size_t m_n = 0; // initialised so a default constructed matrix reports dimension 0
+	std::vector<value_type> m_data;
+};
+
+// --------------------------------------------------------------------
+
+/**
+ * @brief Storage class implementation of symmetric matrix_expression
+ * with compile time fixed size.
+ *
+ * @tparam F The type of the stored values
+ *
+ * matrix is M x M, addressing i,j is 0 <= i < M and 0 <= j < M
+ * only one triangle is stored: element i,j with i <= j is mapped to [j * (j + 1) / 2 + i]
+ */
+template <typename F, size_t M>
+class symmetric_matrix_fixed : public matrix_expression<symmetric_matrix_fixed<F, M>>
+{
+  public:
+	/** The value type */
+	using value_type = F;
+
+	/** constructor with all elements set to value @a v */
+	symmetric_matrix_fixed(value_type v = 0)
+	{
+		m_data.fill(v);
+	}
+
+	/** @cond */
+	symmetric_matrix_fixed(symmetric_matrix_fixed &&m) = default;
+	symmetric_matrix_fixed(const symmetric_matrix_fixed &m) = default;
+	symmetric_matrix_fixed &operator=(symmetric_matrix_fixed &&m) = default;
+	symmetric_matrix_fixed &operator=(const symmetric_matrix_fixed &m) = default;
+	/** @endcond */
+
+	constexpr size_t dim_m() const { return M; } ///< Return dimension m
+	constexpr size_t dim_n() const { return M; } ///< Return dimension n
+
+	/** Return the value of element [ @a i, @a j ], [ j, i ] yields the same element */
+	constexpr value_type operator()(size_t i, size_t j) const
+	{
+		return i < j
+		           ? m_data[(j * (j + 1)) / 2 + i]
+		           : m_data[(i * (i + 1)) / 2 + j];
+	}
+
+	/** Return a reference to element [ @a i, @a j ], [ j, i ] yields the same element */
+	constexpr value_type &operator()(size_t i, size_t j)
+	{
+		if (i > j)
+			std::swap(i, j);
+		assert(j < M);
+		return m_data[(j * (j + 1)) / 2 + i];
+	}
+
+  private:
+	std::array<value_type, (M * (M + 1)) / 2> m_data;
+};
+
+/** typedef of a fixed symmetric matrix of size 3x3 */
+template <typename F>
+using symmetric_matrix3x3 = symmetric_matrix_fixed<F, 3>;
+
+/** typedef of a fixed symmetric matrix of size 4x4 */
+template <typename F>
+using symmetric_matrix4x4 = symmetric_matrix_fixed<F, 4>;
+
+// --------------------------------------------------------------------
+
+/**
+ * @brief implementation of symmetric matrix_expression with a value
+ * of 1 for the diagonal values and 0 for all the others.
+ *
+ * @tparam F The type of the values returned
+ *
+ * matrix is n x n, addressing i,j is 0 <= i < n and 0 <= j < n
+ * no elements are stored, the value is derived from the indices
+ */
+template <typename F = float>
+class identity_matrix : public matrix_expression<identity_matrix<F>>
+{
+  public:
+	/** the value type */
+	using value_type = F;
+
+	/** constructor taking a dimension @a n */
+	identity_matrix(size_t n)
+		: m_n{ n }
+	{
+	}
+
+	constexpr size_t dim_m() const { return m_n; } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_n; } ///< Return dimension n
+
+	/** Return the value of element [ @a i, @a j ]: 1 on the diagonal, 0 elsewhere */
+	constexpr value_type operator()(size_t i, size_t j) const
+	{
+		return i != j ? static_cast<value_type>(0) : static_cast<value_type>(1);
+	}
+
+  private:
+	size_t m_n;
+};
+
+// --------------------------------------------------------------------
+// matrix functions, implemented as expression templates
+
+/**
+ * @brief Implementation of a subtraction operation as a matrix expression
+ *
+ * @tparam M1 Type of matrix 1 (the left hand operand)
+ * @tparam M2 Type of matrix 2 (the right hand operand)
+ */
+template <typename M1, typename M2>
+class matrix_subtraction : public matrix_expression<matrix_subtraction<M1, M2>>
+{
+  public:
+	/** constructor, both operands must have equal dimensions */
+	matrix_subtraction(const M1 &lhs, const M2 &rhs)
+		: m_m1(lhs)
+		, m_m2(rhs)
+	{
+		assert(m_m1.dim_m() == m_m2.dim_m());
+		assert(m_m1.dim_n() == m_m2.dim_n());
+	}
+
+	constexpr size_t dim_m() const { return m_m1.dim_m(); } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_m1.dim_n(); } ///< Return dimension n
+
+	/** Access to the value of element [ @a i, @a j ], calculated on each call */
+	constexpr auto operator()(size_t i, size_t j) const
+	{
+		return m_m1(i, j) - m_m2(i, j);
+	}
+
+  private:
+	const M1 &m_m1; // reference to the left operand, not a copy
+	const M2 &m_m2; // reference to the right operand, not a copy
+};
+
+/** operator to subtract matrix @a m2 from matrix @a m1, returns a matrix expression referring to both */
+template <typename M1, typename M2>
+auto operator-(const matrix_expression<M1> &m1, const matrix_expression<M2> &m2)
+{
+	return matrix_subtraction<matrix_expression<M1>, matrix_expression<M2>>(m1, m2);
+}
+
+/**
+ * @brief Implementation of a multiplication operation as a matrix expression
+ *
+ * @tparam M1 Type of matrix 1 (the left hand operand)
+ * @tparam M2 Type of matrix 2 (the right hand operand)
+ */
+template <typename M1, typename M2>
+class matrix_matrix_multiplication : public matrix_expression<matrix_matrix_multiplication<M1, M2>>
+{
+  public:
+	/** constructor, dimension n of @a m1 must be equal to dimension m of @a m2 */
+	matrix_matrix_multiplication(const M1 &m1, const M2 &m2)
+		: m_m1(m1)
+		, m_m2(m2)
+	{
+		assert(m1.dim_n() == m2.dim_m());
+	}
+
+	constexpr size_t dim_m() const { return m_m1.dim_m(); } ///< Return dimension m, taken from the left operand
+	constexpr size_t dim_n() const { return m_m2.dim_n(); } ///< Return dimension n, taken from the right operand
+
+	/** Access to the value of element [ @a i, @a j ], the sum over k of m1(i, k) * m2(k, j) */
+	constexpr auto operator()(size_t i, size_t j) const
+	{
+		using value_type = decltype(m_m1(0, 0));
+
+		value_type result = {};
+
+		for (size_t k = 0; k < m_m1.dim_n(); ++k)
+			result += m_m1(i, k) * m_m2(k, j);
+
+		return result;
+	}
+
+  private:
+	const M1 &m_m1; // reference to the left operand, not a copy
+	const M2 &m_m2; // reference to the right operand, not a copy
+};
+
+/**
+ * @brief Matrix expression yielding the product of a matrix and a scalar value
+ *
+ * @tparam M Type of matrix
+ * @tparam T Type of scalar value
+ */
+template <typename M, typename T>
+class matrix_scalar_multiplication : public matrix_expression<matrix_scalar_multiplication<M, T>>
+{
+  public:
+	/** type of the scalar factor */
+	using value_type = T;
+
+	/** constructor, the matrix is kept by reference, the scalar is copied */
+	matrix_scalar_multiplication(const M &matrix, value_type factor)
+		: m_matrix(matrix)
+		, m_factor(factor)
+	{
+	}
+
+	constexpr size_t dim_m() const { return m_matrix.dim_m(); } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_matrix.dim_n(); } ///< Return dimension n
+
+	/** Return the element at [ @a i, @a j ] of the matrix multiplied by the scalar */
+	constexpr auto operator()(size_t i, size_t j) const
+	{
+		return m_matrix(i, j) * m_factor;
+	}
+
+  private:
+	const M &m_matrix;
+	value_type m_factor;
+};
+
+/** First implementation of operator*, multiply @a m with scalar @a v, only enabled if @a T is a floating point type */
+template <typename M1, typename T, std::enable_if_t<std::is_floating_point_v<T>, int> = 0>
+auto operator*(const matrix_expression<M1> &m, T v)
+{
+	return matrix_scalar_multiplication<matrix_expression<M1>, T>(m, v);
+}
+
+/** Second implementation of operator*, enabled if the second parameter is not a floating point type, it then has to be a matrix expression */
+template <typename M1, typename M2, std::enable_if_t<not std::is_floating_point_v<M2>, int> = 0>
+auto operator*(const matrix_expression<M1> &m1, const matrix_expression<M2> &m2)
+{
+	return matrix_matrix_multiplication<matrix_expression<M1>, matrix_expression<M2>>(m1, m2);
+}
+
+// --------------------------------------------------------------------
+
+/** Generic routine to calculate the determinant of a matrix
+ * @note This is currently only implemented for fixed matrices of size 3x3
+ */
+template <typename M>
+auto determinant(const M &m);
+
+/** Determinant of a fixed size 3x3 matrix, expanded along the elements (0, 0), (0, 1) and (0, 2) */
+template <typename F = float>
+auto determinant(const matrix3x3<F> &m)
+{
+	auto cof0 = m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1); // cofactor of element (0, 0)
+	auto cof1 = m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2); // cofactor of element (0, 1)
+	auto cof2 = m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0); // cofactor of element (0, 2)
+	return m(0, 0) * cof0 + m(0, 1) * cof1 + m(0, 2) * cof2;
+}
+
+/** Generic routine to calculate the inverse of a matrix
+ *
+ * @note This is currently only implemented for fixed matrices of size 3x3
+ */
+template <typename M>
+M inverse(const M &m);
+
+/** Inverse of a fixed size 3x3 matrix, calculated as the adjugate of @a m divided by its determinant */
+template <typename F = float>
+matrix3x3<F> inverse(const matrix3x3<F> &m)
+{
+	matrix3x3<F> inv;
+
+	// note that the determinant is not checked for being zero (singular matrix)
+	const F d = determinant(m);
+	inv(0, 0) = (m(1, 1) * m(2, 2) - m(1, 2) * m(2, 1)) / d;
+	inv(0, 1) = (m(2, 1) * m(0, 2) - m(2, 2) * m(0, 1)) / d;
+	inv(0, 2) = (m(0, 1) * m(1, 2) - m(0, 2) * m(1, 1)) / d;
+	inv(1, 0) = (m(1, 2) * m(2, 0) - m(1, 0) * m(2, 2)) / d;
+	inv(1, 1) = (m(2, 2) * m(0, 0) - m(2, 0) * m(0, 2)) / d;
+	inv(1, 2) = (m(0, 2) * m(1, 0) - m(0, 0) * m(1, 2)) / d;
+	inv(2, 0) = (m(1, 0) * m(2, 1) - m(1, 1) * m(2, 0)) / d;
+	inv(2, 1) = (m(2, 0) * m(0, 1) - m(2, 1) * m(0, 0)) / d;
+	inv(2, 2) = (m(0, 0) * m(1, 1) - m(0, 1) * m(1, 0)) / d;
+
+	return inv;
+}
+
+// --------------------------------------------------------------------
+
+/**
+ * @brief Matrix expression yielding the cofactors of a matrix
+ *
+ * @tparam M Type of matrix, the hard coded index table only covers indices 0..3, i.e. a 4x4 matrix
+ */
+template <typename M>
+class matrix_cofactors : public matrix_expression<matrix_cofactors<M>>
+{
+  public:
+	/** constructor, the matrix is kept by reference */
+	matrix_cofactors(const M &m)
+		: m_source(m)
+	{
+	}
+
+	constexpr size_t dim_m() const { return m_source.dim_m(); } ///< Return dimension m
+	constexpr size_t dim_n() const { return m_source.dim_n(); } ///< Return dimension n
+
+	/** Return the cofactor for element [ @a i, @a j ], both indices are used unchecked and should be less than 4 */
+	constexpr auto operator()(size_t i, size_t j) const
+	{
+		// entry k lists the three indices that remain after leaving out index k
+		const size_t kept[4][3] = {
+			{ 1, 2, 3 },
+			{ 0, 2, 3 },
+			{ 0, 1, 3 },
+			{ 0, 1, 2 }
+		};
+		const auto &r = kept[i]; // first indices of the elements in the minor
+		const auto &c = kept[j]; // second indices of the elements in the minor
+
+		// determinant of the 3x3 minor
+		auto minor =
+			m_source(r[0], c[0]) * m_source(r[1], c[1]) * m_source(r[2], c[2]) +
+			m_source(r[0], c[1]) * m_source(r[1], c[2]) * m_source(r[2], c[0]) +
+			m_source(r[0], c[2]) * m_source(r[1], c[0]) * m_source(r[2], c[1]) -
+			m_source(r[0], c[2]) * m_source(r[1], c[1]) * m_source(r[2], c[0]) -
+			m_source(r[0], c[1]) * m_source(r[1], c[0]) * m_source(r[2], c[2]) -
+			m_source(r[0], c[0]) * m_source(r[1], c[2]) * m_source(r[2], c[1]);
+		return ((i + j) & 1) != 0 ? -minor : minor;
+	}
+
+  private:
+	const M &m_source;
+};
+
+} // namespace cif
--- a/include/cif++/model.hpp
+++ b/include/cif++/model.hpp
--- a/include/cif++/parser.hpp
+++ b/include/cif++/parser.hpp
@@ -26,19 +26,26 @@

 #pragma once

-#include <cif++/row.hpp>
+#include "cif++/row.hpp"

 #include <map>
-#include <regex>
+
+/**
+ * @file parser.hpp
+ * 
+ * This file contains the declaration of an mmCIF parser
+ */

 namespace cif
 {

 // --------------------------------------------------------------------

+/** Exception that is thrown when the mmCIF file contains a parsing error */
 class parse_error : public std::runtime_error
 {
  public:
+	/// \brief constructor
 	parse_error(uint32_t line_nr, const std::string &message)
 		: std::runtime_error("parse error at line " + std::to_string(line_nr) + ": " + message)
 	{
@@ -47,73 +54,83 @@ class parse_error : public std::runtime_error

 // --------------------------------------------------------------------

+/**
+ * @brief The sac_parser is similar to SAX parsers (Simple API for XML,
+ * in our case it is Simple API for CIF)
+ * 
+ * This is a hand crafted, optimised parser for reading cif files,
+ * both cif 1.0 and cif 1.1 are supported, but version 2.0 is not.
+ * That means that the content of files must strictly contain only
+ * ASCII characters. Anything else will generate an error.
+ * 
+ * This class is an abstract base class. Derived classes should
+ * implement the produce_ methods.
+ */
+
 // TODO: Need to implement support for transformed long lines

 class sac_parser
 {
  public:
+	/** @cond */
 	using datablock_index = std::map<std::string, std::size_t>;

-	sac_parser(std::istream &is, bool init = true);
-
 	virtual ~sac_parser() = default;
+	/** @endcond */

+	/// \brief The parser only supports ASCII so we can
+	/// create a table with character properties.
 	enum CharTraitsMask : uint8_t
 	{
-		kOrdinaryMask = 1 << 0,
-		kNonBlankMask = 1 << 1,
-		kTextLeadMask = 1 << 2,
-		kAnyPrintMask = 1 << 3
+		kOrdinaryMask = 1 << 0,	///< The character is in the Ordinary class
+		kNonBlankMask = 1 << 1,	///< The character is in the NonBlank class
+		kTextLeadMask = 1 << 2,	///< The character is in the TextLead class
+		kAnyPrintMask = 1 << 3	///< The character is in the AnyPrint class
 	};

-	static bool is_white(int ch)
+	/// \brief Return true if the character @a ch is a *space* character
+	static constexpr bool is_space(int ch)
 	{
-		return std::isspace(ch) or ch == '#';
+		return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n';
 	}

+	/// \brief Return true if the character @a ch is a *white* character
+	static constexpr bool is_white(int ch)
+	{
+		return is_space(ch) or ch == '#';
+	}
+
+	/// \brief Return true if the character @a ch is an *ordinary* character
 	static constexpr bool is_ordinary(int ch)
 	{
 		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kOrdinaryMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is a *non_blank* character
 	static constexpr bool is_non_blank(int ch)
 	{
 		return ch > 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kNonBlankMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is a *text_lead* character
 	static constexpr bool is_text_lead(int ch)
 	{
 		return ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kTextLeadMask) != 0;
 	}

+	/// \brief Return true if the character @a ch is an *any_print* character
 	static constexpr bool is_any_print(int ch)
 	{
 		return ch == '\t' or
 		       (ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
 	}

-	static bool is_unquoted_string(std::string_view text)
-	{
-		bool result = text.empty() or is_ordinary(text.front());
-
-		if (result)
-		{
-			for (auto ch : text)
-			{
-				if (is_non_blank(ch))
-					continue;
-				result = false;
-				break;
-			}
-		}
-
-		static const std::regex kReservedRx(R"(loop_|stop_|global_|data_\S+|save_\S+)", std::regex_constants::icase);
-
-		// but be careful it does not contain e.g. stop_
-		return result and not std::regex_match(text.begin(), text.end(), kReservedRx);
-	}
+	/// \brief Return true if the string in @a text can safely be written without quotation
+	static bool is_unquoted_string(std::string_view text);

  protected:
+	/** @cond */
+
 	static constexpr uint8_t kCharTraitsTable[128] = {
 		//	0	1	2	3	4	5	6	7	8	9	a	b	c	d	e	f
 		14, 15, 14, 14, 14, 15, 15, 14, 15, 15, 15, 15, 15, 15, 15, 15, //	2
@@ -133,7 +150,8 @@ class sac_parser
 		DATA,
 		LOOP,
 		GLOBAL,
-		SAVE,
+		SAVE_,
+		SAVE_NAME,
 		STOP,
 		Tag,
 		Value
@@ -148,7 +166,8 @@ class sac_parser
 			case CIFToken::DATA: return "DATA";
 			case CIFToken::LOOP: return "LOOP";
 			case CIFToken::GLOBAL: return "GLOBAL";
-			case CIFToken::SAVE: return "SAVE";
+			case CIFToken::SAVE_: return "SAVE";
+			case CIFToken::SAVE_NAME: return "SAVE+name";
 			case CIFToken::STOP: return "STOP";
 			case CIFToken::Tag: return "Tag";
 			case CIFToken::Value: return "Value";
@@ -156,55 +175,58 @@ class sac_parser
 		}
 	}

-	enum class CIFValue
-	{
-		Int,
-		Float,
-		Numeric,
-		String,
-		TextField,
-		Inapplicable,
-		Unknown
-	};
-
-	static constexpr const char *get_value_name(CIFValue type)
-	{
-		switch (type)
-		{
-			case CIFValue::Int: return "Int";
-			case CIFValue::Float: return "Float";
-			case CIFValue::Numeric: return "Numeric";
-			case CIFValue::String: return "String";
-			case CIFValue::TextField: return "TextField";
-			case CIFValue::Inapplicable: return "Inapplicable";
-			case CIFValue::Unknown: return "Unknown";
-			default: return "Invalid type parameter";
-		}
-	}
-
-	// get_next_char takes a char from the buffer, or if it is empty
-	// from the istream. This function also does carriage/linefeed
-	// translation.
+	// get_next_char takes the next character from the istream.
+	// This function also does carriage/linefeed translation.
 	int get_next_char();

+	// Put the last read character back into the istream
 	void retract();

-	int restart(int start);
-
 	CIFToken get_next_token();

 	void match(CIFToken token);

+	/** @endcond */
+
  public:
+
+	/** \brief Parse only a single datablock in the string @a datablock
+	 * The start of the datablock is first located and then data
+	 * is parsed up until the next start of a datablock or the end of
+	 * the data.
+	 * */
 	bool parse_single_datablock(const std::string &datablock);

+	/** \brief Return an index for all the datablocks found, that is
+	 * the index will contain the names and offsets for each.
+	 */
 	datablock_index index_datablocks();

+	/**
+	 * @brief Parse the datablock named @a datablock
+	 * 
+	 * This will first lookup the datablock's offset in the index @a index
+	 * and then start parsing from that location until the next datablock.
+	 * 
+	 * @param datablock Name of the datablock to parse
+	 * @param index The index created using index_datablocks
+	 * @return true If the datablock was found
+	 * @return false If the datablock was not found
+	 */
 	bool parse_single_datablock(const std::string &datablock, const datablock_index &index);

+	/**
+	 * @brief Parse the file
+	 * 
+	 */
 	void parse_file();

  protected:
+
+	/** @cond */
+
+	sac_parser(std::istream &is, bool init = true);
+
 	void parse_global();

 	void parse_datablock();
@@ -214,7 +236,7 @@ class sac_parser
 	void error(const std::string &msg)
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "Error parsing mmCIF: " << msg << std::endl;
+			std::cerr << "Error parsing mmCIF: " << msg << '\n';

 		throw parse_error(m_line_nr, msg);
 	}
@@ -222,18 +244,19 @@ class sac_parser
 	void warning(const std::string &msg)
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "parser warning at line " << m_line_nr << ": " << msg << std::endl;
+			std::cerr << "parser warning at line " << m_line_nr << ": " << msg << '\n';
 	}

 	// production methods, these are pure virtual here

-	virtual void produce_datablock(const std::string &name) = 0;
-	virtual void produce_category(const std::string &name) = 0;
+	virtual void produce_datablock(std::string_view name) = 0;
+	virtual void produce_category(std::string_view name) = 0;
 	virtual void produce_row() = 0;
-	virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;
+	virtual void produce_item(std::string_view category, std::string_view item, std::string_view value) = 0;

  protected:
-	enum State
+
+	enum class State
 	{
 		Start,
 		White,
@@ -246,49 +269,59 @@ class sac_parser
 		UnquotedString,
 		Tag,
 		TextField,
-		Float = 100,
-		Int = 110,
-		Value = 300,
-		DATA,
-		SAVE
+		TextFieldNL,
+		Reserved,
+		Value
 	};

 	std::streambuf &m_source;

 	// Parser state
-	bool m_validate;
 	uint32_t m_line_nr;
 	bool m_bol;
 	CIFToken m_lookahead;
-	std::string m_token_value;
-	CIFValue mTokenType;
-	std::vector<int> m_buffer;	// retract buffer, used to be a stack<char>
+
+	// token buffer
+	std::vector<char> m_token_buffer;
+	std::string_view m_token_value;
+
+	/** @endcond */
 };

 // --------------------------------------------------------------------

+/**
+ * @brief An actual implementation of a sac_parser generating data in a file
+ * 
+ * This parser will create the cif::file, cif::datablock and cif::category
+ * objects required to contain all data
+ */
 class parser : public sac_parser
 {
  public:
+	/// \brief constructor, generates data into @a file from @a is
 	parser(std::istream &is, file &file)
 		: sac_parser(is)
 		, m_file(file)
 	{
 	}

-	void produce_datablock(const std::string &name) override;
+	/** @cond */
+	void produce_datablock(std::string_view name) override;

-	void produce_category(const std::string &name) override;
+	void produce_category(std::string_view name) override;

 	void produce_row() override;

-	void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
+	void produce_item(std::string_view category, std::string_view item, std::string_view value) override;

  protected:
 	file &m_file;
 	datablock *m_datablock = nullptr;
 	category *m_category = nullptr;
 	row_handle m_row;
+
+	/** @endcond */
 };

 } // namespace cif
--- a/include/cif++/pdb.hpp
+++ b/include/cif++/pdb.hpp
@@ -0,0 +1,147 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ * 
+ * Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
+ * 
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ * 
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ * 
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "cif++/file.hpp"
+
+/**
+ * @file pdb.hpp
+ * 
+ * This file presents the API to read and write files in the
+ * legacy and ancient PDB format.
+ * 
+ * The code works on the basis of best effort since it is
+ * impossible to have correct round trip fidelity.
+ * 
+ */
+
+namespace cif::pdb
+{
+
+// --------------------------------------------------------------------
+// PDB to mmCIF
+
+/** @brief Read a file in either mmCIF or PDB format from file @a file,
+ * compressed or not, depending on the content.
+ * @return The data that was read, as a file object
+ */
+file read(const std::filesystem::path &file);
+
+/** @brief Read a file in either mmCIF or PDB format from std::istream @a is,
+ * compressed or not, depending on the content.
+ * @return The data that was read, as a file object
+ */
+file read(std::istream &is);
+
+/**
+ * @brief Read a file in legacy PDB format from std::istream @a pdbFile
+ *
+ * @return The data that was read, as a file object
+ */
+file read_pdb_file(std::istream &pdbFile);
+
+// mmCIF to PDB
+
+/** @brief Write out the data in @a db in legacy PDB format
+ * to std::ostream @a os
+ */
+void write(std::ostream &os, const datablock &db);
+
+/** @brief Write out the data in the front datablock of @a f in legacy PDB format
+ * to std::ostream @a os
+ */
+inline void write(std::ostream &os, const file &f)
+{
+	write(os, f.front());
+}
+
+/** @brief Write out the data in @a db to file @a file
+ * in legacy PDB format or mmCIF format, depending on the
+ * filename extension.
+ * 
+ * If extension of @a file is *.gz* the resulting file will
+ * be written in gzip compressed format.
+ */
+void write(const std::filesystem::path &file, const datablock &db);
+
+/** @brief Write out the data in the front datablock of @a f to file @a p
+ * in legacy PDB format or mmCIF format, depending on the
+ * filename extension.
+ * 
+ * If extension of @a p is *.gz* the resulting file will
+ * be written in gzip compressed format.
+ */
+inline void write(const std::filesystem::path &p, const file &f)
+{
+	write(p, f.front());
+}
+
+// --------------------------------------------------------------------
+// Other I/O related routines
+
+/** @brief Return the HEADER line for the data in @a data
+ *
+ * The line returned should be compatible with the legacy PDB
+ * format and is e.g. used in the DSSP program.
+ * 
+ * @param data The datablock to use as source for the requested data
+ * @param truncate_at The maximum length of the line returned
+ */
+std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
+
+/** @brief Return the COMPND line for the data in @a data
+ *
+ * The line returned should be compatible with the legacy PDB
+ * format and is e.g. used in the DSSP program.
+ * 
+ * @param data The datablock to use as source for the requested data
+ * @param truncate_at The maximum length of the line returned
+ */
+std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
+
+/** @brief Return the SOURCE line for the data in @a data
+ *
+ * The line returned should be compatible with the legacy PDB
+ * format and is e.g. used in the DSSP program.
+ * 
+ * @param data The datablock to use as source for the requested data
+ * @param truncate_at The maximum length of the line returned
+ */
+std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
+
+/** @brief Return the AUTHOR line for the data in @a data
+ *
+ * The line returned should be compatible with the legacy PDB
+ * format and is e.g. used in the DSSP program.
+ * 
+ * @param data The datablock to use as source for the requested data
+ * @param truncate_at The maximum length of the line returned
+ */
+std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
+
+} // namespace cif::pdb
+
--- a/include/cif++/pdb/cif2pdb.hpp
+++ b/include/cif++/pdb/cif2pdb.hpp
@@ -26,17 +26,8 @@

 #pragma once

-#include <cif++.hpp>
+/// \file cif2pdb.hpp
+/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead

-namespace cif::pdb
-{
+#warning "Use of this file is deprecated, please use cif++/pdb.hpp"

-/// \brief Just the HEADER, COMPND, SOURCE and AUTHOR lines
-void write_header_lines(std::ostream &os, const datablock &data);
-
-std::string get_HEADER_line(const datablock &data, std::string::size_type truncate_at = 127);
-std::string get_COMPND_line(const datablock &data, std::string::size_type truncate_at = 127);
-std::string get_SOURCE_line(const datablock &data, std::string::size_type truncate_at = 127);
-std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);
-
-} // namespace pdbx
--- a/include/cif++/pdb/io.hpp
+++ b/include/cif++/pdb/io.hpp
@@ -26,35 +26,7 @@

 #pragma once

-#include <cif++.hpp>
+/// \file io.hpp
+/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead

-namespace cif::pdb
-{
-
-/// \brief Read a file in either mmCIF or PDB format, compressed or not,
-/// depending on the content.
-file read(const std::filesystem::path &file);
-
-/// \brief Read a file in either mmCIF or PDB format, compressed or not,
-/// depending on the content.
-file read(std::istream &is);
-
-/// \brief Write out a file in PDB format
-void write(std::ostream &os, const datablock &db);
-
-/// \brief Write out a file in PDB format
-inline void write(std::ostream &os, const file &f)
-{
-	write(os, f.front());
-}
-
-/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
-void write(const std::filesystem::path &file, const datablock &db);
-
-/// \brief Write out a file in PDB format or mmCIF format, depending on the filename extension
-inline void write(const std::filesystem::path &p, const file &f)
-{
-	write(p, f.front());
-}
-
-}
+#warning "Use of this file is deprecated, please use cif++/pdb.hpp"
--- a/include/cif++/pdb/pdb2cif.hpp
+++ b/include/cif++/pdb/pdb2cif.hpp
@@ -26,40 +26,7 @@

 #pragma once

-#include <cif++.hpp>
+/// \file pdb2cif.hpp
+/// \deprecated This file is no longer used. Please use "cif++/pdb.hpp" instead

-namespace cif::pdb
-{
-
-// --------------------------------------------------------------------
-
-struct PDBRecord
-{
-	PDBRecord *mNext;
-	uint32_t mLineNr;
-	char mName[11];
-	size_t mVlen;
-	char mValue[1];
-
-	PDBRecord(uint32_t lineNr, const std::string &name, const std::string &value);
-	~PDBRecord();
-
-	void *operator new(size_t);
-	void *operator new(size_t size, size_t vLen);
-
-	void operator delete(void *p);
-	void operator delete(void *p, size_t vLen);
-
-	bool is(const char *name) const;
-
-	char vC(size_t column);
-	std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max());
-	int vI(int columnFirst, int columnLast);
-	std::string vF(size_t columnFirst, size_t columnLast);
-};
-
-// --------------------------------------------------------------------
-
-void ReadPDBFile(std::istream &pdbFile, file &cifFile);
-
-} // namespace pdbx
+#warning "Use of this file is deprecated, please use cif++/pdb.hpp"
--- a/include/cif++/pdb/tls.hpp
+++ b/include/cif++/pdb/tls.hpp
@@ -26,26 +26,7 @@

 #pragma once

-#include <cif++.hpp>
+/// \file tls.hpp
+/// \deprecated This code has been moved to libpdb-redo

-#include <string>
-#include <tuple>
-#include <vector>
-
-namespace cif
-{
-
-struct tls_selection;
-struct tls_residue;
-
-struct tls_selection
-{
-	virtual ~tls_selection() {}
-	virtual void collect_residues(cif::datablock &db, std::vector<tls_residue> &residues, std::size_t indentLevel = 0) const = 0;
-	std::vector<std::tuple<std::string, int, int>> get_ranges(cif::datablock &db, bool pdbNamespace) const;
-};
-
-// Low level: get the selections
-std::unique_ptr<tls_selection> parse_tls_selection_details(const std::string &program, const std::string &selection);
-
-} // namespace cif
+#warning "This code has been moved to libpdb-redo"
--- a/include/cif++/point.hpp
+++ b/include/cif++/point.hpp
@@ -26,11 +26,10 @@

 #pragma once

-#include <cif++/exports.hpp>
-
 #include <array>
 #include <cmath>
 #include <complex>
+#include <cstdint>
 #include <functional>
 #include <valarray>

@@ -39,24 +38,49 @@
 #include <clipper/core/coords.h>
 #endif

+/** \file point.hpp
+ *
+ * This file contains the definition for *cif::point* as well as
+ * lots of routines and classes that can manipulate points.
+ */
+
 namespace cif
 {

 // --------------------------------------------------------------------

+/// \brief Our value for Pi
 const double
 	kPI = 3.141592653589793238462643383279502884;

 // --------------------------------------------------------------------
-// A stripped down quaternion implementation, based on boost::math::quaternion
-// We use quaternions to do rotations in 3d space
+/**
+ * @brief A stripped down quaternion implementation, based on boost::math::quaternion
+ *
+ * We use quaternions to do rotations in 3d space. Quaternions are faster than
+ * matrix calculations and they also suffer less from drift caused by rounding
+ * errors.
+ *
+ * Like complex numbers, quaternions do have a meaningful notion of "real part",
+ * but unlike them there is no meaningful notion of "imaginary part".
+ * Instead there is an "unreal part" which itself is a quaternion, and usually
+ * nothing simpler (as opposed to the complex number case).
+ * However, for practicality, there are accessors for the other components
+ * (these are necessary for the templated copy constructor, for instance).
+ *
+ * @note Quaternion multiplication is *NOT* commutative;
+ * symbolically, "q *= rhs;" means "q = q * rhs;"
+ * and "q /= rhs;" means "q = q * inverse_of(rhs);"
+ */

 template <typename T>
 class quaternion_type
 {
  public:
+	/// \brief the value type of the elements, usually this is float
 	using value_type = T;

+	/// \brief constructor with the four members
 	constexpr explicit quaternion_type(value_type const &value_a = {}, value_type const &value_b = {}, value_type const &value_c = {}, value_type const &value_d = {})
 		: a(value_a)
 		, b(value_b)
@@ -65,6 +89,7 @@ class quaternion_type
 	{
 	}

+	/// \brief constructor taking two complex values as input
 	constexpr explicit quaternion_type(std::complex<value_type> const &z0, std::complex<value_type> const &z1 = std::complex<value_type>())
 		: a(z0.real())
 		, b(z0.imag())
@@ -73,9 +98,10 @@ class quaternion_type
 	{
 	}

-	constexpr quaternion_type(quaternion_type const &) = default;
-	constexpr quaternion_type(quaternion_type &&) = default;
+	constexpr quaternion_type(quaternion_type const &) = default; ///< Copy constructor
+	constexpr quaternion_type(quaternion_type &&) = default;      ///< Move constructor

+	/// \brief Copy constructor accepting a quaternion with a different value_type
 	template <typename X>
 	constexpr explicit quaternion_type(quaternion_type<X> const &rhs)
 		: a(static_cast<value_type>(rhs.a))
@@ -86,24 +112,20 @@ class quaternion_type
 	}

 	// accessors
-	//
-	// Note:    Like complex number, quaternions do have a meaningful notion of "real part",
-	//            but unlike them there is no meaningful notion of "imaginary part".
-	//            Instead there is an "unreal part" which itself is a quaternion, and usually
-	//            nothing simpler (as opposed to the complex number case).
-	//            However, for practicality, there are accessors for the other components
-	//            (these are necessary for the templated copy constructor, for instance).

+	/// \brief See class description, return the *real* part of the quaternion
 	constexpr value_type real() const
 	{
 		return a;
 	}

+	/// \brief See class description, return the *unreal* part of the quaternion
 	constexpr quaternion_type unreal() const
 	{
 		return { 0, b, c, d };
 	}

+	/// \brief swap
 	constexpr void swap(quaternion_type &o)
 	{
 		std::swap(a, o.a);
@@ -114,6 +136,7 @@ class quaternion_type

 	// assignment operators

+	/// \brief Assignment operator accepting a quaternion with optionally another value_type
 	template <typename X>
 	constexpr quaternion_type &operator=(quaternion_type<X> const &rhs)
 	{
@@ -125,6 +148,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief Assignment operator
 	constexpr quaternion_type &operator=(quaternion_type const &rhs)
 	{
 		a = rhs.a;
@@ -135,6 +159,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief Assignment operator that sets the *real* part to @a rhs and the *unreal* parts to zero
 	constexpr quaternion_type &operator=(value_type const &rhs)
 	{
 		a = rhs;
@@ -144,6 +169,9 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief Assignment operator that sets the *real* part to the real part of @a rhs
+	/// and the first *unreal* part to the imaginary part of @a rhs. The other *unreal*
+	/// parts are set to zero.
 	constexpr quaternion_type &operator=(std::complex<value_type> const &rhs)
 	{
 		a = rhs.real();
@@ -155,17 +183,16 @@ class quaternion_type
 	}

 	// other assignment-related operators
-	//
-	// NOTE:    Quaternion multiplication is *NOT* commutative;
-	//            symbolically, "q *= rhs;" means "q = q * rhs;"
-	//            and "q /= rhs;" means "q = q * inverse_of(rhs);"

+	/// \brief operator += adding value @a rhs to the *real* part
 	constexpr quaternion_type &operator+=(value_type const &rhs)
 	{
 		a += rhs;
 		return *this;
 	}

+	/// \brief operator += adding the real part of @a rhs to the *real* part
+	/// and the imaginary part of @a rhs to the first *unreal* part
 	constexpr quaternion_type &operator+=(std::complex<value_type> const &rhs)
 	{
 		a += std::real(rhs);
@@ -173,6 +200,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief operator += adding the parts of @a rhs to the equivalent part of this
 	template <class X>
 	constexpr quaternion_type &operator+=(quaternion_type<X> const &rhs)
 	{
@@ -183,12 +211,15 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief operator -= subtracting value @a rhs from the *real* part
 	constexpr quaternion_type &operator-=(value_type const &rhs)
 	{
 		a -= rhs;
 		return *this;
 	}

+	/// \brief operator -= subtracting the real part of @a rhs from the *real* part
+	/// and the imaginary part of @a rhs from the first *unreal* part
 	constexpr quaternion_type &operator-=(std::complex<value_type> const &rhs)
 	{
 		a -= std::real(rhs);
@@ -196,6 +227,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief operator -= subtracting the parts of @a rhs from the equivalent part of this
 	template <class X>
 	constexpr quaternion_type &operator-=(quaternion_type<X> const &rhs)
 	{
@@ -206,6 +238,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief multiply all parts with value @a rhs
 	constexpr quaternion_type &operator*=(value_type const &rhs)
 	{
 		a *= rhs;
@@ -215,6 +248,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief multiply with complex number @a rhs
 	constexpr quaternion_type &operator*=(std::complex<value_type> const &rhs)
 	{
 		value_type ar = rhs.real();
@@ -224,13 +258,15 @@ class quaternion_type
 		return *this;
 	}

-	constexpr friend quaternion_type operator*(const quaternion_type &a, const quaternion_type &b)
+	/// \brief multiply @a a with @a b and return the result
+	friend constexpr quaternion_type operator*(const quaternion_type &a, const quaternion_type &b)
 	{
 		auto result = a;
 		result *= b;
 		return result;
 	}

+	/// \brief multiply with quaternion @a rhs
 	template <typename X>
 	constexpr quaternion_type &operator*=(quaternion_type<X> const &rhs)
 	{
@@ -244,6 +280,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief divide all parts by @a rhs
 	constexpr quaternion_type &operator/=(value_type const &rhs)
 	{
 		a /= rhs;
@@ -253,6 +290,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief divide by complex number @a rhs
 	constexpr quaternion_type &operator/=(std::complex<value_type> const &rhs)
 	{
 		value_type ar = rhs.real();
@@ -263,6 +301,7 @@ class quaternion_type
 		return *this;
 	}

+	/// \brief divide by quaternion @a rhs
 	template <typename X>
 	constexpr quaternion_type &operator/=(quaternion_type<X> const &rhs)
 	{
@@ -277,7 +316,8 @@ class quaternion_type
 		return *this;
 	}

-	constexpr friend quaternion_type normalize(quaternion_type q)
+	/// \brief normalise the values so that the length of the result is exactly 1
+	friend constexpr quaternion_type normalize(quaternion_type q)
 	{
 		std::valarray<value_type> t(4);

@@ -298,35 +338,52 @@ class quaternion_type
 		return q;
 	}

-	constexpr friend quaternion_type conj(quaternion_type q)
+	/// \brief return the conjugate of this
+	friend constexpr quaternion_type conj(quaternion_type q)
 	{
 		return quaternion_type{ +q.a, -q.b, -q.c, -q.d };
 	}

-	constexpr value_type get_a() const { return a; }
-	constexpr value_type get_b() const { return b; }
-	constexpr value_type get_c() const { return c; }
-	constexpr value_type get_d() const { return d; }
+	constexpr value_type get_a() const { return a; } ///< Return part a
+	constexpr value_type get_b() const { return b; } ///< Return part b
+	constexpr value_type get_c() const { return c; } ///< Return part c
+	constexpr value_type get_d() const { return d; } ///< Return part d

+	/// \brief compare with @a rhs
 	constexpr bool operator==(const quaternion_type &rhs) const
 	{
 		return a == rhs.a and b == rhs.b and c == rhs.c and d == rhs.d;
 	}

+	/// \brief compare with @a rhs
 	constexpr bool operator!=(const quaternion_type &rhs) const
 	{
 		return a != rhs.a or b != rhs.b or c != rhs.c or d != rhs.d;
 	}

+	/// \brief return true if at least one of the parts is not zero
 	constexpr operator bool() const
 	{
-		return operator!=({});
+		return a != 0 or b != 0 or c != 0 or d != 0;
 	}

  private:
 	value_type a, b, c, d;
 };

+/**
+ * @brief This code is similar to the code in boost so I copy the documentation as well:
+ *
+ * > spherical is a simple transposition of polar, it takes as inputs a (positive)
+ * > magnitude and a point on the hypersphere, given by three angles. The first of
+ * > these, theta has a natural range of -pi to +pi, and the other two have natural
+ * > ranges of -pi/2 to +pi/2 (as is the case with the usual spherical coordinates in
+ * > **R**<sup>3</sup>). Due to the many symmetries and periodicities, nothing untoward happens if
+ * > the magnitude is negative or the angles are outside their natural ranges. The
+ * > expected degeneracies (a magnitude of zero ignores the angles settings...) do
+ * > happen however.
+ */
+
 template <typename T>
 inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1, T const &phi2)
 {
@@ -344,24 +401,34 @@ inline quaternion_type<T> spherical(T const &rho, T const &theta, T const &phi1,
 	return result;
 }

+/// \brief By default we use the float version of a quaternion
 using quaternion = quaternion_type<float>;

 // --------------------------------------------------------------------

-//	point, a location with x, y and z coordinates as floating point.
-//	This one is derived from a tuple<float,float,float> so
-//	you can do things like:
-//
-//	float x, y, z;
-//	tie(x, y, z) = atom.loc();
+/**
+ * @brief 3D point: a location with x, y and z coordinates as floating point.
+ *
+ * Note that you can simply use std::tie to get access to the
+ * individual parts like so:
+ *
+ * @code{.cpp}
+ * float x, y, z;
+ * tie(x, y, z) = atom.get_location();
+ * @endcode
+ */

 template <typename F>
 struct point_type
 {
+	/// \brief the value type of the x, y and z members
 	using value_type = F;

-	value_type m_x, m_y, m_z;
+	value_type m_x, ///< The x part of the location
+		m_y,        ///< The y part of the location
+		m_z;        ///< The z part of the location

+	/// \brief default constructor, initialises the values to zero
 	constexpr point_type()
 		: m_x(0)
 		, m_y(0)
@@ -369,6 +436,7 @@ struct point_type
 	{
 	}

+	/// \brief constructor taking three values
 	constexpr point_type(value_type x, value_type y, value_type z)
 		: m_x(x)
 		, m_y(y)
@@ -376,6 +444,7 @@ struct point_type
 	{
 	}

+	/// \brief Converting constructor, copies the values of @a pt which may have another value type
 	template <typename PF>
 	constexpr point_type(const point_type<PF> &pt)
 		: m_x(static_cast<F>(pt.m_x))
@@ -384,12 +453,14 @@ struct point_type
 	{
 	}

+	/// \brief constructor taking a tuple of three values
 	constexpr point_type(const std::tuple<value_type, value_type, value_type> &pt)
 		: point_type(std::get<0>(pt), std::get<1>(pt), std::get<2>(pt))
 	{
 	}

 #if HAVE_LIBCLIPPER
+	/// \brief Construct a point using the values in clipper coordinate @a pt
 	constexpr point_type(const clipper::Coord_orth &pt)
 		: m_x(pt[0])
 		, m_y(pt[1])
@@ -397,6 +468,7 @@ struct point_type
 	{
 	}

+	/// \brief Assign a point using the values in clipper coordinate @a rhs
 	constexpr point_type &operator=(const clipper::Coord_orth &rhs)
 	{
 		m_x = rhs[0];
@@ -406,6 +478,7 @@ struct point_type
 	}
 #endif

+	/// \brief Assignment operator
 	template <typename PF>
 	constexpr point_type &operator=(const point_type<PF> &rhs)
 	{
@@ -415,18 +488,19 @@ struct point_type
 		return *this;
 	}

-	constexpr value_type &get_x() { return m_x; }
-	constexpr value_type get_x() const { return m_x; }
-	constexpr void set_x(value_type x) { m_x = x; }
+	constexpr value_type &get_x() { return m_x; }      ///< Get a reference to x
+	constexpr value_type get_x() const { return m_x; } ///< Get the value of x
+	constexpr void set_x(value_type x) { m_x = x; }    ///< Set the value of x to @a x

-	constexpr value_type &get_y() { return m_y; }
-	constexpr value_type get_y() const { return m_y; }
-	constexpr void set_y(value_type y) { m_y = y; }
+	constexpr value_type &get_y() { return m_y; }      ///< Get a reference to y
+	constexpr value_type get_y() const { return m_y; } ///< Get the value of y
+	constexpr void set_y(value_type y) { m_y = y; }    ///< Set the value of y to @a y

-	constexpr value_type &get_z() { return m_z; }
-	constexpr value_type get_z() const { return m_z; }
-	constexpr void set_z(value_type z) { m_z = z; }
+	constexpr value_type &get_z() { return m_z; }      ///< Get a reference to z
+	constexpr value_type get_z() const { return m_z; } ///< Get the value of z
+	constexpr void set_z(value_type z) { m_z = z; }    ///< Set the value of z to @a z

+	/// \brief add @a rhs
 	constexpr point_type &operator+=(const point_type &rhs)
 	{
 		m_x += rhs.m_x;
@@ -436,6 +510,7 @@ struct point_type
 		return *this;
 	}

+	/// \brief add @a d to all members
 	constexpr point_type &operator+=(value_type d)
 	{
 		m_x += d;
@@ -445,6 +520,14 @@ struct point_type
 		return *this;
 	}

+	/// \brief Add the points @a lhs and @a rhs and return the result
+	template <typename F2>
+	friend constexpr auto operator+(const point_type &lhs, const point_type<F2> &rhs)
+	{
+		return point_type<std::common_type_t<value_type, F2>>(lhs.m_x + rhs.m_x, lhs.m_y + rhs.m_y, lhs.m_z + rhs.m_z);
+	}
+
+	/// \brief subtract @a rhs
 	constexpr point_type &operator-=(const point_type &rhs)
 	{
 		m_x -= rhs.m_x;
@@ -454,6 +537,7 @@ struct point_type
 		return *this;
 	}

+	/// \brief subtract @a d from all members
 	constexpr point_type &operator-=(value_type d)
 	{
 		m_x -= d;
@@ -463,6 +547,20 @@ struct point_type
 		return *this;
 	}

+	/// \brief Subtract the points @a lhs and @a rhs and return the result
+	template <typename F2>
+	friend constexpr auto operator-(const point_type &lhs, const point_type<F2> &rhs)
+	{
+		return point_type<std::common_type_t<value_type, F2>>(lhs.m_x - rhs.m_x, lhs.m_y - rhs.m_y, lhs.m_z - rhs.m_z);
+	}
+
+	/// \brief Return the negative copy of @a pt
+	friend constexpr point_type operator-(const point_type &pt)
+	{
+		return point_type(-pt.m_x, -pt.m_y, -pt.m_z);
+	}
+
+	/// \brief multiply all members with @a rhs
 	constexpr point_type &operator*=(value_type rhs)
 	{
 		m_x *= rhs;
@@ -471,6 +569,21 @@ struct point_type
 		return *this;
 	}

+	/// \brief multiply point @a pt with value @a f and return the result
+	template <typename F2>
+	friend constexpr auto operator*(const point_type &pt, F2 f)
+	{
+		return point_type<std::common_type_t<value_type, F2>>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
+	}
+
+	/// \brief multiply point @a pt with value @a f and return the result
+	template <typename F2>
+	friend constexpr auto operator*(F2 f, const point_type &pt)
+	{
+		return point_type<std::common_type_t<value_type, F2>>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
+	}
+
+	/// \brief divide all members by @a rhs
 	constexpr point_type &operator/=(value_type rhs)
 	{
 		m_x /= rhs;
@@ -479,6 +592,20 @@ struct point_type
 		return *this;
 	}

+	/// \brief divide point @a pt by value @a f and return the result
+	template <typename F2>
+	friend constexpr auto operator/(const point_type &pt, F2 f)
+	{
+		return point_type<std::common_type_t<value_type, F2>>(pt.m_x / f, pt.m_y / f, pt.m_z / f);
+	}
+
+	/**
+	 * @brief looking at this point as a vector, normalise it which
+	 * means dividing all members by the length making the length
+	 * effectively 1.
+	 *
+	 * @return The previous length of this vector
+	 */
 	constexpr value_type normalize()
 	{
 		auto length = m_x * m_x + m_y * m_y + m_z * m_z;
@@ -490,6 +617,7 @@ struct point_type
 		return length;
 	}

+	/// \brief Rotate this point using the quaternion @a q
 	constexpr void rotate(const quaternion &q)
 	{
 		quaternion_type<value_type> p(0, m_x, m_y, m_z);
@@ -501,6 +629,9 @@ struct point_type
 		m_z = p.get_d();
 	}

+	/// \brief Rotate this point using the quaternion @a q by first
+	/// translating it so that @a pivot is at the origin and, after
+	/// rotating, moving it back
 	constexpr void rotate(const quaternion &q, point_type pivot)
 	{
 		operator-=(pivot);
@@ -509,97 +640,71 @@ struct point_type
 	}

 #if HAVE_LIBCLIPPER
+	/// \brief Make it possible to pass a point to clipper functions expecting a clipper coordinate
 	operator clipper::Coord_orth() const
 	{
 		return clipper::Coord_orth(m_x, m_y, m_z);
 	}
 #endif

+	/// \brief Allow access to this point as if it is a tuple of three const value_type's
 	constexpr operator std::tuple<const value_type &, const value_type &, const value_type &>() const
 	{
 		return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
 	}

+	/// \brief Allow access to this point as if it is a tuple of three value_type's
 	constexpr operator std::tuple<value_type &, value_type &, value_type &>()
 	{
 		return std::make_tuple(std::ref(m_x), std::ref(m_y), std::ref(m_z));
 	}

+	/// \brief Compare with @a rhs
 	constexpr bool operator==(const point_type &rhs) const
 	{
 		return m_x == rhs.m_x and m_y == rhs.m_y and m_z == rhs.m_z;
 	}

 	// consider point as a vector... perhaps I should rename point?
+
+	/// \brief looking at the point as if it is a vector, return the squared length
 	constexpr value_type length_sq() const
 	{
 		return m_x * m_x + m_y * m_y + m_z * m_z;
 	}

+	/// \brief looking at the point as if it is a vector, return the length
 	constexpr value_type length() const
 	{
-		return std::sqrt(m_x * m_x + m_y * m_y + m_z * m_z);
+		return std::sqrt(length_sq());
+	}
+
+	/// \brief Print out the point @a pt to @a os
+	friend std::ostream &operator<<(std::ostream &os, const point_type &pt)
+	{
+		os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
+		return os;
 	}
 };

+/// \brief By default we use points with float value_type
 using point = point_type<float>;

-template <typename F>
-inline constexpr std::ostream &operator<<(std::ostream &os, const point_type<F> &pt)
-{
-	os << '(' << pt.m_x << ',' << pt.m_y << ',' << pt.m_z << ')';
-	return os;
-}
-
-template <typename F>
-inline constexpr point_type<F> operator+(const point_type<F> &lhs, const point_type<F> &rhs)
-{
-	return point_type<F>(lhs.m_x + rhs.m_x, lhs.m_y + rhs.m_y, lhs.m_z + rhs.m_z);
-}
-
-template <typename F>
-inline constexpr point_type<F> operator-(const point_type<F> &lhs, const point_type<F> &rhs)
-{
-	return point_type<F>(lhs.m_x - rhs.m_x, lhs.m_y - rhs.m_y, lhs.m_z - rhs.m_z);
-}
-
-template <typename F>
-inline constexpr point_type<F> operator-(const point_type<F> &pt)
-{
-	return point_type<F>(-pt.m_x, -pt.m_y, -pt.m_z);
-}
-
-template <typename F>
-inline constexpr point_type<F> operator*(const point_type<F> &pt, F f)
-{
-	return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
-}
-
-template <typename F>
-inline constexpr point_type<F> operator*(F f, const point_type<F> &pt)
-{
-	return point_type<F>(pt.m_x * f, pt.m_y * f, pt.m_z * f);
-}
-
-template <typename F>
-inline constexpr point_type<F> operator/(const point_type<F> &pt, F f)
-{
-	return point_type<F>(pt.m_x / f, pt.m_y / f, pt.m_z / f);
-}
-
 // --------------------------------------------------------------------
 // several standard 3d operations

-template <typename F>
-inline constexpr auto distance_squared(const point_type<F> &a, const point_type<F> &b)
+/// \brief return the squared distance between points @a a and @a b
+template <typename F1, typename F2>
+constexpr auto distance_squared(const point_type<F1> &a, const point_type<F2> &b)
 {
 	return (a.m_x - b.m_x) * (a.m_x - b.m_x) +
 	       (a.m_y - b.m_y) * (a.m_y - b.m_y) +
 	       (a.m_z - b.m_z) * (a.m_z - b.m_z);
 }

-template <typename F>
-inline constexpr auto distance(const point_type<F> &a, const point_type<F> &b)
+/// \brief return the distance between points @a a and @a b
+template <typename F1, typename F2>
+constexpr auto distance(const point_type<F1> &a, const point_type<F2> &b)
 {
 	return std::sqrt(
 		(a.m_x - b.m_x) * (a.m_x - b.m_x) +
@@ -607,20 +712,24 @@ inline constexpr auto distance(const point_type<F> &a, const point_type<F> &b)
 		(a.m_z - b.m_z) * (a.m_z - b.m_z));
 }

-template <typename F>
-inline constexpr auto dot_product(const point_type<F> &a, const point_type<F> &b)
+/// \brief return the dot product between the vectors @a a and @a b
+template <typename F1, typename F2>
+inline constexpr auto dot_product(const point_type<F1> &a, const point_type<F2> &b)
 {
 	return a.m_x * b.m_x + a.m_y * b.m_y + a.m_z * b.m_z;
 }

-template <typename F>
-inline constexpr point_type<F> cross_product(const point_type<F> &a, const point_type<F> &b)
+/// \brief return the cross product between the vectors @a a and @a b
+template <typename F1, typename F2>
+inline constexpr auto cross_product(const point_type<F1> &a, const point_type<F2> &b)
 {
-	return point_type<F>(a.m_y * b.m_z - b.m_y * a.m_z,
+	return point_type<std::common_type_t<F1, F2>>(
+		a.m_y * b.m_z - b.m_y * a.m_z,
 		a.m_z * b.m_x - b.m_z * a.m_x,
 		a.m_x * b.m_y - b.m_x * a.m_y);
 }

+/// \brief return the angle in degrees between the vectors from point @a p2 to @a p1 and @a p2 to @a p3
 template <typename F>
 constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3)
 {
@@ -630,6 +739,9 @@ constexpr auto angle(const point_type<F> &p1, const point_type<F> &p2, const poi
 	return std::acos(dot_product(v1, v2) / (v1.length() * v2.length())) * 180 / kPI;
 }

+/// \brief return the dihedral angle in degrees for the four points @a p1, @a p2, @a p3 and @a p4
+///
+/// See https://en.wikipedia.org/wiki/Dihedral_angle for an explanation of what a dihedral angle is
 template <typename F>
 constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
 {
@@ -657,6 +769,7 @@ constexpr auto dihedral_angle(const point_type<F> &p1, const point_type<F> &p2,
 	return result;
 }

+/// \brief return the cosinus angle for the four points @a p1, @a p2, @a p3 and @a p4
 template <typename F>
 constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, const point_type<F> &p3, const point_type<F> &p4)
 {
@@ -668,6 +781,7 @@ constexpr auto cosinus_angle(const point_type<F> &p1, const point_type<F> &p2, c
 	return x > 0 ? dot_product(v12, v34) / std::sqrt(x) : 0;
 }

+/// \brief return the distance from point @a p to the line from @a l1 to @a l2
 template <typename F>
 constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<F> &l2, const point_type<F> &p)
 {
@@ -679,15 +793,19 @@ constexpr auto distance_point_to_line(const point_type<F> &l1, const point_type<
 }

 // --------------------------------------------------------------------
-// For e.g. simulated annealing, returns a new point that is moved in
-// a random direction with a distance randomly chosen from a normal
-// distribution with a stddev of offset.
-
+/**
+ * @brief For e.g. simulated annealing, returns a new point that is moved in
+ * a random direction with a distance randomly chosen from a normal
+ * distribution with a stddev of offset.
+ */
 point nudge(point p, float offset);

 // --------------------------------------------------------------------

+/// \brief Return a quaternion created from angle @a angle and axis @a axis
 quaternion construct_from_angle_axis(float angle, point axis);
+
+/// \brief Return a tuple of an angle and an axis for quaternion @a q
 std::tuple<double, point> quaternion_to_angle_axis(quaternion q);

 /// @brief Given four points and an angle, return the quaternion required to rotate
@@ -696,8 +814,12 @@ std::tuple<double, point> quaternion_to_angle_axis(quaternion q);
 quaternion construct_for_dihedral_angle(point p1, point p2, point p3, point p4,
 	float angle, float esd);

-point centroid(const std::vector<point> &Points);
-point center_points(std::vector<point> &Points);
+/// \brief Return the point that is the centroid of all the points in @a pts
+point centroid(const std::vector<point> &pts);
+
+/// \brief Move all the points in @a pts so that their centroid is at the origin
+/// (0, 0, 0) and return the offset used (the former centroid)
+point center_points(std::vector<point> &pts);

 /// \brief Returns how the two sets of points \a a and \b b can be aligned
 ///
@@ -711,39 +833,56 @@ quaternion align_points(const std::vector<point> &a, const std::vector<point> &b
 double RMSd(const std::vector<point> &a, const std::vector<point> &b);

 // --------------------------------------------------------------------
-// Helper class to generate evenly divided points on a sphere
-// we use a fibonacci sphere to calculate even distribution of the dots
-
+/**
+ * @brief Helper class to generate evenly divided points on a sphere
+ *
+ * We use a fibonacci sphere to calculate even distribution of the dots
+ *
+ * @tparam N The number of points on the sphere is 2 * N + 1
+ */
 template <int N>
 class spherical_dots
 {
  public:
-
+	/// \brief the number of points
 	constexpr static int P = 2 * N * 1;

+	/// \brief the *weight* of the fibonacci sphere
+	constexpr static double W = (4 * kPI) / P;
+
+	/// \brief the internal storage type
 	using array_type = typename std::array<point, P>;
+
+	/// \brief iterator type
 	using iterator = typename array_type::const_iterator;

+	/// \brief singleton instance
 	static spherical_dots &instance()
 	{
 		static spherical_dots sInstance;
 		return sInstance;
 	}

-	size_t size() const { return m_points.size(); }
+	/// \brief The number of points
+	size_t size() const { return P; }
+
+	/// \brief Access a point by index
 	const point operator[](uint32_t inIx) const { return m_points[inIx]; }
+
+	/// \brief iterator pointing to the first point
 	iterator begin() const { return m_points.begin(); }
+
+	/// \brief iterator pointing past the last point
 	iterator end() const { return m_points.end(); }

-	double weight() const { return m_weight; }
+	/// \brief return the *weight*, i.e. 4 * pi divided by the number of points
+	double weight() const { return W; }

 	spherical_dots()
 	{
 		const double
 			kGoldenRatio = (1 + std::sqrt(5.0)) / 2;

-		m_weight = (4 * kPI) / P;
-
 		auto p = m_points.begin();

 		for (int32_t i = -N; i <= N; ++i)
@@ -761,7 +900,6 @@ class spherical_dots

  private:
 	array_type m_points;
-	double m_weight;
 };

 } // namespace cif
--- a/include/cif++/row.hpp
+++ b/include/cif++/row.hpp
@@ -26,10 +26,55 @@

 #pragma once

-#include <cif++/item.hpp>
+#include "cif++/item.hpp"

 #include <array>

+/**
+ * @file row.hpp
+ * 
+ * The class cif::row should be an opaque type. It is used to store the
+ * internal data per row in a category. You should use cif::row_handle
+ * to get access to the contents in a row.
+ * 
+ * One could think of rows as vectors of cif::item. But internally
+ * that's not the case.
+ * 
+ * You can access the values of stored items by name or index.
+ * The return value of operator[] is an cif::item_handle object.
+ * 
+ * @code {.cpp}
+ * cif::category &atom_site = my_db["atom_site"];
+ * cif::row_handle rh = atom_site.front();
+ * 
+ * // by name:
+ * std::string name = rh["label_atom_id"].as<std::string>();
+ * 
+ * // by index:
+ * uint16_t ix = atom_site.get_column_ix("label_atom_id");
+ * assert(rh[ix].as<std::string>() == name);
+ * @endcode
+ * 
+ * There is some template magic here to allow easy extracting of data
+ * from rows. This can be done using cif::tie e.g.:
+ * 
+ * @code {.cpp}
+ * std::string name;
+ * float x, y, z;
+ * 
+ * cif::tie(name, x, y, z) = rh.get("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
+ * @endcode
+ * 
+ * However, a more modern way uses structured binding:
+ * 
+ * @code {.cpp}
+ * const auto &[name, x, y, z] = rh.get<std::string,float,float,float>("label_atom_id", "cartn_x", "cartn_y", "cartn_z");
+ * @endcode
+ * 
+ * 
+ * 
+ */
+
 namespace cif
 {

@@ -96,6 +141,8 @@ namespace detail

 } // namespace detail

+/// \brief similar to std::tie, assign values to each element in @a v from the 
+/// result of a get on a row_handle.
 template <typename... Ts>
 auto tie(Ts &...v)
 {
@@ -110,14 +157,20 @@ class row : public std::vector<item_value>
  public:
 	row() = default;

+	/**
+	 * @brief Return the item_value pointer for item at index @a ix
+	 */
 	item_value* get(uint16_t ix)
 	{
-		return ix < size() ? &at(ix) : nullptr;
+		return ix < size() ? &data()[ix] : nullptr;
 	}

+	/**
+	 * @brief Return the const item_value pointer for item at index @a ix
+	 */
 	const item_value* get(uint16_t ix) const
 	{
-		return ix < size() ? &at(ix) : nullptr;
+		return ix < size() ? &data()[ix] : nullptr;
 	}

  private:
@@ -150,6 +203,7 @@ class row : public std::vector<item_value>
 class row_handle
 {
  public:
+	/** @cond */
 	friend struct item_handle;
 	friend class category;
 	friend class category_index;
@@ -163,79 +217,119 @@ class row_handle
 	row_handle &operator=(const row_handle &) = default;
 	row_handle &operator=(row_handle &&) = default;

+	/** @endcond */
+
+	/// \brief constructor taking a category @a cat and a row @a r
 	row_handle(const category &cat, const row &r)
 		: m_category(const_cast<category *>(&cat))
 		, m_row(const_cast<row *>(&r))
 	{
 	}

+	/// \brief return the category this row belongs to
 	const category &get_category() const
 	{
 		return *m_category;
 	}

+	/// \brief Return true if the row is empty or uninitialised
 	bool empty() const
 	{
 		return m_category == nullptr or m_row == nullptr;
 	}

+	/// \brief convenience method to test for empty()
 	explicit operator bool() const
 	{
 		return not empty();
 	}

+	/// \brief return a cif::item_handle to the item in column @a column_ix
 	item_handle operator[](uint16_t column_ix)
 	{
 		return empty() ? item_handle::s_null_item : item_handle(column_ix, *this);
 	}

+	/// \brief return a const cif::item_handle to the item in column @a column_ix
 	const item_handle operator[](uint16_t column_ix) const
 	{
 		return empty() ? item_handle::s_null_item : item_handle(column_ix, const_cast<row_handle &>(*this));
 	}

+	/// \brief return a cif::item_handle to the item in the column named @a column_name
 	item_handle operator[](std::string_view column_name)
 	{
 		return empty() ? item_handle::s_null_item : item_handle(add_column(column_name), *this);
 	}

+	/// \brief return a const cif::item_handle to the item in the column named @a column_name
 	const item_handle operator[](std::string_view column_name) const
 	{
 		return empty() ? item_handle::s_null_item : item_handle(get_column_ix(column_name), const_cast<row_handle &>(*this));
 	}

+	/// \brief Return an object that can be used in combination with cif::tie
+	/// to assign the values for the columns @a columns
 	template <typename... C>
 	auto get(C... columns) const
 	{
 		return detail::get_row_result<C...>(*this, { get_column_ix(columns)... });
 	}

-	template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C), int> = 0>
+	/// \brief Return a tuple of values of types @a Ts for the columns @a columns
+	template <typename... Ts, typename... C, std::enable_if_t<sizeof...(Ts) == sizeof...(C) and sizeof...(C) != 1, int> = 0>
 	std::tuple<Ts...> get(C... columns) const
 	{
 		return detail::get_row_result<Ts...>(*this, { get_column_ix(columns)... });
 	}

+	/// \brief Get the value of column @a column cast to type @a T
 	template <typename T>
-	T get(const char *column)
+	T get(const char *column) const
 	{
 		return operator[](get_column_ix(column)).template as<T>();
 	}

+	/// \brief assign each of the columns named in @a values to their respective value
 	void assign(const std::vector<item> &values)
 	{
 		for (auto &value : values)
 			assign(value, true);
 	}

+	/** \brief assign the value @a value to the column named @a name 
+	 * 
+	 * If updateLinked is true, linked records are updated as well.
+	 * That means that if column @a name is part of the link definition
+	 * and the link results in a linked record in another category
+	 * this record in the linked category is updated as well.
+	 * 
+	 * If validate is true, which is default, the assigned value is
+	 * checked to see if it conforms to the rules defined in the dictionary
+	 */
+
 	void assign(std::string_view name, std::string_view value, bool updateLinked, bool validate = true)
 	{
 		assign(add_column(name), value, updateLinked, validate);
 	}

+	/** \brief assign the value @a value to column at index @a column
+	 * 
+	 * If updateLinked is true, linked records are updated as well.
+	 * That means that if column @a column is part of the link definition
+	 * and the link results in a linked record in another category
+	 * this record in the linked category is updated as well.
+	 * 
+	 * If validate is true, which is default, the assigned value is
+	 * checked to see if it conforms to the rules defined in the dictionary
+	 */
+
 	void assign(uint16_t column, std::string_view value, bool updateLinked, bool validate = true);

+	/// \brief compare two rows
 	bool operator==(const row_handle &rhs) const { return m_category == rhs.m_category and m_row == rhs.m_row; }
+
+	/// \brief compare two rows
 	bool operator!=(const row_handle &rhs) const { return m_category != rhs.m_category or m_row != rhs.m_row; }

  private:
@@ -267,9 +361,17 @@ class row_handle

 // --------------------------------------------------------------------

+/**
+ * @brief The class row_initializer is a list of cif::item's.
+ * 
+ * This class is used to construct new rows, it allows to
+ * group a list of item name and value pairs and pass it
+ * in one go to the constructing function.
+ */
 class row_initializer : public std::vector<item>
 {
  public:
+	/** @cond */
 	friend class category;

 	row_initializer() = default;
@@ -278,26 +380,38 @@ class row_initializer : public std::vector<item>
 	row_initializer &operator=(const row_initializer &) = default;
 	row_initializer &operator=(row_initializer &&) = default;

+	/** @endcond */
+
+	/// \brief constructor taking a std::initializer_list of items
 	row_initializer(std::initializer_list<item> items)
 		: std::vector<item>(items)
 	{
 	}

+	/// \brief constructor taking a range of items
 	template <typename ItemIter, std::enable_if_t<std::is_same_v<typename ItemIter::value_type, item>, int> = 0>
 	row_initializer(ItemIter b, ItemIter e)
 		: std::vector<item>(b, e)
 	{
 	}

+	/// \brief constructor taking the values of an existing row
 	row_initializer(row_handle rh);

+
+	/// \brief set the value for item name @a name to @a value
 	void set_value(std::string_view name, std::string_view value);
+
+	/// \brief set the value for item based on @a i
 	void set_value(const item &i)
 	{
 		set_value(i.name(), i.value());
 	}

+	/// \brief set the value for item name @a name to @a value, but only if the item did not have a value already
 	void set_value_if_empty(std::string_view name, std::string_view value);
+
+	/// \brief set the value for item @a i, but only if the item did not have a value already
 	void set_value_if_empty(const item &i)
 	{
 		set_value_if_empty(i.name(), i.value());
--- a/include/cif++/symmetry.hpp
+++ b/include/cif++/symmetry.hpp
@@ -26,86 +26,138 @@

 #pragma once

-#include <cif++/exports.hpp>
+#include "cif++/exports.hpp"
+#include "cif++/matrix.hpp"
+#include "cif++/point.hpp"

 #include <array>
 #include <cstdint>
 #include <string>

+#if defined(__cpp_impl_three_way_comparison)
+#include <compare>
+#endif
+
+/** \file cif++/symmetry.hpp
+ *
+ * This file contains code to do symmetry operations based on the
+ * operations as specified in the International Tables.
+ */
+
 namespace cif
 {

 // --------------------------------------------------------------------

+/// \brief Apply matrix transformation @a m on point @a pt and return the result
+inline point operator*(const matrix3x3<float> &m, const point &pt)
+{
+	return {
+		m(0, 0) * pt.m_x + m(0, 1) * pt.m_y + m(0, 2) * pt.m_z,
+		m(1, 0) * pt.m_x + m(1, 1) * pt.m_y + m(1, 2) * pt.m_z,
+		m(2, 0) * pt.m_x + m(2, 1) * pt.m_y + m(2, 2) * pt.m_z
+	};
+}
+
+// --------------------------------------------------------------------
+
+/// \brief the naming schemes that can be used to specify a spacegroup
 enum class space_group_name
 {
-	full,
-	xHM,
-	Hall
+	full, ///< The *full* spacegroup
+	xHM,  ///< The *xHM* spacegroup
+	Hall  ///< The *Hall* spacegroup
 };

+/// \brief For each known spacegroup we define a structure like this
 struct space_group
 {
-	const char *name;
-	const char *xHM;
-	const char *Hall;
-	int nr;
+	const char *name; ///< The name according to *full*
+	const char *xHM;  ///< The name according to *xHM*
+	const char *Hall; ///< The name according to *Hall*
+	int nr;           ///< The number for this spacegroup
 };

+/// \brief Global list of spacegroups
 extern CIFPP_EXPORT const space_group kSpaceGroups[];
+
+/// \brief Global for the size of the list of spacegroups
 extern CIFPP_EXPORT const std::size_t kNrOfSpaceGroups;

 // --------------------------------------------------------------------

+/**
+ * @brief Helper class to efficiently pack the data that
+ * makes up a symmetry operation
+ *
+ */
+
 struct symop_data
 {
+	/// \brief constructor
 	constexpr symop_data(const std::array<int, 15> &data)
-		: m_packed((data[0] & 0x03ULL) << 34 bitor
-				   (data[1] & 0x03ULL) << 32 bitor
-				   (data[2] & 0x03ULL) << 30 bitor
-				   (data[3] & 0x03ULL) << 28 bitor
-				   (data[4] & 0x03ULL) << 26 bitor
-				   (data[5] & 0x03ULL) << 24 bitor
-				   (data[6] & 0x03ULL) << 22 bitor
-				   (data[7] & 0x03ULL) << 20 bitor
-				   (data[8] & 0x03ULL) << 18 bitor
-				   (data[9] & 0x07ULL) << 15 bitor
-				   (data[10] & 0x07ULL) << 12 bitor
-				   (data[11] & 0x07ULL) << 9 bitor
-				   (data[12] & 0x07ULL) << 6 bitor
-				   (data[13] & 0x07ULL) << 3 bitor
-				   (data[14] & 0x07ULL) << 0)
+		: m_packed((data[0] bitand 0x03ULL) << 34 bitor
+				   (data[1] bitand 0x03ULL) << 32 bitor
+				   (data[2] bitand 0x03ULL) << 30 bitor
+				   (data[3] bitand 0x03ULL) << 28 bitor
+				   (data[4] bitand 0x03ULL) << 26 bitor
+				   (data[5] bitand 0x03ULL) << 24 bitor
+				   (data[6] bitand 0x03ULL) << 22 bitor
+				   (data[7] bitand 0x03ULL) << 20 bitor
+				   (data[8] bitand 0x03ULL) << 18 bitor
+				   (data[9] bitand 0x07ULL) << 15 bitor
+				   (data[10] bitand 0x07ULL) << 12 bitor
+				   (data[11] bitand 0x07ULL) << 9 bitor
+				   (data[12] bitand 0x07ULL) << 6 bitor
+				   (data[13] bitand 0x07ULL) << 3 bitor
+				   (data[14] bitand 0x07ULL) << 0)
 	{
 	}

+	/// \brief compare
 	bool operator==(const symop_data &rhs) const
 	{
 		return m_packed == rhs.m_packed;
 	}

+	/// \brief sorting order
 	bool operator<(const symop_data &rhs) const
 	{
 		return m_packed < rhs.m_packed;
 	}

-	std::array<int, 15> data() const
+	/// \brief return an int representing the value stored in the two bits at offset @a offset
+	inline constexpr int unpack3(int offset) const
+	{
+		int result = (m_packed >> offset) bitand 0x03;
+		return result == 3 ? -1 : result;
+	}
+
+	/// \brief return an int representing the value stored in the three bits at offset @a offset
+	inline constexpr int unpack7(int offset) const
+	{
+		return (m_packed >> offset) bitand 0x07;
+	}
+
+	/// \brief return an array of 15 ints representing the values stored
+	constexpr std::array<int, 15> data() const
 	{
 		return {
-			static_cast<int>(m_packed >> 34) bitand 0x03,
-			static_cast<int>(m_packed >> 32) bitand 0x03,
-			static_cast<int>(m_packed >> 30) bitand 0x03,
-			static_cast<int>(m_packed >> 28) bitand 0x03,
-			static_cast<int>(m_packed >> 26) bitand 0x03,
-			static_cast<int>(m_packed >> 24) bitand 0x03,
-			static_cast<int>(m_packed >> 22) bitand 0x03,
-			static_cast<int>(m_packed >> 20) bitand 0x03,
-			static_cast<int>(m_packed >> 18) bitand 0x03,
-			static_cast<int>(m_packed >> 15) bitand 0x07,
-			static_cast<int>(m_packed >> 12) bitand 0x07,
-			static_cast<int>(m_packed >> 9) bitand 0x07,
-			static_cast<int>(m_packed >> 6) bitand 0x07,
-			static_cast<int>(m_packed >> 3) bitand 0x07,
-			static_cast<int>(m_packed >> 0) bitand 0x07,
+			unpack3(34),
+			unpack3(32),
+			unpack3(30),
+			unpack3(28),
+			unpack3(26),
+			unpack3(24),
+			unpack3(22),
+			unpack3(20),
+			unpack3(18),
+			unpack7(15),
+			unpack7(12),
+			unpack7(9),
+			unpack7(6),
+			unpack7(3),
+			unpack7(0)
 		};
 	}

@@ -122,18 +174,24 @@ struct symop_data
 	uint64_t m_packed;
 };

+/**
+ * @brief For each symmetry operator defined in the international tables
+ * we have an entry in this struct type. It contains the spacegroup
+ * number, the symmetry operations and the rotational number.
+ */
 struct symop_datablock
 {
+	/// \brief constructor
 	constexpr symop_datablock(int spacegroup, int rotational_number, const std::array<int, 15> &rt_data)
-		: m_v((spacegroup & 0xffffULL) << 48 bitor
-			  (rotational_number & 0xffULL) << 40 bitor
+		: m_v((spacegroup bitand 0xffffULL) << 48 bitor
+			  (rotational_number bitand 0xffULL) << 40 bitor
 			  symop_data(rt_data).m_packed)
 	{
 	}

-	uint16_t spacegroup() const { return m_v >> 48; }
-	symop_data symop() const { return symop_data(m_v); }
-	uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; }
+	uint16_t spacegroup() const { return m_v >> 48; }                     ///< Return the spacegroup
+	symop_data symop() const { return symop_data(m_v); }                  ///< Return the symmetry operation
+	uint8_t rotational_number() const { return (m_v >> 40) bitand 0xff; } ///< Return the rotational_number

  private:
 	uint64_t m_v;
@@ -141,12 +199,345 @@ struct symop_datablock

 static_assert(sizeof(symop_datablock) == sizeof(uint64_t), "Size of symop_data is wrong");

+/// \brief Global containing the list of known symmetry operations
 extern CIFPP_EXPORT const symop_datablock kSymopNrTable[];
+
+/// \brief Size of the list of known symmetry operations
 extern CIFPP_EXPORT const std::size_t kSymopNrTableSize;

 // --------------------------------------------------------------------
+// Some more symmetry related stuff here.

-int get_space_group_number(std::string spacegroup);                        // alternative for clipper's parsing code, using space_group_name::full
-int get_space_group_number(std::string spacegroup, space_group_name type); // alternative for clipper's parsing code
+class datablock;
+
+class cell;
+class spacegroup;
+class rtop;
+struct sym_op;
+
+/** @brief A class that encapsulates the symmetry operations as used in PDB files,
+ * i.e. a rotational number and a translation vector.
+ *
+ * The syntax in string format follows the syntax as used in mmCIF files, i.e.
+ * rotational number followed by underscore and the three translations where 5 is
+ * no movement.
+ *
+ * So the string 1_555 means no symmetry movement at all since the rotational number
+ * 1 always corresponds to the symmetry operation [x, y, z].
+ */
+
+struct sym_op
+{
+  public:
+	/// \brief constructor
+	sym_op(uint8_t nr = 1, uint8_t ta = 5, uint8_t tb = 5, uint8_t tc = 5)
+		: m_nr(nr)
+		, m_ta(ta)
+		, m_tb(tb)
+		, m_tc(tc)
+	{
+	}
+
+	/// \brief construct a sym_op based on the contents encoded in string @a s
+	explicit sym_op(std::string_view s);
+
+	/** @cond */
+	sym_op(const sym_op &) = default;
+	sym_op(sym_op &&) = default;
+	sym_op &operator=(const sym_op &) = default;
+	sym_op &operator=(sym_op &&) = default;
+	/** @endcond */
+
+	/// \brief return true if this sym_op is the identity operator
+	constexpr bool is_identity() const
+	{
+		return m_nr == 1 and m_ta == 5 and m_tb == 5 and m_tc == 5;
+	}
+
+	/// \brief quick test for unequal to identity
+	explicit constexpr operator bool() const
+	{
+		return not is_identity();
+	}
+
+	/// \brief return the content encoded in a string
+	std::string string() const;
+
+#if defined(__cpp_impl_three_way_comparison)
+	/// \brief a default spaceship operator
+	constexpr auto operator<=>(const sym_op &rhs) const = default;
+#else
+	/// \brief a default equals operator
+	constexpr bool operator==(const sym_op &rhs) const
+	{
+		return m_nr == rhs.m_nr and m_ta == rhs.m_ta and m_tb == rhs.m_tb and m_tc == rhs.m_tc;
+	}
+
+	/// \brief a default not-equals operator
+	constexpr bool operator!=(const sym_op &rhs) const
+	{
+		return not operator==(rhs);
+	}
+#endif
+
+	/// @cond
+	uint8_t m_nr;
+	uint8_t m_ta, m_tb, m_tc;
+	/// @endcond
+};
+
+static_assert(sizeof(sym_op) == 4, "Sym_op should be four bytes");
+
+namespace literals
+{
+	/**
+	 * @brief This operator allows you to write code like this:
+	 *
+	 * @code {.cpp}
+	 * using namespace cif::literals;
+	 *
+	 * cif::sym_op so = "1_555"_symop;
+	 * @endcode
+	 *
+	 */
+	inline sym_op operator""_symop(const char *text, size_t length)
+	{
+		return sym_op({ text, length });
+	}
+} // namespace literals
+
+// --------------------------------------------------------------------
+// The transformation class
+
+/**
+ * @brief A class you can use to apply symmetry transformations on points
+ *
+ * Transformations consist of two operations, a matrix transformation which
+ * is often a rotation followed by a translation.
+ *
+ * In case the matrix transformation is a pure rotation a quaternion
+ * is created to do the actual calculations. That's faster and more
+ * precise.
+ */
+class transformation
+{
+  public:
+	/// \brief constructor taking a symop_data object @a data
+	transformation(const symop_data &data);
+
+	/// \brief constructor taking a rotation matrix @a r and a translation vector @a t
+	transformation(const matrix3x3<float> &r, const cif::point &t);
+
+	/** @cond */
+	transformation(const transformation &) = default;
+	transformation(transformation &&) = default;
+	transformation &operator=(const transformation &) = default;
+	transformation &operator=(transformation &&) = default;
+	/** @endcond */
+
+	/// \brief operator() to perform the transformation on point @a pt and return the result
+	point operator()(point pt) const
+	{
+		if (m_q)
+			pt.rotate(m_q);
+		else
+			pt = m_rotation * pt;
+
+		return pt + m_translation;
+	}
+
+	/// \brief return a transformation object that is the result of applying @a rhs after @a lhs
+	friend transformation operator*(const transformation &lhs, const transformation &rhs);
+
+	/// \brief return the inverse transformation for @a t
+	friend transformation inverse(const transformation &t);
+
+	/// \brief return the inverse transformation for this
+	transformation operator-() const
+	{
+		return inverse(*this);
+	}
+
+	friend class spacegroup;
+
+  private:
+	// Most rotation matrices provided by the International Tables
+	// are really rotation matrices, in those cases we can construct
+	// a quaternion. Unfortunately, that doesn't work for all of them
+
+	void try_create_quaternion();
+
+	matrix3x3<float> m_rotation;
+	quaternion m_q;
+	point m_translation;
+};
+
+// --------------------------------------------------------------------
+// class cell
+
+/**
+ * @brief The cell class describes the dimensions and angles of a unit cell
+ * in a crystal
+ */
+
+class cell
+{
+  public:
+	/// \brief constructor
+	cell(float a, float b, float c, float alpha = 90.f, float beta = 90.f, float gamma = 90.f);
+
+	/// \brief constructor that takes the appropriate values from the *cell* category in datablock @a db
+	cell(const datablock &db);
+
+	float get_a() const { return m_a; } ///< return dimension a
+	float get_b() const { return m_b; } ///< return dimension b
+	float get_c() const { return m_c; } ///< return dimension c
+
+	float get_alpha() const { return m_alpha; } ///< return angle alpha
+	float get_beta() const { return m_beta; }   ///< return angle beta
+	float get_gamma() const { return m_gamma; } ///< return angle gamma
+
+	float get_volume() const; ///< return the calculated volume for this cell
+
+	matrix3x3<float> get_orthogonal_matrix() const { return m_orthogonal; } ///< return the matrix to use to transform coordinates from fractional to orthogonal
+	matrix3x3<float> get_fractional_matrix() const { return m_fractional; } ///< return the matrix to use to transform coordinates from orthogonal to fractional
+
+  private:
+	void init();
+
+	float m_a, m_b, m_c, m_alpha, m_beta, m_gamma;
+	matrix3x3<float> m_orthogonal, m_fractional;
+};
+
+// --------------------------------------------------------------------
+
+/// \brief Return the spacegroup number from the *symmetry* category in datablock @a db
+int get_space_group_number(const datablock &db);
+
+/// \brief Return the spacegroup number for spacegroup named @a spacegroup
+int get_space_group_number(std::string_view spacegroup);
+
+/// \brief Return the spacegroup number for spacegroup named @a spacegroup assuming space_group_name @a type
+int get_space_group_number(std::string_view spacegroup, space_group_name type);
+
+/**
+ * @brief class to encapsulate the list of transformations making up a spacegroup
+ *
+ */
+class spacegroup : public std::vector<transformation>
+{
+  public:
+	/// \brief constructor using the information in the *symmetry* category in datablock @a db
+	spacegroup(const datablock &db)
+		: spacegroup(get_space_group_number(db))
+	{
+	}
+
+	/// \brief constructor using the spacegroup named @a name
+	spacegroup(std::string_view name)
+		: spacegroup(get_space_group_number(name))
+	{
+	}
+
+	/// \brief constructor using the spacegroup named @a name assuming space_group_name @a type
+	spacegroup(std::string_view name, space_group_name type)
+		: spacegroup(get_space_group_number(name, type))
+	{
+	}
+
+	/// \brief constructor using the spacegroup number @a nr
+	spacegroup(int nr);
+
+	int get_nr() const { return m_nr; } ///< Return the nr
+	std::string get_name() const;       ///< Return the name
+
+	/** \brief perform a spacegroup operation on point @a pt using
+	 * cell @a c and sym_op @a symop
+	 */
+
+	point operator()(const point &pt, const cell &c, sym_op symop) const;
+
+	/** \brief perform an inverse spacegroup operation on point @a pt using
+	 * cell @a c and sym_op @a symop
+	 */
+	point inverse(const point &pt, const cell &c, sym_op symop) const;
+
+  private:
+	int m_nr;
+	size_t m_index;
+};
+
+// --------------------------------------------------------------------
+/**
+ * @brief A crystal combines a cell and a spacegroup.
+ *
+ * The information in cell and spacegroup together make up all
+ * information you need to do symmetry calculations in a crystal
+ */
+
+class crystal
+{
+  public:
+	/// \brief constructor using the information found in datablock @a db
+	crystal(const datablock &db)
+		: m_cell(db)
+		, m_spacegroup(db)
+	{
+	}
+
+	/// \brief constructor using cell @a c and spacegroup @a sg
+	crystal(const cell &c, const spacegroup &sg)
+		: m_cell(c)
+		, m_spacegroup(sg)
+	{
+	}
+
+	/** @cond */
+	crystal(const crystal &) = default;
+	crystal(crystal &&) = default;
+	crystal &operator=(const crystal &) = default;
+	crystal &operator=(crystal &&) = default;
+	/** @endcond */
+
+	const cell &get_cell() const { return m_cell; }                   ///< Return the cell
+	const spacegroup &get_spacegroup() const { return m_spacegroup; } ///< Return the spacegroup
+
+	/// \brief Return the symmetry copy of point @a pt using symmetry operation @a symop
+	point symmetry_copy(const point &pt, sym_op symop) const
+	{
+		return m_spacegroup(pt, m_cell, symop);
+	}
+
+	/// \brief Return the symmetry copy of point @a pt using the inverse of symmetry operation @a symop
+	point inverse_symmetry_copy(const point &pt, sym_op symop) const
+	{
+		return m_spacegroup.inverse(pt, m_cell, symop);
+	}
+
+	/// \brief Return a tuple consisting of distance, new location and symmetry operation
+	/// for the point @a b with respect to point @a a.
+	std::tuple<float, point, sym_op> closest_symmetry_copy(point a, point b) const;
+
+  private:
+	cell m_cell;
+	spacegroup m_spacegroup;
+};
+
+// --------------------------------------------------------------------
+// Symmetry operations on points
+
+/// \brief convenience function returning the fractional point @a pt in orthogonal coordinates for cell @a c
+inline point orthogonal(const point &pt, const cell &c)
+{
+	return c.get_orthogonal_matrix() * pt;
+}
+
+/// \brief convenience function returning the orthogonal point @a pt in fractional coordinates for cell @a c
+inline point fractional(const point &pt, const cell &c)
+{
+	return c.get_fractional_matrix() * pt;
+}
+
+// --------------------------------------------------------------------

 } // namespace cif
--- a/include/cif++/text.hpp
+++ b/include/cif++/text.hpp
@@ -26,10 +26,11 @@

 #pragma once

-#include <cif++/exports.hpp>
+#include "cif++/exports.hpp"

 #include <charconv>
 #include <cmath>
+#include <cstdint>
 #include <limits>
 #include <set>
 #include <sstream>
@@ -43,6 +44,12 @@
 #include <zeep/type-traits.hpp>
 #endif

+/**
+ * \file text.hpp
+ * 
+ * Various text manipulating routines
+ */
+
 namespace cif
 {

@@ -51,18 +58,40 @@ namespace cif
 // some basic utilities: Since we're using ASCII input only, we define for optimisation
 // our own case conversion routines.

+/// \brief return whether string @a a is equal to string @a b ignoring changes in character case
 bool iequals(std::string_view a, std::string_view b);
+
+/// \brief compare string @a a to string @a b ignoring changes in character case
 int icompare(std::string_view a, std::string_view b);

+/// \brief return whether string @a a is equal to string @a b ignoring changes in character case
 bool iequals(const char *a, const char *b);
+
+/// \brief compare string @a a to string @a b ignoring changes in character case
 int icompare(const char *a, const char *b);

+/// \brief convert the string @a s to lower case in situ
 void to_lower(std::string &s);
+
+/// \brief return a lower case copy of string @a s
 std::string to_lower_copy(std::string_view s);

+/// \brief convert the string @a s to upper case in situ
 void to_upper(std::string &s);
-// std::string toUpperCopy(const std::string &s);

+/**
+ * @brief Join the strings in the range [ @a a, @a e ) using
+ * @a sep as separator
+ * 
+ * Example usage:
+ * 
+ * @code {.cpp}
+ * std::vector<std::string> v{ "aap", "noot", "mies" };
+ * 
+ * assert(cif::join(v.begin(), v.end(), ", ") == "aap, noot, mies");
+ * @endcode
+ * 
+ */
 template <typename IterType>
 std::string join(IterType b, IterType e, std::string_view sep)
 {
@@ -90,12 +119,41 @@ std::string join(IterType b, IterType e, std::string_view sep)
 	return s.str();
 }

+/**
+ * @brief Join the strings in the array @a arr using @a sep as separator
+ * 
+ * Example usage:
+ * 
+ * @code {.cpp}
+ * std::list<std::string> v{ "aap", "noot", "mies" };
+ * 
+ * assert(cif::join(v, ", ") == "aap, noot, mies");
+ * @endcode
+ * 
+ */
 template <typename V>
 std::string join(const V &arr, std::string_view sep)
 {
 	return join(arr.begin(), arr.end(), sep);
 }

+/**
+ * @brief Split the string in @a s based on the characters in @a separators
+ * 
+ * Each of the characters in @a separators induces a split.
+ * 
+ * When suppress_empty is true, empty strings are not produced in the
+ * resulting array.
+ * 
+ * Example:
+ * 
+ * @code {.cpp}
+ * auto v = cif::split("aap:noot,,mies", ":,", true);
+ * 
+ * assert(v == std::vector{"aap", "noot", "mies"});
+ * @endcode
+ * 
+ */
 template <typename StringType = std::string_view>
 std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
 {
@@ -123,15 +181,23 @@ std::vector<StringType> split(std::string_view s, std::string_view separators, b
 	return result;
 }

+/**
+ * @brief Replace all occurrences of @a what in string @a s with the string @a with
+ * 
+ * The string @a with may be empty in which case each occurrence of @a what is simply
+ * deleted.
+ */
 void replace_all(std::string &s, std::string_view what, std::string_view with = {});

 #if defined(__cpp_lib_starts_ends_with)

+/// \brief return whether string @a s starts with @a with
 inline bool starts_with(std::string s, std::string_view with)
 {
 	return s.starts_with(with);
 }

+/// \brief return whether string @a s ends with @a with
 inline bool ends_with(std::string_view s, std::string_view with)
 {
 	return s.ends_with(with);
@@ -139,11 +205,13 @@ inline bool ends_with(std::string_view s, std::string_view with)

 #else

+/// \brief return whether string @a s starts with @a with
 inline bool starts_with(std::string s, std::string_view with)
 {
 	return s.compare(0, with.length(), with) == 0;
 }

+/// \brief return whether string @a s ends with @a with
 inline bool ends_with(std::string_view s, std::string_view with)
 {
 	return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
@@ -153,6 +221,7 @@ inline bool ends_with(std::string_view s, std::string_view with)

 #if defined(__cpp_lib_string_contains)

+/// \brief return whether string @a s contains @a q
 inline bool contains(std::string_view s, std::string_view q)
 {
 	return s.contains(q);
@@ -160,6 +229,7 @@ inline bool contains(std::string_view s, std::string_view q)

 #else

+/// \brief return whether string @a s contains @a q
 inline bool contains(std::string_view s, std::string_view q)
 {
 	return s.find(q) != std::string_view::npos;
@@ -167,33 +237,51 @@ inline bool contains(std::string_view s, std::string_view q)

 #endif

+/// \brief return whether string @a s contains @a q ignoring character case
 bool icontains(std::string_view s, std::string_view q);

+/// \brief trim white space at the start of string @a s in situ
 void trim_left(std::string &s);
+
+/// \brief trim white space at the end of string @a s in situ
 void trim_right(std::string &s);
+
+/// \brief trim white space at both the start and the end of string @a s in situ
 void trim(std::string &s);

+/// \brief return a string trimmed of white space at the start of string @a s
 std::string trim_left_copy(std::string_view s);
+
+/// \brief return a string trimmed of white space at the end of string @a s
 std::string trim_right_copy(std::string_view s);
+
+/// \brief return a string trimmed of white space at both the start and the end of string @a s
 std::string trim_copy(std::string_view s);

 // To make life easier, we also define iless and iset using iequals

+/// \brief an operator object you can use to compare strings ignoring their character case
 struct iless
 {
+	/// \brief return the result of icompare for @a a and @a b
 	bool operator()(const std::string &a, const std::string &b) const
 	{
 		return icompare(a, b) < 0;
 	}
 };

-typedef std::set<std::string, iless> iset;
+
+/// iset is a std::set of std::string but with a comparator that
+/// ignores character case.
+using iset = std::set<std::string, iless>;

 // --------------------------------------------------------------------
 // This really makes a difference, having our own tolower routines

+/// \brief global list containing the lower case version of each ASCII character
 extern CIFPP_EXPORT const uint8_t kCharToLowerMap[256];

+/// \brief a very fast tolower implementation
 inline char tolower(int ch)
 {
 	return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
@@ -201,22 +289,37 @@ inline char tolower(int ch)

 // --------------------------------------------------------------------

+/** \brief return a tuple consisting of the category and item name for @a tag
+ * 
+ * The category name is stripped of its leading underscore character.
+ * 
+ * If no dot character was found, the category name is empty. That's for
+ * cif 1.0 formatted data.
+*/
+
 std::tuple<std::string, std::string> split_tag_name(std::string_view tag);

 // --------------------------------------------------------------------
-// generate a cif name, mainly used to generate asym_id's

+/// \brief generate a cif name, used e.g. to generate asym_id's
 std::string cif_id_for_number(int number);

 // --------------------------------------------------------------------
-//	custom wordwrapping routine

+/** \brief custom word wrapping routine.
+ * 
+ * Wrap the text in @a text based on a maximum line width @a width using
+ * a dynamic programming approach to get the most efficient filling of
+ * the space.
+ */
 std::vector<std::string> word_wrap(const std::string &text, size_t width);

 // --------------------------------------------------------------------
-/// std::from_chars for floating point types.
+/// \brief std::from_chars for floating point types.
+///
 /// These are optional, there's a selected_charconv class below that selects
-/// the best option to used based on support by the stl library
+/// the best option to use based on support by the stl library.
+///
 /// I.e. that in case of GNU < 12 (or something) the cif implementation will
 /// be used, all other cases will use the stl version.

@@ -341,6 +444,7 @@ std::from_chars_result from_chars(const char *first, const char *last, FloatType
 	return result;
 }

+/// \brief duplication of std::chars_format for deficient STL implementations
 enum class chars_format
 {
 	scientific = 1,
@@ -349,6 +453,7 @@ enum class chars_format
 	general = fixed | scientific
 };

+/// \brief a simplistic implementation of std::to_chars for old STL implementations
 template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
 std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
 {
@@ -388,6 +493,7 @@ std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_f
 	return result;
 }

+/// \brief a simplistic implementation of std::to_chars for old STL implementations
 template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
 std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
 {
@@ -427,37 +533,50 @@ std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_f
 	return result;
 }

+/// \brief class that uses our implementation of std::from_chars and std::to_chars
 template <typename T>
 struct my_charconv
 {
+	/// @brief Simply call our version of std::from_chars
 	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
 	{
 		return cif::from_chars(a, b, d);
 	}

+	/// @brief Simply call our version of std::to_chars
 	static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
 	{
 		return cif::to_chars(first, last, value, fmt);
 	}
 };

+/// \brief class that uses the STL implementation of std::from_chars and std::to_chars
 template <typename T>
 struct std_charconv
 {
+	/// @brief Simply call std::from_chars
 	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
 	{
 		return std::from_chars(a, b, d);
 	}

+	/// @brief Simply call std::to_chars
 	static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
 	{
 		return std::to_chars(first, last, value, fmt);
 	}
 };

+/// \brief helper to find a from_chars function
 template <typename T>
 using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));

+/**
+ * @brief Helper to select the best implementation of charconv based on availability of the
+ * function in the std:: namespace
+ * 
+ * @tparam T The type for which we want to find a from_chars/to_chars function
+ */
 template <typename T>
 using selected_charconv = typename std::conditional_t<std::experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;

--- a/include/cif++/utilities.hpp
+++ b/include/cif++/utilities.hpp
@@ -1,17 +1,17 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- * 
+ *
 * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
- * 
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- * 
+ *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- * 
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -26,15 +26,22 @@

 #pragma once

-#include <cif++/exports.hpp>
+#include "cif++/exports.hpp"

 #include <filesystem>
+#include <iostream>

 #ifndef STDOUT_FILENO
+/// @brief For systems that lack this value
 #define STDOUT_FILENO 1
 #endif

-#if _MSC_VER
+#ifndef STDERR_FILENO
+/// @brief For systems that lack this value
+#define STDERR_FILENO 2
+#endif
+
+#if _WIN32
 #include <io.h>
 #define isatty _isatty
 #else
@@ -49,137 +56,327 @@
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
 #endif

+/** \file utilities.hpp
+ *
+ * This file contains code that is very generic in nature like a progress_bar
+ * and classes you can use to colourise output text.
+ */
+
 namespace cif
 {

+/**
+ * @brief The global variable VERBOSE contains the level of verbosity
+ * requested. A value of 0 is normal, with some output on error conditions.
+ * A value > 0 will result in more output, the higher the value, the more
+ * output. A value < 0 will make the library silent, even in error
+ * conditions.
+ */
 extern CIFPP_EXPORT int VERBOSE;

-// the git 'build' number
+/// return the git 'build' number
 std::string get_version_nr();
-// std::string get_version_date();
-
-// --------------------------------------------------------------------
-//	Code helping with terminal i/o

+/// return the width of the current output terminal, or 80 if it cannot be determined
 uint32_t get_terminal_width();

 // --------------------------------------------------------------------
-//	Path of the current executable

-std::string get_executable_path();
-
-// --------------------------------------------------------------------
-//	some manipulators to write coloured text to terminals
-
-enum StringColour
+namespace colour
 {
-	scBLACK = 0,
-	scRED,
-	scGREEN,
-	scYELLOW,
-	scBLUE,
-	scMAGENTA,
-	scCYAN,
-	scWHITE,
-	scNONE = 9
-};
-
-template <typename String, typename CharT>
-struct ColouredString
-{
-	static_assert(std::is_reference<String>::value or std::is_pointer<String>::value, "String type must be pointer or reference");
-
-	ColouredString(String s, StringColour fore, StringColour back, bool bold = true)
-		: m_s(s)
-		, m_fore(fore)
-		, m_back(back)
-		, m_bold(bold)
+	/// @brief The defined colours
+	enum colour_type
 	{
-	}
+		black = 0,
+		red,
+		green,
+		yellow,
+		blue,
+		magenta,
+		cyan,
+		white,
+		none = 9
+	};

-	ColouredString &operator=(const ColouredString &) = delete;
-
-	String m_s;
-	StringColour m_fore, m_back;
-	bool m_bold;
-};
-
-template <typename CharT, typename Traits>
-std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<const CharT *, CharT> &s)
-{
-	if (isatty(STDOUT_FILENO))
+	/// @brief The defined styles
+	enum style_type
 	{
-		std::basic_ostringstream<CharT, Traits> ostr;
-		ostr << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
-			 << s.m_s
-			 << "\033[0m";
+		bold = 1,
+		underlined = 4,
+		blink = 5,
+		inverse = 7,
+		regular = 22,
+	};

-		return os << ostr.str();
-	}
-	else
-		return os << s.m_s;
-}
-
-template <typename CharT, typename Traits, typename String>
-std::basic_ostream<CharT, Traits> &operator<<(std::basic_ostream<CharT, Traits> &os, const ColouredString<String, CharT> &s)
-{
-	if (isatty(STDOUT_FILENO))
+	namespace detail
 	{
-		std::basic_ostringstream<CharT, Traits> ostr;
-		ostr << "\033[" << (30 + s.m_fore) << ';' << (s.m_bold ? "1" : "22") << ';' << (40 + s.m_back) << 'm'
-			 << s.m_s
-			 << "\033[0m";
+		/**
+		 * @brief Struct for coloured strings.
+		 */
+		template <typename StringType>
+		struct coloured_string_t
+		{
+			static_assert(std::is_reference_v<StringType> or std::is_pointer_v<StringType>,
+				"String type must be pointer or reference");

-		return os << ostr.str();
-	}
-	else
-		return os << s.m_s;
+			/**
+			 * @brief Construct a new coloured_string_t object
+			 */
+			coloured_string_t(StringType s, colour_type fc, colour_type bc, style_type st)
+				: m_str(s)
+				, m_fore_colour(static_cast<int>(fc) + 30)
+				, m_back_colour(static_cast<int>(bc) + 40)
+				, m_style(static_cast<int>(st))
+			{
+			}
+
+			coloured_string_t &operator=(coloured_string_t &) = delete;
+
+			/**
+			 * @brief Write the string to @a os. The text itself is always written; the colour
+			 * escape sequences are added only if @a os shares the buffer of std::cout or std::cerr
+			 * and the corresponding file descriptor is a terminal.
+			 */
+			template <typename char_type, typename traits_type>
+			friend std::basic_ostream<char_type, traits_type> &operator<<(
+				std::basic_ostream<char_type, traits_type> &os, const coloured_string_t &cs)
+			{
+				bool use_colour = false;
+				if (os.rdbuf() == std::cout.rdbuf() and isatty(STDOUT_FILENO))
+					use_colour = true;
+				else if (os.rdbuf() == std::cerr.rdbuf() and isatty(STDERR_FILENO))
+					use_colour = true;
+
+				if (use_colour)
+					os << "\033[" << cs.m_fore_colour << ';' << cs.m_style << ';' << cs.m_back_colour << 'm';
+				// never drop the text: redirected output simply gets the plain string
+				os << cs.m_str;
+				if (use_colour)
+					os << "\033[0m";
+				return os;
+			}
+
+			/// @cond
+			StringType m_str;
+			int m_fore_colour, m_back_colour;
+			int m_style;
+			/// @endcond
+		};
+
+	} // namespace detail
+} // namespace colour
+
+/**
+ * @brief Manipulator for coloured strings.
+ * 
+ * When writing out text to the terminal it is often useful to have
+ * some of the text colourised. But only if the output is really a
+ * terminal since colouring text is done using escape sequences
+ * and if output is redirected to a file, these escape sequences end up
+ * in the file making the real text less easy to read.
+ *
+ * The code presented here is rather basic. It mimics the std::quoted
+ * manipulator in that it will colour a string with optionally
+ * requested colours and text style.
+ *
+ * Example:
+ *
+ * @code {.cpp}
+ * using namespace cif::colour;
+ * std::cout << cif::coloured("Hello, world!", white, red, bold) << '\n';
+ * @endcode
+ * @param str String to colour.
+ * @param fg Foreground (=text) colour to use
+ * @param bg Background colour to use
+ * @param st Text style to use
+ */
+
+template <typename char_type>
+inline auto coloured(const char_type *str,
+	colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
+	colour::style_type st = colour::style_type::regular)
+{
+	return colour::detail::coloured_string_t<const char_type *>(str, fg, bg, st);
 }

-template <typename CharT>
-inline auto coloured(const CharT *s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
+/// @brief Manipulator for coloured strings.
+template <typename char_type, typename traits_type, typename allocator_type>
+inline auto coloured(const std::basic_string<char_type, traits_type, allocator_type> &str,
+	colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
+	colour::style_type st = colour::style_type::regular)
 {
-	return ColouredString<const CharT *, CharT>(s, fore, back, bold);
+	return colour::detail::coloured_string_t<const std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
 }

-template <typename CharT, typename Traits, typename Alloc>
-inline auto coloured(const std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
+/// @brief Manipulator for coloured strings.
+template <typename char_type, typename traits_type, typename allocator_type>
+inline auto coloured(std::basic_string<char_type, traits_type, allocator_type> &str,
+	colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
+	colour::style_type st = colour::style_type::regular)
 {
-	return ColouredString<const std::basic_string<CharT, Traits, Alloc>, CharT>(s, fore, back, bold);
+	return colour::detail::coloured_string_t<std::basic_string<char_type, traits_type, allocator_type> &>(str, fg, bg, st);
 }

-template <typename CharT, typename Traits, typename Alloc>
-inline auto coloured(std::basic_string<CharT, Traits, Alloc> &s, StringColour fore = scWHITE, StringColour back = scRED, bool bold = true)
+/// @brief Manipulator for coloured strings.
+template <typename char_type, typename traits_type>
+inline auto coloured(std::basic_string_view<char_type, traits_type> &str,
+	colour::colour_type fg, colour::colour_type bg = colour::colour_type::none,
+	colour::style_type st = colour::style_type::regular)
 {
-	return ColouredString<std::basic_string<CharT, Traits, Alloc>, CharT>(s, fore, back, bold);
+	return colour::detail::coloured_string_t<std::basic_string_view<char_type, traits_type> &>(str, fg, bg, st);
 }

 // --------------------------------------------------------------------
 //	A progress bar

-class Progress
+/**
+ * @brief A simple progress bar class for terminal based output
+ * 
+ * Using a progress bar is very convenient for the end user when
+ * you have long running code. It gives feedback on how fast an
+ * operation is performed and may give an indication how long it
+ * will take before it is finished.
+ * 
+ * Using this cif::progress_bar implementation is straightforward:
+ * 
+ * @code {.cpp}
+ * using namespace std::chrono_literals;
+ * 
+ * cif::progress_bar pb(10, "counting to ten");
+ * 
+ * for (int i = 1; i <= 10; ++i)
+ * {
+ *   pb.consumed(1);
+ *   std::this_thread::sleep_for(1s);
+ * }
+ * 
+ * @endcode
+ * 
+ * When the progress_bar is created, it first checks
+ * to see if stdout is to a real TTY and if the VERBOSE
+ * flag is not less than zero (quiet mode). If this passes
+ * a thread is started that waits for updates.
+ * 
+ * The first two seconds, nothing is written to the screen
+ * so if the work is finished within those two seconds
+ * the screen stays clean.
+ * 
+ * After this time, a progress bar is printed that may look
+ * like this:
+ * 
+ * @code
+ * step 3           ========================--------------------------------  40% ⢁
+ * @endcode
+ * 
+ * The first characters contain the initial action name or
+ * the message text if it was used afterwards.
+ * 
+ * The thermometer is made up of '=' and '-' characters.
+ * 
+ * A percentage is also shown and at the end there is a spinner
+ * that gives feedback that the program is really still working.
+ * 
+ * The progress bar is removed if the max has been reached
+ * or if the progress bar is destructed. If any output has
+ * been generated, the initial action is printed out along
+ * with the total time spent.
+ */
+
+class progress_bar
 {
  public:
-	Progress(int64_t inMax, const std::string &inAction);
-	virtual ~Progress();
+	/**
+	 * @brief Construct a new progress bar object
+	 * 
+	 * Progress ranges from 0 (zero) to @a inMax
+	 * 
+	 * The action in @a inAction is used for display
+	 * 
+	 * @param inMax The maximum value
+	 * @param inAction The description of what is
+	 * going on
+	 */

+	progress_bar(int64_t inMax, const std::string &inAction);
+
+	/**
+	 * @brief Destroy the progress bar object
+	 * 
+	 */
+	~progress_bar();
+
+	/**
+	 * @brief Notify the progress bar that @a inConsumed
+	 * should be added to the internal progress counter
+	 */
 	void consumed(int64_t inConsumed); // consumed is relative
+
+	/**
+	 * @brief Notify the progress bar that the internal
+	 * progress counter should be updated to @a inProgress
+	 */
 	void progress(int64_t inProgress); // progress is absolute

+	/**
+	 * @brief Replace the action string in the progress bar
+	 * with @a inMessage
+	 */
 	void message(const std::string &inMessage);

  private:
-	Progress(const Progress &) = delete;
-	Progress &operator=(const Progress &) = delete;
+	progress_bar(const progress_bar &) = delete;
+	progress_bar &operator=(const progress_bar &) = delete;

-	struct ProgressImpl *m_impl;
+	struct progress_bar_impl *m_impl;
 };

 // --------------------------------------------------------------------
 // Resources

+/**
+ * @brief Load a resource from disk or the compiled in resources
+ * 
+ * @verbatim embed:rst
+.. note::
+
+   See the :doc:`documentation on resources </resources>` for more information.
+
+   @endverbatim
+ * 
+ * @param name The named resource to load
+ * @return std::unique_ptr<std::istream> A pointer to the std::istream or empty if not found
+ */
+
 std::unique_ptr<std::istream> load_resource(std::filesystem::path name);
+
+/**
+ * @brief Add a file specified by @a dataFile as the data for resource @a name
+ * 
+ * @verbatim embed:rst
+.. note::
+
+   See the :doc:`documentation on resources </resources>` for more information.
+
+   @endverbatim
+ * 
+ * @param name The name of the resource to specify
+ * @param dataFile Path to a file containing the data
+ */
+
 void add_file_resource(const std::string &name, std::filesystem::path dataFile);
+
+/**
+ * @brief Add a directory to the list of search directories. This list is
+ * searched in a last-in-first-out order.
+ * 
+ * @verbatim embed:rst
+.. note::
+
+   See the :doc:`documentation on resources </resources>` for more information.
+
+   @endverbatim
+ */
+
 void add_data_directory(std::filesystem::path dataDir);

 } // namespace cif
--- a/include/cif++/validate.hpp
+++ b/include/cif++/validate.hpp
@@ -26,13 +26,23 @@

 #pragma once

-#include <cif++/text.hpp>
+#include "cif++/text.hpp"

 #include <filesystem>
 #include <list>
 #include <mutex>
 #include <utility>

+/**
+ * @file validate.hpp
+ *
+ * Support for validating mmCIF files based on a dictionary. These dictionaries
+ * contain information about the categories and items therein, what they may
+ * contain and how this should be formatted. There's also information on links
+ * between parent and child categories.
+ *
+ */
+
 namespace cif
 {

@@ -40,39 +50,67 @@ struct category_validator;

 // --------------------------------------------------------------------

+/**
+ * @brief The exception thrown when a validation error occurs
+ *
+ */
 class validation_error : public std::exception
 {
  public:
+	/// @brief Constructor
 	validation_error(const std::string &msg);
+
+	/// @brief Constructor
 	validation_error(const std::string &cat, const std::string &item,
 		const std::string &msg);
+
+	/// @brief The description of the error
 	const char *what() const noexcept { return m_msg.c_str(); }
+
+	/// @cond
 	std::string m_msg;
+	/// @endcond
 };

 // --------------------------------------------------------------------

+/** @brief the primitive types known */
 enum class DDL_PrimitiveType
 {
-	Char,
-	UChar,
-	Numb
+	Char,  ///< Text
+	UChar, ///< Text that is compared ignoring the character case
+	Numb   ///< Numeric values
 };

+/// @brief Return the DDL_PrimitiveType encoded in @a s
 DDL_PrimitiveType map_to_primitive_type(std::string_view s);

 struct regex_impl;

+/**
+ * @brief For each defined type in a dictionary a type_validator is created
+ *
+ * A type validator can check if the contents of an item conform to the
+ * specification. The check is done using regular expressions.
+ *
+ * A type_validator can also be used to compare two values that conform to
+ * this type. Comparison is of course based on the primitive type.
+ *
+ */
 struct type_validator
 {
-	std::string m_name;
-	DDL_PrimitiveType m_primitive_type;
-	regex_impl *m_rx;
+	std::string m_name;                 ///< The name of the type
+	DDL_PrimitiveType m_primitive_type; ///< The primitive_type of the type
+	regex_impl *m_rx;                   ///< The regular expression for the type

 	type_validator() = delete;
+
+	/// @brief Constructor
 	type_validator(std::string_view name, DDL_PrimitiveType type, std::string_view rx);

 	type_validator(const type_validator &) = delete;
+
+	/// @brief Move constructor
 	type_validator(type_validator &&rhs)
 		: m_name(std::move(rhs.m_name))
 		, m_primitive_type(rhs.m_primitive_type)
@@ -81,6 +119,8 @@ struct type_validator
 	}

 	type_validator &operator=(const type_validator &) = delete;
+
+	/// @brief Move assignment operator
 	type_validator &operator=(type_validator &&rhs)
 	{
 		m_name = std::move(rhs.m_name);
@@ -90,119 +130,167 @@ struct type_validator
 		return *this;
 	}

+	/// @brief Destructor
 	~type_validator();

+	/// @brief Return the sorting order
 	bool operator<(const type_validator &rhs) const
 	{
 		return icompare(m_name, rhs.m_name) < 0;
 	}

+	/// @brief Compare the contents of @a a and @a b based on the
+	/// primitive type of this type. A value of zero indicates the
+	/// values are equal. Less than zero means @a a sorts before @a b
+	/// and a value larger than zero likewise means the opposite
 	int compare(std::string_view a, std::string_view b) const;
 };

+/**
+ * @brief An item_validator binds a type_validator to an item in
+ * a category along with other information found in the dictionary.
+ *
+ * mmCIF dictionaries may indicate an item is e.g. mandatory or
+ * consists of a certain list of allowed values. Even default
+ * values can be provided.
+ *
+ */
 struct item_validator
 {
-	std::string m_tag;
-	bool m_mandatory;
-	const type_validator *m_type;
-	cif::iset m_enums;
-	std::string m_default;
-	bool m_default_is_null;
-	category_validator *m_category = nullptr;
-
-	// ItemLinked is used for non-key links
-	struct item_link
-	{
-		item_validator *m_parent;
-		std::string m_parent_item;
-		std::string m_child_item;
-	};
-
-	std::vector<item_link> mLinked;
+	std::string m_tag;                        ///< The item name
+	bool m_mandatory;                         ///< Flag indicating this item is mandatory
+	const type_validator *m_type;             ///< The type for this item
+	cif::iset m_enums;                        ///< If filled, the set of allowed values
+	std::string m_default;                    ///< If filled, a default value for this item
+	category_validator *m_category = nullptr; ///< The category_validator this item_validator belongs to

+	/// @brief Compare based on the name
 	bool operator<(const item_validator &rhs) const
 	{
 		return icompare(m_tag, rhs.m_tag) < 0;
 	}

+	/// @brief Compare based on the name
 	bool operator==(const item_validator &rhs) const
 	{
 		return iequals(m_tag, rhs.m_tag);
 	}

+	/// @brief Validate the value in @a value for this item
+	/// Will throw a validation_error exception if it fails
 	void operator()(std::string_view value) const;
 };

+/**
+ * @brief A validator for categories
+ *
+ * Categories can have a key, a set of items that in combination
+ * should be unique.
+ */
 struct category_validator
 {
-	std::string m_name;
-	std::vector<std::string> m_keys;
-	cif::iset m_groups;
-	cif::iset m_mandatory_fields;
-	std::set<item_validator> m_item_validators;
+	std::string m_name;                         ///< The name of the category
+	std::vector<std::string> m_keys;            ///< The list of items that make up the key
+	cif::iset m_groups;							///< The category groups this category belongs to
+	cif::iset m_mandatory_fields;               ///< The mandatory fields for this category
+	std::set<item_validator> m_item_validators; ///< The item validators for the items in this category

+	/// @brief return true if this category sorts before @a rhs
 	bool operator<(const category_validator &rhs) const
 	{
 		return icompare(m_name, rhs.m_name) < 0;
 	}

+	/// @brief Add item_validator @a v to the list of item validators
 	void addItemValidator(item_validator &&v);

+	/// @brief Return the item_validator for item @a tag, may return nullptr
 	const item_validator *get_validator_for_item(std::string_view tag) const;
-
-	const std::set<item_validator> &item_validators() const
-	{
-		return m_item_validators;
-	}
 };

+/**
+ * @brief A validator for links between categories
+ *
+ * Links are defined as a set of pairs of item names in a
+ * parent category and a corresponding item in a child
+ * category. This means that the size of m_parent_keys
+ * is always equal to the size of m_child_keys.
+ *
+ * Multiple links may be defined between two categories.
+ *
+ */
 struct link_validator
 {
-	int m_link_group_id;
-	std::string m_parent_category;
-	std::vector<std::string> m_parent_keys;
-	std::string m_child_category;
-	std::vector<std::string> m_child_keys;
-	std::string m_link_group_label;
+	int m_link_group_id;                    ///< The link group ID
+	std::string m_parent_category;          ///< The name of the parent category
+	std::vector<std::string> m_parent_keys; ///< The items in the parent category making up the set of linked items
+	std::string m_child_category;           ///< The name of the child category
+	std::vector<std::string> m_child_keys;  ///< The items in the child category making up the set of linked items
+	std::string m_link_group_label;         ///< The group label assigned to this link
 };

 // --------------------------------------------------------------------

+/**
+ * @brief The validator class combines all the link, category and item validator classes
+ *
+ */
 class validator
 {
  public:
+	/**
+	 * @brief Construct a new validator object
+	 *
+	 * @param name The name of the underlying dictionary
+	 */
 	validator(std::string_view name)
 		: m_name(name)
 	{
 	}

+	/// @brief destructor
 	~validator() = default;

 	validator(const validator &rhs) = delete;
 	validator &operator=(const validator &rhs) = delete;

+	/// @brief move constructor
 	validator(validator &&rhs) = default;
+
+	/// @brief move assignment operator
 	validator &operator=(validator &&rhs) = default;

 	friend class dictionary_parser;

+	/// @brief Add type_validator @a v to the list of type validators
 	void add_type_validator(type_validator &&v);
+
+	/// @brief Return the type validator for @a type_code, may return nullptr
 	const type_validator *get_validator_for_type(std::string_view type_code) const;

+	/// @brief Add category_validator @a v to the list of category validators
 	void add_category_validator(category_validator &&v);
+
+	/// @brief Return the category validator for @a category, may return nullptr
 	const category_validator *get_validator_for_category(std::string_view category) const;

+	/// @brief Add link_validator @a v to the list of link validators
 	void add_link_validator(link_validator &&v);
+
+	/// @brief Return the list of link validators for which the parent is @a category
 	std::vector<const link_validator *> get_links_for_parent(std::string_view category) const;
+
+	/// @brief Return the list of link validators for which the child is @a category
 	std::vector<const link_validator *> get_links_for_child(std::string_view category) const;

+	/// @brief Bottleneck function to report an error in validation
 	void report_error(const std::string &msg, bool fatal) const;

-	const std::string &name() const { return m_name; }
-	void set_name(const std::string &name) { m_name = name; }
+	const std::string &name() const { return m_name; }        ///< Get the name of this validator
+	void set_name(const std::string &name) { m_name = name; } ///< Set the name of this validator

-	const std::string &version() const { return m_version; }
-	void version(const std::string &version) { m_version = version; }
+	const std::string &version() const { return m_version; }              ///< Get the version of this validator
+	void set_version(const std::string &version) { m_version = version; } ///< Set the version of this validator

  private:
 	// name is fully qualified here:
@@ -217,20 +305,29 @@ class validator
 };

 // --------------------------------------------------------------------
+
+/**
+ * @brief Validators are globally unique objects, use the validator_factory
+ * class to construct them. This class is a singleton.
+ */
+
 class validator_factory
 {
  public:
+	/// @brief Return the singleton instance
 	static validator_factory &instance()
 	{
 		static validator_factory s_instance;
 		return s_instance;
 	}

+	/// @brief Return the validator with name @a dictionary_name
 	const validator &operator[](std::string_view dictionary_name);

-  private:
-	void construct_validator(std::string_view name, std::istream &is);
+	/// @brief Construct a new validator with name @a name from the data in @a is
+	const validator &construct_validator(std::string_view name, std::istream &is);

+  private:
 	// --------------------------------------------------------------------

 	validator_factory() = default;
--- a/libcifpp.pc.in
+++ b/libcifpp.pc.in
@@ -1,13 +0,0 @@
-prefix=@prefix@
-exec_prefix=@exec_prefix@
-libdir=@libdir@
-includedir=@includedir@
-datalibdir=@datarootdir@/libcifpp
-
-Name: libcifpp
-Description: C++ library for the manipulation of mmCIF files.
-Version: @PACKAGE_VERSION@
-
-Requires.private: zlib
-Libs: -L${libdir} -lcifpp
-Cflags: -I${includedir} -pthread
--- a/src/atom_type.cpp
+++ b/src/atom_type.cpp
@@ -26,8 +26,7 @@

 #include <cmath>

-#include <cif++.hpp>
-#include <cif++/atom_type.hpp>
+#include "cif++.hpp"

 namespace cif
 {
@@ -35,8 +34,6 @@ namespace cif
 namespace data
 {

-const float kNA = std::nanf("1");
-
 const atom_type_info kKnownAtoms[] =
 {
 	{ Nn,	"Unknown",			"Nn",	0,			false, {	kNA,	kNA,	kNA,	kNA,	kNA,	kNA,	kNA } },  //	0	Nn	 Unknown        
@@ -1078,6 +1075,26 @@ bool atom_type_traits::is_metal(const std::string& symbol)
 	return result;
 }

+bool atom_type_traits::has_sf(int charge) const
+{
+	auto type = m_info->type;
+	if (type == D)
+		type = H;
+
+	bool result = false;
+
+	for (auto& sf: data::kWKSFData)
+	{
+		if (sf.symbol == type and sf.charge == charge)
+		{
+			result = true;
+			break;
+		}
+	}
+
+	return result;
+}
+
 auto atom_type_traits::wksf(int charge) const -> const SFData&
 {
 	auto type = m_info->type;
@@ -1095,7 +1112,7 @@ auto atom_type_traits::wksf(int charge) const -> const SFData&
 		// Oops, not found. Fall back to zero charge and see if we can use that

 		if (cif::VERBOSE > 0)
-			std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge..." << std::endl;
+			std::cerr << "No scattering factor found for " << name() << " with charge " << charge << " will try to fall back to zero charge...\n";

 		for (auto& sf: data::kWKSFData)
 		{
@@ -1126,7 +1143,7 @@ auto atom_type_traits::elsf() const -> const SFData&

 float atom_type_traits::crystal_ionic_radius(int charge) const
 {
-	float result = data::kNA;
+	float result = kNA;

 	if (charge >= -3 and charge <= 8)
 	{
@@ -1145,7 +1162,7 @@ float atom_type_traits::crystal_ionic_radius(int charge) const

 float atom_type_traits::effective_ionic_radius(int charge) const
 {
-	float result = data::kNA;
+	float result = kNA;

 	if (charge >= -3 and charge <= 8)
 	{
--- a/src/category.cpp
+++ b/src/category.cpp
@@ -24,10 +24,10 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/category.hpp>
-#include <cif++/datablock.hpp>
-#include <cif++/parser.hpp>
-#include <cif++/utilities.hpp>
+#include "cif++/category.hpp"
+#include "cif++/datablock.hpp"
+#include "cif++/parser.hpp"
+#include "cif++/utilities.hpp"

 #include <numeric>
 #include <stack>
@@ -51,7 +51,7 @@ class row_comparator
 	{
 		auto cv = cat.get_cat_validator();

-		for (auto k : cv->m_keys)
+		for (auto &k : cv->m_keys)
 		{
 			uint16_t ix = cat.add_column(k);

@@ -78,13 +78,8 @@ class row_comparator
 		row_handle rhb(m_category, *b);

 		int d = 0;
-		for (auto &c : m_comparator)
+		for (const auto &[k, f] : m_comparator)
 		{
-			uint16_t k;
-			compareFunc f;
-
-			std::tie(k, f) = c;
-
 			std::string_view ka = rha[k].text();
 			std::string_view kb = rhb[k].text();

@@ -103,29 +98,30 @@ class row_comparator

 		row_handle rhb(m_category, *b);

-		int d = 0, i = 0;
-		for (auto &c : m_comparator)
+		int d = 0;
+		auto ai = a.begin();
+
+		for (const auto &[k, f] : m_comparator)
 		{
-			uint16_t k;
-			compareFunc f;
+			assert(ai != a.end());

-			std::tie(k, f) = c;
-
-			std::string_view ka = a[i++].value();
+			std::string_view ka = ai->value();
 			std::string_view kb = rhb[k].text();

 			d = f(ka, kb);

 			if (d != 0)
 				break;
+			
+			++ai;
 		}

 		return d;
 	}

  private:
-	typedef std::function<int(std::string_view, std::string_view)> compareFunc;
-	typedef std::tuple<uint16_t, compareFunc> key_comparator;
+	using compareFunc = std::function<int(std::string_view, std::string_view)>;
+	using key_comparator = std::tuple<uint16_t, compareFunc>;

 	std::vector<key_comparator> m_comparator;
 	category &m_category;
@@ -139,13 +135,7 @@ class row_comparator
 class category_index
 {
  public:
-	category_index(category *cat)
-		: m_category(*cat)
-		, m_row_comparator(m_category)
-		, m_root(nullptr)
-	{
-		reconstruct();
-	}
+	category_index(category *cat);

 	~category_index()
 	{
@@ -158,9 +148,6 @@ class category_index
 	void insert(row *r);
 	void erase(row *r);

-	// batch create
-	void reconstruct();
-
 	// reorder the row's and returns new head and tail
 	std::tuple<row *, row *> reorder()
 	{
@@ -241,7 +228,7 @@ class category_index
 			h->m_right->m_red = not h->m_right->m_red;
 	}

-	bool is_red(entry *h) const
+	constexpr bool is_red(entry *h) const
 	{
 		return h != nullptr and h->m_red;
 	}
@@ -342,6 +329,15 @@ class category_index
 	entry *m_root;
 };

+category_index::category_index(category *cat)
+	: m_category(*cat)
+	, m_row_comparator(m_category)
+	, m_root(nullptr)
+{
+	for (auto r : m_category)
+		insert(r.get_row());
+}
+
 row *category_index::find(row *k) const
 {
 	const entry *r = m_root;
@@ -482,83 +478,6 @@ category_index::entry *category_index::erase(entry *h, row *k)
 	return fix_up(h);
 }

-void category_index::reconstruct()
-{
-	delete m_root;
-	m_root = nullptr;
-
-	for (auto r : m_category)
-		insert(r.get_row());
-
-	// maybe reconstruction can be done quicker by using the following commented code.
-	// however, I've not had the time to think of a way to set the red/black flag correctly in that case.
-
-	//	std::vector<row*> rows;
-	//	transform(mCat.begin(), mCat.end(), backInserter(rows),
-	//		[](Row r) -> row* { assert(r.mData); return r.mData; });
-	//
-	//	assert(std::find(rows.begin(), rows.end(), nullptr) == rows.end());
-	//
-	//	// don't use sort here, it will run out of the stack of something.
-	//	// quicksort is notorious for using excessive recursion.
-	//	// Besides, most of the time, the data is ordered already anyway.
-	//
-	//	stable_sort(rows.begin(), rows.end(), [this](row* a, row* b) -> bool { return this->mComp(a, b) < 0; });
-	//
-	//	for (size_t i = 0; i < rows.size() - 1; ++i)
-	//		assert(mComp(rows[i], rows[i + 1]) < 0);
-	//
-	//	deque<entry*> e;
-	//	transform(rows.begin(), rows.end(), back_inserter(e),
-	//		[](row* r) -> entry* { return new entry(r); });
-	//
-	//	while (e.size() > 1)
-	//	{
-	//		deque<entry*> ne;
-	//
-	//		while (not e.empty())
-	//		{
-	//			entry* a = e.front();
-	//			e.pop_front();
-	//
-	//			if (e.empty())
-	//				ne.push_back(a);
-	//			else
-	//			{
-	//				entry* b = e.front();
-	//				b->mLeft = a;
-	//
-	//				assert(mComp(a->mRow, b->mRow) < 0);
-	//
-	//				e.pop_front();
-	//
-	//				if (not e.empty())
-	//				{
-	//					entry* c = e.front();
-	//					e.pop_front();
-	//
-	//					assert(mComp(b->mRow, c->mRow) < 0);
-	//
-	//					b->mRight = c;
-	//				}
-	//
-	//				ne.push_back(b);
-	//
-	//				if (not e.empty())
-	//				{
-	//					ne.push_back(e.front());
-	//					e.pop_front();
-	//				}
-	//			}
-	//		}
-	//
-	//		swap (e, ne);
-	//	}
-	//
-	//	assert(e.size() == 1);
-	//	mRoot = e.front();
-}
-
 size_t category_index::size() const
 {
 	std::stack<entry *> s;
@@ -600,7 +519,7 @@ category::category(const category &rhs)
 	for (auto r = rhs.m_head; r != nullptr; r = r->m_next)
 		insert_impl(end(), clone_row(*r));

-	if (m_cat_validator != nullptr)
+	if (m_cat_validator != nullptr and m_index == nullptr)
 		m_index = new category_index(this);
 }

@@ -644,7 +563,7 @@ category &category::operator=(const category &rhs)
 		m_validator = rhs.m_validator;
 		m_cat_validator = rhs.m_cat_validator;

-		if (m_cat_validator != nullptr)
+		if (m_cat_validator != nullptr and m_index == nullptr)
 			m_index = new category_index(this);
 	}

@@ -655,9 +574,6 @@ category &category::operator=(category &&rhs)
 {
 	if (this != &rhs)
 	{
-		if (not empty())
-			clear();
-
 		m_name = std::move(rhs.m_name);
 		m_columns = std::move(rhs.m_columns);
 		m_cascade = rhs.m_cascade;
@@ -665,12 +581,10 @@ category &category::operator=(category &&rhs)
 		m_cat_validator = rhs.m_cat_validator;
 		m_parent_links = rhs.m_parent_links;
 		m_child_links = rhs.m_child_links;
-		m_index = rhs.m_index;
-		m_head = rhs.m_head;
-		m_tail = rhs.m_tail;

-		rhs.m_head = rhs.m_tail = nullptr;
-		rhs.m_index = nullptr;
+		std::swap(m_index, rhs.m_index);
+		std::swap(m_head, rhs.m_head);
+		std::swap(m_tail, rhs.m_tail);
 	}

 	return *this;
@@ -756,9 +670,13 @@ void category::set_validator(const validator *v, datablock &db)

 			if (missing.empty())
 				m_index = new category_index(this);
-			else if (VERBOSE > 0)
-				std::cerr << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " "
-							<< cif::join(missing, ", ") + " in " + m_name + " " + (missing.size() == 1 ? "is" : "are") << " missing" << std::endl;
+			else
+			{
+				std::ostringstream msg;
+				msg << "Cannot construct index since the key field" << (missing.size() > 1 ? "s" : "") << " "
+							<< cif::join(missing, ", ") << " in " << m_name << " " << (missing.size() == 1 ? "is" : "are") << " missing\n";
+				throw std::runtime_error(msg.str());
+			}
 		}
 	}
 	else
@@ -805,7 +723,7 @@ bool category::is_valid() const
 	if (empty())
 	{
 		if (VERBOSE > 2)
-			std::cerr << "Skipping validation of empty category " << m_name << std::endl;
+			std::cerr << "Skipping validation of empty category " << m_name << '\n';
 		return true;
 	}

@@ -957,17 +875,17 @@ bool category::validate_links() const
 		{
 			result = false;

-			std::cerr << "Links for " << link.v->m_link_group_label << " are incomplete" << std::endl
-					<< "  There are " << missing << " items in " << m_name << " that don't have matching parent items in " << parent->m_name << std::endl;
+			std::cerr << "Links for " << link.v->m_link_group_label << " are incomplete\n"
+					<< "  There are " << missing << " items in " << m_name << " that don't have matching parent items in " << parent->m_name << '\n';
 			
 			if (VERBOSE)
 			{
-				std::cerr << "showing first " << first_missing_rows.size() <<  " rows" << std::endl
-						<< std::endl;
+				std::cerr << "showing first " << first_missing_rows.size() <<  " rows\n"
+						<< '\n';

 				first_missing_rows.write(std::cerr, link.v->m_child_keys, false);

-				std::cerr << std::endl;
+				std::cerr << '\n';
 			}
 		}
 	}
@@ -1003,25 +921,30 @@ condition category::get_parents_condition(row_handle rh, const category &parentC

 	condition result;

-	for (auto &link : m_validator->get_links_for_child(m_name))
+	auto links = m_validator->get_links_for_child(m_name);
+	links.erase(remove_if(links.begin(), links.end(), [n=parentCat.m_name](auto &l) { return l->m_parent_category != n; }), links.end());
+
+	if (not links.empty())
 	{
-		if (link->m_parent_category != parentCat.m_name)
-			continue;
-
-		condition cond;
-
-		for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
+		for (auto &link : links)
 		{
-			auto childValue = rh[link->m_child_keys[ix]];
+			condition cond;

-			if (childValue.empty())
-				continue;
+			for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
+			{
+				auto childValue = rh[link->m_child_keys[ix]];

-			cond = std::move(cond) and key(link->m_parent_keys[ix]) == childValue.text();
+				if (childValue.empty())
+					continue;
+
+				cond = std::move(cond) and key(link->m_parent_keys[ix]) == childValue.text();
+			}
+
+			result = std::move(result) or std::move(cond);
 		}
-
-		result = std::move(result) or std::move(cond);
 	}
+	else if (cif::VERBOSE > 0)
+		std::cerr << "warning: no child to parent links were found for child " << name() << " and parent " << parentCat.name() << '\n';

 	return result;
 }
@@ -1038,30 +961,35 @@ condition category::get_children_condition(row_handle rh, const category &childC
 	if (childCatValidator != nullptr)
 		mandatoryChildFields = childCatValidator->m_mandatory_fields;

-	for (auto &link : m_validator->get_links_for_parent(m_name))
+	auto links = m_validator->get_links_for_parent(m_name);
+	links.erase(remove_if(links.begin(), links.end(), [n=childCat.m_name](auto &l) { return l->m_child_category != n; }), links.end());
+
+	if (not links.empty())
 	{
-		if (link->m_child_category != childCat.m_name)
-			continue;
-
-		condition cond;
-
-		for (size_t ix = 0; ix < link->m_parent_keys.size(); ++ix)
+		for (auto &link : links)
 		{
-			auto childKey = link->m_child_keys[ix];
-			auto parentKey = link->m_parent_keys[ix];
+			condition cond;

-			auto parentValue = rh[parentKey];
+			for (size_t ix = 0; ix < link->m_parent_keys.size(); ++ix)
+			{
+				auto childKey = link->m_child_keys[ix];
+				auto parentKey = link->m_parent_keys[ix];

-			if (parentValue.empty())
-				cond = std::move(cond) and key(childKey) == null;
-			else if (link->m_parent_keys.size() > 1 and not mandatoryChildFields.contains(childKey))
-				cond = std::move(cond) and (key(childKey) == parentValue.text() or key(childKey) == null);
-			else
-				cond = std::move(cond) and key(childKey) == parentValue.text();
+				auto parentValue = rh[parentKey];
+
+				if (parentValue.empty())
+					cond = std::move(cond) and key(childKey) == null;
+				else if (link->m_parent_keys.size() > 1 and not mandatoryChildFields.contains(childKey))
+					cond = std::move(cond) and (key(childKey) == parentValue.text() or key(childKey) == null);
+				else
+					cond = std::move(cond) and key(childKey) == parentValue.text();
+			}
+
+			result = std::move(result) or std::move(cond);
 		}
-
-		result = std::move(result) or std::move(cond);
 	}
+	else if (cif::VERBOSE > 0)
+		std::cerr << "warning: no parent to child links were found for parent " << name() << " and child " << childCat.name() << '\n';

 	return result;
 }
@@ -1296,9 +1224,9 @@ void category::erase_orphans(condition &&cond, category &parent)
 		{
 			category c(m_name);
 			c.emplace(r);
-			std::cerr << "Removing orphaned record: " << std::endl
-						<< c << std::endl
-						<< std::endl;
+			std::cerr << "Removing orphaned record: \n"
+						<< c << '\n'
+						<< '\n';

 		}
 		
@@ -1313,23 +1241,37 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
 {
 	using namespace cif::literals;

-	std::string id_tag = "id";
-	if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
-		id_tag = m_cat_validator->m_keys.front();
-
 	// calling size() often is a waste of resources
 	if (m_last_unique_num == 0)
 		m_last_unique_num = static_cast<uint32_t>(size());

-	for (;;)
+	std::string result = generator(static_cast<int>(m_last_unique_num++));
+
+	std::string id_tag = "id";
+	if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
 	{
-		std::string result = generator(static_cast<int>(m_last_unique_num++));
-
-		if (exists(key(id_tag) == result))
-			continue;
-
-		return result;
+		if (m_index == nullptr and m_cat_validator != nullptr)
+			m_index = new category_index(this);
+		
+		for (;;)
+		{
+			if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
+				break;
+			result = generator(static_cast<int>(m_last_unique_num++));
+		}
 	}
+	else
+	{
+		for (;;)
+		{
+			if (not exists(key(id_tag) == result))
+				break;
+			
+			result = generator(static_cast<int>(m_last_unique_num++));
+		}
+	}
+
+	return result;
 }

 void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
@@ -1451,7 +1393,7 @@ void category::update_value(const std::vector<row_handle> &rows, std::string_vie

 				// cannot update this...
 				if (cif::VERBOSE > 0)
-					std::cerr << "Cannot update child " << childCat->m_name << "." << childTag << " with value " << value << std::endl;
+					std::cerr << "Cannot update child " << childCat->m_name << "." << childTag << " with value " << value << '\n';
 			}

 			// finally, update the children
@@ -1548,8 +1490,8 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b

 			// if (cif::VERBOSE > 2)
 			// {
-			// 	std::cerr << "Parent: " << linked->mParentcategory << " Child: " << linked->m_child_category << std::endl
-			// 			  << cond << std::endl;
+			// 	std::cerr << "Parent: " << linked->mParentcategory << " Child: " << linked->m_child_category << '\n'
+			// 			  << cond << '\n';
 			// }

 			// Now, suppose there are already rows in child that conform to the new value,
@@ -1578,7 +1520,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
 			if (not rows_n.empty())
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Will not rename in child category since there are already rows that link to the parent" << std::endl;
+					std::cerr << "Will not rename in child category since there are already rows that link to the parent\n";

 				continue;
 			}
@@ -1780,8 +1722,7 @@ void category::reorder_by_index()

 namespace detail
 {
-
-	size_t write_value(std::ostream &os, std::string_view value, size_t offset, size_t width)
+	size_t write_value(std::ostream &os, std::string_view value, size_t offset, size_t width, bool right_aligned)
 	{
 		if (value.find('\n') != std::string::npos or width == 0 or value.length() > 132) // write as text field
 		{
@@ -1805,17 +1746,33 @@ namespace detail
 		}
 		else if (sac_parser::is_unquoted_string(value))
 		{
+			if (right_aligned)
+			{
+				if (value.length() < width)
+				{
+					os << std::string(width - value.length() - 1, ' ');
+					offset += width;
+				}
+				else
+					offset += value.length() + 1;
+			}
+
 			os << value;

-			if (value.length() < width)
-			{
-				os << std::string(width - value.length(), ' ');
-				offset += width;
-			}
+			if (right_aligned)
+				os << ' ';
 			else
 			{
-				os << ' ';
-				offset += value.length() + 1;
+				if (value.length() < width)
+				{
+					os << std::string(width - value.length(), ' ');
+					offset += width;
+				}
+				else
+				{
+					os << ' ';
+					offset += value.length() + 1;
+				}
 			}
 		}
 		else
@@ -1909,9 +1866,22 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
 	// If the first Row has a next, we need a loop_
 	bool needLoop = (m_head->m_next != nullptr);

+	std::vector<bool> right_aligned(m_columns.size(), false);
+
+	if (m_cat_validator != nullptr)
+	{
+		for (auto cix : order)
+		{
+			auto &col = m_columns[cix];
+			right_aligned[cix] = col.m_validator != nullptr and
+				col.m_validator->m_type != nullptr and
+				col.m_validator->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;
+		}
+	}
+
 	if (needLoop)
 	{
-		os << "loop_" << '\n';
+		os << "loop_\n";

 		std::vector<size_t> columnWidths(m_columns.size());

@@ -1977,7 +1947,7 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
 					offset = 0;
 				}

-				offset = detail::write_value(os, s, offset, w);
+				offset = detail::write_value(os, s, offset, w, right_aligned[cix]);

 				if (offset > 132)
 				{
@@ -2005,6 +1975,30 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool

 		l += 3;

+		size_t width = 1;
+
+		for (auto cix : order)
+		{
+			if (not right_aligned[cix])
+				continue;
+
+			std::string_view s;
+			auto iv = m_head->get(cix);
+			if (iv != nullptr)
+				s = iv->text();
+
+			if (s.empty())
+				s = "?";
+
+			size_t l2 = s.length();
+
+			if (not sac_parser::is_unquoted_string(s))
+				l2 += 2;
+
+			if (width < l2)
+				width = l2;
+		}
+
 		for (uint16_t cix : order)
 		{
 			auto &col = m_columns[cix];
@@ -2029,12 +2023,12 @@ void category::write(std::ostream &os, const std::vector<uint16_t> &order, bool
 				offset = 0;
 			}

-			if (detail::write_value(os, s, offset, 1) != 0)
+			if (detail::write_value(os, s, offset, width, s.empty() or right_aligned[cix]) != 0)
 				os << '\n';
 		}
 	}

-	os << "# " << '\n';
+	os << "# \n";
 }

 bool category::operator==(const category &rhs) const
@@ -2047,7 +2041,7 @@ bool category::operator==(const category &rhs) const
 //	set<std::string> tagsA(a.fields()), tagsB(b.fields());
 //	
 //	if (tagsA != tagsB)
-//		std::cout << "Unequal number of fields" << std::endl;
+//		std::cout << "Unequal number of fields\n";

 	const category_validator *catValidator = nullptr;

--- a/src/compound.cpp
+++ b/src/compound.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/compound.hpp>
+#include "cif++.hpp"

 #include <filesystem>
 #include <fstream>
@@ -56,7 +56,7 @@ std::string to_string(bond_type bondType)
 	throw std::invalid_argument("Invalid bondType");
 }

-bond_type from_string(const std::string &bondType)
+bond_type parse_bond_type_from_string(const std::string &bondType)
 {
 	if (cif::iequals(bondType, "sing"))
 		return bond_type::sing;
@@ -77,6 +77,28 @@ bond_type from_string(const std::string &bondType)
 	throw std::invalid_argument("Invalid bondType: " + bondType);
 }

+// Map a stereo_config_type to its one-letter string: "N", "R" or "S".
+// A value matching none of these enumerators raises std::invalid_argument.
+std::string to_string(stereo_config_type stereoConfig)
+{
+	if (stereoConfig == stereo_config_type::N) return "N";
+	if (stereoConfig == stereo_config_type::R) return "R";
+	if (stereoConfig == stereo_config_type::S) return "S";
+
+	throw std::invalid_argument("Invalid stereoConfig");
+}
+
+// Parse a stereo configuration code; "N", "R" and "S" are matched with cif::iequals.
+// Any other text raises std::invalid_argument carrying the offending value.
+stereo_config_type parse_stereo_config_from_string(const std::string &stereoConfig)
+{
+	if (cif::iequals(stereoConfig, "N")) return stereo_config_type::N;
+	if (cif::iequals(stereoConfig, "R")) return stereo_config_type::R;
+	if (cif::iequals(stereoConfig, "S")) return stereo_config_type::S;
+
+	throw std::invalid_argument("Invalid stereoConfig: " + stereoConfig);
+}
+
 // --------------------------------------------------------------------
 // compound helper classes

@@ -126,11 +148,12 @@ compound::compound(cif::datablock &db)
 	for (auto row : chemCompAtom)
 	{
 		compound_atom atom;
-		std::string type_symbol;
-		cif::tie(atom.id, type_symbol, atom.charge, atom.aromatic, atom.leaving_atom, atom.stereo_config, atom.x, atom.y, atom.z) =
+		std::string type_symbol, stereo_config;
+		cif::tie(atom.id, type_symbol, atom.charge, atom.aromatic, atom.leaving_atom, stereo_config, atom.x, atom.y, atom.z) =
 			row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
 				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
 		atom.type_symbol = atom_type_traits(type_symbol).type();
+		atom.stereo_config = parse_stereo_config_from_string(stereo_config);
 		m_atoms.push_back(std::move(atom));
 	}

@@ -140,7 +163,7 @@ compound::compound(cif::datablock &db)
 		compound_bond bond;
 		std::string valueOrder;
 		cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
-		bond.type = from_string(valueOrder);
+		bond.type = parse_bond_type_from_string(valueOrder);
 		m_bonds.push_back(std::move(bond));
 	}
 }
@@ -186,7 +209,7 @@ compound::compound(cif::datablock &db, const std::string &id, const std::string
 		else
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << std::endl;
+				std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << '\n';
 			bond.type = bond_type::sing;
 		}
 		m_bonds.push_back(std::move(bond));
@@ -313,7 +336,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
 		{
 			for (auto cmp : impl->m_compounds)
 			{
-				if (cmp->id() == id)
+				if (iequals(cmp->id(), id))
 				{
 					result = cmp;
 					break;
@@ -443,15 +466,15 @@ compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_p

 			if (not cifFile.is_valid())
 			{
-				std::cerr << "The components file " << file << " is not valid" << std::endl;
+				std::cerr << "The components file " << file << " is not valid\n";
 				if (cif::VERBOSE < 1)
-					std::cerr << "(use --verbose to see why)" << std::endl;
+					std::cerr << "(use --verbose to see why)\n";
 			}
 		}
 		catch (const std::exception &e)
 		{
-			std::cerr << "When trying to load the components file " << file << " there was an exception:" << std::endl
-					  << e.what() << std::endl;
+			std::cerr << "When trying to load the components file " << file << " there was an exception:\n"
+					  << e.what() << '\n';
 		}

 		for (auto &db : cifFile)
@@ -493,7 +516,7 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
 		ccd = cif::load_resource("components.cif");
 		if (not ccd)
 		{
-			std::cerr << "Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data." << std::endl;
+			std::cerr << "Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-libcifpp-data to fetch the data.\n";
 			return nullptr;
 		}
 	}
@@ -553,7 +576,7 @@ compound *CCD_compound_factory_impl::create(const std::string &id)
 	}

 	if (result == nullptr and cif::VERBOSE > 0)
-		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;
+		std::cerr << "Could not locate compound " << id << " in the CCD components file\n";

 	return result;
 }
@@ -582,12 +605,12 @@ CCP4_compound_factory_impl::CCP4_compound_factory_impl(const fs::path &clibd_mon

 	auto &chemComps = m_file["comp_list"]["chem_comp"];

-	for (const auto &[group, threeLetterCode] : chemComps.rows<std::string, std::string>("group", "three_letter_code"))
+	for (const auto &[group, comp_id] : chemComps.rows<std::string, std::string>("group", "id"))
 	{
 		if (std::regex_match(group, peptideRx))
-			m_known_peptides.insert(threeLetterCode);
+			m_known_peptides.insert(comp_id);
 		else if (cif::iequals(group, "DNA") or cif::iequals(group, "RNA"))
-			m_known_bases.insert(threeLetterCode);
+			m_known_bases.insert(comp_id);
 	}
 }

@@ -597,7 +620,7 @@ compound *CCP4_compound_factory_impl::create(const std::string &id)

 	auto &cat = m_file["comp_list"]["chem_comp"];

-	auto rs = cat.find(cif::key("three_letter_code") == id);
+	auto rs = cat.find(cif::key("id") == id);

 	if (rs.size() == 1)
 	{
@@ -678,13 +701,13 @@ compound_factory::compound_factory()
 	if (ccd)
 		m_impl = std::make_shared<CCD_compound_factory_impl>(m_impl);
 	else if (cif::VERBOSE > 0)
-		std::cerr << "CCD components.cif file was not found" << std::endl;
+		std::cerr << "CCD components.cif file was not found\n";

 	const char *clibd_mon = getenv("CLIBD_MON");
 	if (clibd_mon != nullptr and fs::is_directory(clibd_mon))
 		m_impl = std::make_shared<CCP4_compound_factory_impl>(clibd_mon, m_impl);
 	else if (cif::VERBOSE > 0)
-		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined" << std::endl;
+		std::cerr << "CCP4 monomers library not found, CLIBD_MON is not defined\n";
 }

 compound_factory::~compound_factory()
--- a/src/condition.cpp
+++ b/src/condition.cpp
@@ -24,8 +24,8 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/category.hpp>
-#include <cif++/condition.hpp>
+#include "cif++/category.hpp"
+#include "cif++/condition.hpp"

 namespace cif
 {
--- a/src/datablock.cpp
+++ b/src/datablock.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/datablock.hpp>
+#include "cif++/datablock.hpp"

 namespace cif
 {
@@ -198,8 +198,8 @@ std::vector<std::string> datablock::get_tag_order() const

 void datablock::write(std::ostream &os) const
 {
-	os << "data_" << m_name << std::endl
-	   << "# " << std::endl;
+	os << "data_" << m_name << '\n'
+	   << "# \n";

 	// mmcif support, sort of. First write the 'entry' Category
 	// and if it exists, _AND_ we have a Validator, write out the
@@ -237,8 +237,8 @@ void datablock::write(std::ostream &os) const

 void datablock::write(std::ostream &os, const std::vector<std::string> &tag_order)
 {
-	os << "data_" << m_name << std::endl
-	   << "# " << std::endl;
+	os << "data_" << m_name << '\n'
+	   << "# \n";

 	std::vector<std::string> cat_order;
 	for (auto &o : tag_order)
--- a/src/dictionary_parser.cpp
+++ b/src/dictionary_parser.cpp
@@ -24,10 +24,10 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/condition.hpp>
-#include <cif++/dictionary_parser.hpp>
-#include <cif++/file.hpp>
-#include <cif++/parser.hpp>
+#include "cif++/condition.hpp"
+#include "cif++/dictionary_parser.hpp"
+#include "cif++/file.hpp"
+#include "cif++/parser.hpp"

 namespace cif
 {
@@ -103,7 +103,7 @@ class dictionary_parser : public parser
 		{
 			auto r = info->front();
 			m_validator.set_name(r["title"].as<std::string>());
-			m_validator.version(r["version"].as<std::string>());
+			m_validator.set_version(r["version"].as<std::string>());
 		}

 		m_datablock = savedDatablock;
@@ -117,7 +117,7 @@ class dictionary_parser : public parser
 		if (not m_collected_item_types)
 			m_collected_item_types = collect_item_types();

-		std::string saveFrameName = m_token_value;
+		std::string saveFrameName { m_token_value };

 		if (saveFrameName.empty())
 			error("Invalid save frame, should contain more than just 'save_' here");
@@ -127,7 +127,7 @@ class dictionary_parser : public parser
 		datablock dict(m_token_value);
 		datablock::iterator cat = dict.end();

-		match(CIFToken::SAVE);
+		match(CIFToken::SAVE_NAME);
 		while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
 		{
 			if (m_lookahead == CIFToken::LOOP)
@@ -183,7 +183,7 @@ class dictionary_parser : public parser
 			}
 		}

-		match(CIFToken::SAVE);
+		match(CIFToken::SAVE_);

 		if (isCategorySaveFrame)
 		{
@@ -213,16 +213,16 @@ class dictionary_parser : public parser
 				ess.insert(e["value"].as<std::string>());

 			std::string defaultValue = dict["item_default"].front().get<std::string>("value");
-			bool defaultIsNull = false;
-			if (defaultValue.empty())
-			{
-				// TODO: Is this correct???
-				for (auto r : dict["_item_default"])
-				{
-					defaultIsNull = r["value"].is_null();
-					break;
-				}
-			}
+			// bool defaultIsNull = false;
+			// if (defaultValue.empty())
+			// {
+			// 	// TODO: Is this correct???
+			// 	for (auto r : dict["_item_default"])
+			// 	{
+			// 		defaultIsNull = r["value"].is_null();
+			// 		break;
+			// 	}
+			// }

 			// collect the dict from our dataBlock and construct validators
 			for (auto i : dict["item"])
@@ -245,7 +245,7 @@ class dictionary_parser : public parser

 				auto vi = find(ivs.begin(), ivs.end(), item_validator{ item_name });
 				if (vi == ivs.end())
-					ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue, defaultIsNull });
+					ivs.push_back(item_validator{ item_name, iequals(mandatory, "yes"), tv, ess, defaultValue /*, defaultIsNull*/ });
 				else
 				{
 					// need to update the itemValidator?
@@ -253,12 +253,12 @@ class dictionary_parser : public parser
 					{
 						if (VERBOSE > 2)
 						{
-							std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary" << std::endl;
+							std::cerr << "inconsistent mandatory value for " << tagName << " in dictionary\n";

 							if (iequals(tagName, saveFrameName))
-								std::cerr << "choosing " << mandatory << std::endl;
+								std::cerr << "choosing " << mandatory << '\n';
 							else
-								std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << std::endl;
+								std::cerr << "choosing " << (vi->m_mandatory ? "Y" : "N") << '\n';
 						}

 						if (iequals(tagName, saveFrameName))
@@ -268,7 +268,7 @@ class dictionary_parser : public parser
 					if (vi->m_type != nullptr and tv != nullptr and vi->m_type != tv)
 					{
 						if (VERBOSE > 1)
-							std::cerr << "inconsistent type for " << tagName << " in dictionary" << std::endl;
+							std::cerr << "inconsistent type for " << tagName << " in dictionary\n";
 					}

 					//				vi->mMandatory = (iequals(mandatory, "yes"));
@@ -410,7 +410,7 @@ class dictionary_parser : public parser
 			for (auto &iv : cv.m_item_validators)
 			{
 				if (iv.m_type == nullptr and cif::VERBOSE >= 0)
-					std::cerr << "Missing item_type for " << iv.m_tag << std::endl;
+					std::cerr << "Missing item_type for " << iv.m_tag << '\n';
 			}
 		}
 	}
@@ -452,7 +452,7 @@ class dictionary_parser : public parser
 			//			mFileImpl.mTypeValidators.erase(v);

 			if (VERBOSE >= 5)
-				std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << std::endl;
+				std::cerr << "Added type " << code << " (" << primitiveCode << ") => " << construct << '\n';

 			result = true;
 		}
@@ -481,4 +481,11 @@ validator parse_dictionary(std::string_view name, std::istream &is)
 	return result;
 }

-} // namespace cif
+// Run a dictionary_parser over `is`, constructed with the existing validator `v`.
+void extend_dictionary(validator &v, std::istream &is)
+{
+	file scratch; // the parser constructor takes a file; this one is discarded on return
+	dictionary_parser(v, is, scratch).load_dictionary();
+}
+
+} // namespace cif
--- a/src/file.cpp
+++ b/src/file.cpp
@@ -24,8 +24,8 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/file.hpp>
-#include <cif++/gzio.hpp>
+#include "cif++/file.hpp"
+#include "cif++/gzio.hpp"

 namespace cif
 {
@@ -58,7 +58,7 @@ bool file::is_valid()
 	if (m_validator == nullptr)
 	{
 		if (VERBOSE > 0)
-			std::cerr << "No dictionary loaded explicitly, loading default" << std::endl;
+			std::cerr << "No dictionary loaded explicitly, loading default\n";

 		load_dictionary();
 	}
@@ -108,7 +108,7 @@ void file::load_dictionary()
 				catch (const std::exception &ex)
 				{
 					if (VERBOSE)
-						std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << std::endl;
+						std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n';
 				}
 			}
 		}
@@ -182,17 +182,17 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)

 void file::load(const std::filesystem::path &p)
 {
+	gzio::ifstream in(p);
+	if (not in.is_open())
+		throw std::runtime_error("Could not open file '" + p.string() + '\'');
+	// the open check sits outside the try: only failures from load(in) are rethrown nested below
 	try
 	{
-		gzio::ifstream in(p);
-		if (not in.is_open())
-			throw std::runtime_error("Could not open file " + p.string());
-
 		load(in);
 	}
 	catch (const std::exception &)
 	{
-		throw_with_nested(std::runtime_error("Error reading file " + p.string()));
+		throw_with_nested(std::runtime_error("Error reading file '" + p.string() + '\''));
 	}
 }

@@ -219,7 +219,7 @@ void file::save(const std::filesystem::path &p) const
 void file::save(std::ostream &os) const
 {
 	// if (not is_valid())
-	// 	std::cout << "File is not valid!" << std::endl;
+	// 	std::cout << "File is not valid!\n";

 	for (auto &db : *this)
 		db.write(os);
--- a/src/item.cpp
+++ b/src/item.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/row.hpp>
+#include "cif++/row.hpp"

 #include <cassert>

--- a/src/model.cpp
+++ b/src/model.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++.hpp>
+#include "cif++.hpp"

 #include <filesystem>
 #include <fstream>
@@ -75,7 +75,7 @@ int atom::atom_impl::get_property_int(std::string_view name) const

 		std::from_chars_result r = std::from_chars(s.data(), s.data() + s.length(), result);
 		if (r.ec != std::errc() and VERBOSE > 0)
-			std::cerr << "Error converting " << s << " to number for property " << name << std::endl;
+			std::cerr << "Error converting " << s << " to number for property " << name << '\n';
 	}
 	return result;
 }
@@ -89,7 +89,7 @@ float atom::atom_impl::get_property_float(std::string_view name) const

 		std::from_chars_result r = cif::from_chars(s.data(), s.data() + s.length(), result);
 		if (r.ec != std::errc() and VERBOSE > 0)
-			std::cerr << "Error converting " << s << " to number for property " << name << std::endl;
+			std::cerr << "Error converting " << s << " to number for property " << name << '\n';
 	}
 	return result;
 }
@@ -219,7 +219,7 @@ int atom::atom_impl::get_charge() const
 // 	if (result == nullptr)
 // 	{
 // 		if (VERBOSE > 0)
-// 			std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << std::endl;
+// 			std::cerr << "Compound not found: '" << get_property<std::string>("label_comp_id") << '\'' << '\n';

 // 		throw std::runtime_error("no compound");
 // 	}
@@ -290,12 +290,17 @@ int atom::atom_impl::get_charge() const

 std::ostream &operator<<(std::ostream &os, const atom &atom)
 {
-	os << atom.get_label_comp_id() << ' ' << atom.get_label_asym_id() << ':' << atom.get_label_seq_id() << ' ' << atom.get_label_atom_id();
+	if (atom.is_water()) // waters print auth_seq_id where other atoms print label_seq_id, and get no suffix
+		os << atom.get_label_comp_id() << ' ' << atom.get_label_asym_id() << ':' << atom.get_auth_seq_id() << ' ' << atom.get_label_atom_id();
+	else
+	{
+		os << atom.get_label_comp_id() << ' ' << atom.get_label_asym_id() << ':' << atom.get_label_seq_id() << ' ' << atom.get_label_atom_id();

-	if (atom.is_alternate())
-		os << '(' << atom.get_label_alt_id() << ')';
-	if (atom.get_auth_asym_id() != atom.get_label_asym_id() or atom.get_auth_seq_id() != std::to_string(atom.get_label_seq_id()) or atom.get_pdb_ins_code().empty() == false)
-		os << " [" << atom.get_auth_asym_id() << ':' << atom.get_auth_seq_id() << atom.get_pdb_ins_code() << ']';
+		if (atom.is_alternate()) // append the alt id in parentheses
+			os << '(' << atom.get_label_alt_id() << ')';
+		if (atom.get_auth_asym_id() != atom.get_label_asym_id() or atom.get_auth_seq_id() != std::to_string(atom.get_label_seq_id()) or atom.get_pdb_ins_code().empty() == false) // auth ids differ from label ids, or an insertion code is set
+			os << " [" << atom.get_auth_asym_id() << ':' << atom.get_auth_seq_id() << atom.get_pdb_ins_code() << ']';
+	}

 	return os;
 }
@@ -322,37 +327,6 @@ residue::residue(structure &structure, const std::vector<atom> &atoms)
 		m_atoms.push_back(atom);
 }

-// residue::residue(residue &&rhs)
-// 	: m_structure(rhs.m_structure)
-// 	, m_compound_id(std::move(rhs.m_compound_id))
-// 	, m_asym_id(std::move(rhs.m_asym_id))
-// 	, m_seq_id(rhs.m_seq_id)
-// 	, m_auth_seq_id(rhs.m_auth_seq_id)
-// 	, m_atoms(std::move(rhs.m_atoms))
-// {
-// 	// std::cerr << "move constructor residue" << std::endl;
-// 	rhs.m_structure = nullptr;
-// }
-
-// residue &residue::operator=(residue &&rhs)
-// {
-// 	// std::cerr << "move assignment residue" << std::endl;
-// 	m_structure = rhs.m_structure;
-// 	rhs.m_structure = nullptr;
-// 	m_compound_id = std::move(rhs.m_compound_id);
-// 	m_asym_id = std::move(rhs.m_asym_id);
-// 	m_seq_id = rhs.m_seq_id;
-// 	m_auth_seq_id = rhs.m_auth_seq_id;
-// 	m_atoms = std::move(rhs.m_atoms);
-
-// 	return *this;
-// }
-
-// residue::~residue()
-// {
-// 	// std::cerr << "~residue" << std::endl;
-// }
-
 std::string residue::get_entity_id() const
 {
 	std::string result;
@@ -376,68 +350,13 @@ EntityType residue::entity_type() const
 	return m_structure->get_entity_type_for_entity_id(get_entity_id());
 }

-// std::string residue::authInsCode() const
-// {
-// 	assert(m_structure);
-
-// 	std::string result;
-// 	if (not m_atoms.empty())
-// 		result = m_atoms.front().get_property("pdbx_PDB_ins_code");
-
-// 	return result;
-// }
-
-// std::string residue::get_auth_asym_id() const
-// {
-// 	assert(m_structure);
-
-// 	std::string result;
-// 	if (not m_atoms.empty())
-// 		result = m_atoms.front().get_property("auth_asym_id");
-
-// 	return result;
-// }
-
-// std::string residue::authSeqID() const
-// {
-// 	return m_auth_seq_id;
-// }
-
-// const Compound &residue::compound() const
-// {
-// 	auto result = compound_factory::instance().create(m_compound_id);
-// 	if (result == nullptr)
-// 		throw std::runtime_error("Failed to create compound " + m_compound_id);
-// 	return *result;
-// }
-
-// std::string residue::unique_alt_id() const
-// {
-// 	if (m_structure == nullptr)
-// 		throw std::runtime_error("Invalid residue object");
-
-// 	auto firstAlt = std::find_if(m_atoms.begin(), m_atoms.end(), [](auto &a)
-// 		{ return not a.get_label_alt_id().empty(); });
-
-// 	return firstAlt != m_atoms.end() ? firstAlt->get_label_alt_id() : "";
-// }
-
 void residue::add_atom(atom &atom)
 {
-	// atom.set_property("label_comp_id", m_compound_id);
-	// atom.set_property("label_asym_id", m_asym_id);
-	// if (m_seq_id != 0)
-	// 	atom.set_property("label_seq_id", std::to_string(m_seq_id));
-	// atom.set_property("auth_seq_id", m_auth_seq_id);
-
 	m_atoms.push_back(atom);
 }

 std::vector<atom> residue::unique_atoms() const
 {
-	// if (m_structure == nullptr)
-	// 	throw std::runtime_error("Invalid residue object");
-
 	std::vector<atom> result;
 	std::string firstAlt;

@@ -455,7 +374,7 @@ std::vector<atom> residue::unique_atoms() const
 		else if (alt != firstAlt)
 		{
 			if (VERBOSE > 0)
-				std::cerr << "skipping alternate atom " << atom << std::endl;
+				std::cerr << "skipping alternate atom " << atom << '\n';
 			continue;
 		}

@@ -493,7 +412,7 @@ atom residue::get_atom_by_atom_id(const std::string &atom_id) const
 	}

 	if (not result and VERBOSE > 1)
-		std::cerr << "atom with atom_id " << atom_id << " not found in residue " << m_asym_id << ':' << m_seq_id << std::endl;
+		std::cerr << "atom with atom_id " << atom_id << " not found in residue " << m_asym_id << ':' << m_seq_id << '\n';

 	return result;
 }
@@ -676,7 +595,7 @@ float monomer::alpha() const
 	catch (const std::exception &ex)
 	{
 		if (VERBOSE > 0)
-			std::cerr << ex.what() << std::endl;
+			std::cerr << ex.what() << '\n';
 	}

 	return result;
@@ -705,7 +624,7 @@ float monomer::kappa() const
 	{
 		if (VERBOSE > 0)
 			std::cerr << "When trying to calculate kappa for " << m_asym_id << ':' << m_seq_id << ": "
-					  << ex.what() << std::endl;
+					  << ex.what() << '\n';
 	}

 	return result;
@@ -728,7 +647,7 @@ float monomer::tco() const
 	{
 		if (VERBOSE > 0)
 			std::cerr << "When trying to calculate tco for " << get_asym_id() << ':' << get_seq_id() << ": "
-					  << ex.what() << std::endl;
+					  << ex.what() << '\n';
 	}

 	return result;
@@ -747,7 +666,7 @@ float monomer::omega() const
 	{
 		if (VERBOSE > 0)
 			std::cerr << "When trying to calculate omega for " << get_asym_id() << ':' << get_seq_id() << ": "
-					  << ex.what() << std::endl;
+					  << ex.what() << '\n';
 	}

 	return result;
@@ -807,17 +726,23 @@ float monomer::chi(size_t nr) const
 					atoms.back() = "CG2";
 			}

-			result = static_cast<float>(dihedral_angle(
-				get_atom_by_atom_id(atoms[nr + 0]).get_location(),
-				get_atom_by_atom_id(atoms[nr + 1]).get_location(),
-				get_atom_by_atom_id(atoms[nr + 2]).get_location(),
-				get_atom_by_atom_id(atoms[nr + 3]).get_location()));
+			auto atom_0 = get_atom_by_atom_id(atoms[nr + 0]);
+			auto atom_1 = get_atom_by_atom_id(atoms[nr + 1]);
+			auto atom_2 = get_atom_by_atom_id(atoms[nr + 2]);
+			auto atom_3 = get_atom_by_atom_id(atoms[nr + 3]);
+
+			if (atom_0 and atom_1 and atom_2 and atom_3)
+				result = static_cast<float>(dihedral_angle(
+					atom_0.get_location(),
+					atom_1.get_location(),
+					atom_2.get_location(),
+					atom_3.get_location()));
 		}
 	}
 	catch (const std::exception &e)
 	{
 		if (VERBOSE > 0)
-			std::cerr << e.what() << std::endl;
+			std::cerr << e.what() << '\n';
 		result = 0;
 	}

@@ -992,7 +917,7 @@ polymer::polymer(structure &s, const std::string &entityID, const std::string &a
 		else if (VERBOSE > 0)
 		{
 			monomer m{*this, index, seqID, authSeqID, pdbInsCode, compoundID};
-			std::cerr << "Dropping alternate residue " << m << std::endl;
+			std::cerr << "Dropping alternate residue " << m << '\n';
 		}
 	}
 }
@@ -1348,7 +1273,7 @@ structure::structure(datablock &db, size_t modelNr, StructureOpenOptions options
 		if (model_nr and *model_nr != m_model_nr)
 		{
 			if (VERBOSE > 0)
-				std::cerr << "No atoms loaded for model 1, trying model " << *model_nr << std::endl;
+				std::cerr << "No atoms loaded for model 1, trying model " << *model_nr << '\n';
 			m_model_nr = *model_nr;
 			load_atoms_for_model(options);
 		}
@@ -1357,7 +1282,7 @@ structure::structure(datablock &db, size_t modelNr, StructureOpenOptions options
 	if (m_atoms.empty())
 	{
 		if (VERBOSE >= 0)
-			std::cerr << "Warning: no atoms loaded" << std::endl;
+			std::cerr << "Warning: no atoms loaded\n";
 	}
 	else
 		load_data();
@@ -1365,23 +1290,16 @@ structure::structure(datablock &db, size_t modelNr, StructureOpenOptions options

 void structure::load_atoms_for_model(StructureOpenOptions options)
 {
+	using namespace literals; // for the "..."_key literals used in the condition below
+
 	auto &atomCat = m_db["atom_site"];

-	for (const auto &a : atomCat)
-	{
-		std::string id, type_symbol;
-		std::optional<size_t> model_nr;
-
-		cif::tie(id, type_symbol, model_nr) = a.get("id", "type_symbol", "pdbx_PDB_model_num");
-
-		if (model_nr and *model_nr != m_model_nr)
-			continue;
-
-		if ((options bitand StructureOpenOptions::SkipHydrogen) and (type_symbol == "H" or type_symbol == "D"))
-			continue;
+	condition c = "pdbx_PDB_model_num"_key == null or "pdbx_PDB_model_num"_key == m_model_nr; // rows with no model number, or with this model's number
+	if (options bitand StructureOpenOptions::SkipHydrogen)
+		c = std::move(c) and ("type_symbol"_key != "H" and "type_symbol"_key != "D"); // additionally exclude type_symbol H and D

+	for (auto id : atomCat.find<std::string>(std::move(c), "id")) // one atom per matching row id
 		emplace_atom(std::make_shared<atom::atom_impl>(m_db, id));
-	}
 }

 // structure::structure(const structure &s)
@@ -1455,7 +1373,7 @@ void structure::load_data()
 		if (ri == resMap.end())
 		{
 			if (VERBOSE > 0)
-				std::cerr << "Missing residue for atom " << atom << std::endl;
+				std::cerr << "Missing residue for atom " << atom << '\n';

 			// see if it might match a non poly
 			for (auto &res : m_non_polymers)
@@ -1490,7 +1408,7 @@ EntityType structure::get_entity_type_for_entity_id(const std::string entityID)
 	EntityType result;

 	if (iequals(entity_type, "polymer"))
-		result = EntityType::polymer;
+		result = EntityType::Polymer;
 	else if (iequals(entity_type, "non-polymer"))
 		result = EntityType::NonPolymer;
 	else if (iequals(entity_type, "macrolide"))
@@ -1539,6 +1457,36 @@ EntityType structure::get_entity_type_for_asym_id(const std::string asym_id) con
 // 	return result;
 // }

+// Tell whether this structure holds an atom whose id equals `id`.
+// Bisects m_atom_index, comparing atom ids with std::string::compare
+// (presumably the index is kept ordered by atom id -- confirm where it is built).
+bool structure::has_atom_id(const std::string &id) const
+{
+	assert(m_atoms.size() == m_atom_index.size());
+
+	// signed bounds let hi fall below lo, which ends the search
+	int lo = 0;
+	int hi = static_cast<int>(m_atoms.size() - 1);
+
+	while (lo <= hi)
+	{
+		const int mid = (lo + hi) / 2;
+		const atom &candidate = m_atoms[m_atom_index[mid]];
+		const int order = candidate.id().compare(id);
+
+		if (order == 0)
+			return true;
+
+		// candidate sorts before id: search the upper half, otherwise the lower half
+		if (order < 0)
+			lo = mid + 1;
+		else
+			hi = mid - 1;
+	}
+
+	return false;
+}
+
 atom structure::get_atom_by_id(const std::string &id) const
 {
 	assert(m_atoms.size() == m_atom_index.size());
@@ -1845,9 +1793,19 @@ void structure::remove_atom(atom &a, bool removeFromResidue)
 {
 	using namespace literals;

-	auto &atomSites = m_db["atom_site"];
+	auto &atomSite = m_db["atom_site"];

-	if (removeFromResidue)
+	if (a.is_water())
+	{
+		auto ra = atomSite.find1("id"_key == a.id());
+		if (ra)
+		{
+			auto &nps = m_db["pdbx_nonpoly_scheme"];
+			for (auto rnp : atomSite.get_children(ra, nps))
+				nps.erase(rnp);
+		}
+	}
+	else if (removeFromResidue)
 	{
 		try
 		{
@@ -1857,11 +1815,41 @@ void structure::remove_atom(atom &a, bool removeFromResidue)
 		catch (const std::exception &ex)
 		{
 			if (VERBOSE > 0)
-				std::cerr << "Error removing atom from residue: " << ex.what() << std::endl;
+				std::cerr << "Error removing atom from residue: " << ex.what() << '\n';
 		}
 	}

-	atomSites.erase("id"_key == a.id());
+	for (auto ri : atomSite.find("id"_key == a.id()))
+	{
+		// also remove struct_conn records for this atom
+		auto &structConn = m_db["struct_conn"];
+
+		condition cond;
+
+		for (std::string prefix : { "ptnr1_", "ptnr2_", "pdbx_ptnr3_" })
+		{
+			if (a.get_label_seq_id() == 0)
+				cond = std::move(cond) or (
+					cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
+					cif::key(prefix + "label_seq_id") == null and
+					cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
+					cif::key(prefix + "label_atom_id") == a.get_label_atom_id()
+				);
+			else
+				cond = std::move(cond) or (
+					cif::key(prefix + "label_asym_id") == a.get_label_asym_id() and
+					cif::key(prefix + "label_seq_id") == a.get_label_seq_id() and
+					cif::key(prefix + "auth_seq_id") == a.get_auth_seq_id() and
+					cif::key(prefix + "label_atom_id") == a.get_label_atom_id()
+				);
+		}
+
+		if (cond)
+			structConn.erase(std::move(cond));
+
+		atomSite.erase(ri);
+		break;
+	}

 	assert(m_atom_index.size() == m_atoms.size());

@@ -2008,7 +1996,7 @@ void structure::change_residue(residue &res, const std::string &newCompound,
 		if (i == atoms.end())
 		{
 			if (VERBOSE >= 0)
-				std::cerr << "Missing atom for atom ID " << a1 << std::endl;
+				std::cerr << "Missing atom for atom ID " << a1 << '\n';
 			continue;
 		}

@@ -2088,7 +2076,7 @@ void structure::remove_residue(residue &res)

 	switch (res.entity_type())
 	{
-		case EntityType::polymer:
+		case EntityType::Polymer:
 		{
 			auto &m = dynamic_cast<monomer &>(res);

@@ -2224,6 +2212,7 @@ void structure::remove_branch(branch &branch)

 	m_db["pdbx_branch_scheme"].erase("asym_id"_key == branch.get_asym_id());
 	m_db["struct_asym"].erase("id"_key == branch.get_asym_id());
+	m_db["struct_conn"].erase("ptnr1_label_asym_id"_key == branch.get_asym_id() or "ptnr2_label_asym_id"_key == branch.get_asym_id());

 	m_branches.erase(remove(m_branches.begin(), m_branches.end(), branch), m_branches.end());
 }
@@ -2329,18 +2318,19 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
 	{
 		auto atom_id = atom_site.get_unique_id("");

-		atom.set_value("name", atom_id);
+		atom.set_value("id", atom_id);
 		atom.set_value("label_asym_id", asym_id);
 		atom.set_value("auth_asym_id", asym_id);
 		atom.set_value("label_entity_id", entity_id);

 		atom.set_value_if_empty({"group_PDB", "HETATM"});
 		atom.set_value_if_empty({"label_comp_id", comp_id});
-		atom.set_value_if_empty({"label_seq_id", ""});
+		atom.set_value_if_empty({"label_seq_id", "."});
 		atom.set_value_if_empty({"auth_comp_id", comp_id});
 		atom.set_value_if_empty({"auth_seq_id", 1});
 		atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
 		atom.set_value_if_empty({"label_alt_id", ""});
+		atom.set_value_if_empty({"occupancy", 1.0, 2});

 		auto row = atom_site.emplace(atom.begin(), atom.end());

@@ -2366,6 +2356,72 @@ std::string structure::create_non_poly(const std::string &entity_id, std::vector
 	return asym_id;
 }

+void structure::create_water(row_initializer atom)
+{
+	using namespace literals;
+	// Add a single HOH atom: pick or create the struct_asym for the water entity, then add the atom_site and pdbx_nonpoly_scheme rows.
+	auto entity_id = insert_compound("HOH", true);
+	// Reuse the struct_asym row of this entity; an exception from find1 is taken to mean there is none yet.
+	auto &struct_asym = m_db["struct_asym"];
+	std::string asym_id;
+	try
+	{
+		asym_id = struct_asym.find1<std::string>("entity_id"_key == entity_id, "id");
+	}
+	catch (const std::exception &)
+	{
+		asym_id = struct_asym.get_unique_id();
+
+		struct_asym.emplace({
+			{"id", asym_id},
+			{"pdbx_blank_PDB_chainid_flag", "N"},
+			{"pdbx_modified", "N"},
+			{"entity_id", entity_id},
+			{"details", "?"}
+		});
+	}
+	// Number the new water after the highest auth_seq_id of this entity (find_max presumably yields a very small sentinel for an empty selection - TODO confirm).
+	auto &atom_site = m_db["atom_site"];
+	auto auth_seq_id = atom_site.find_max<int>("auth_seq_id", "label_entity_id"_key == entity_id) + 1;
+	if (auth_seq_id < 0)
+		auth_seq_id = 1;
+
+	auto atom_id = atom_site.get_unique_id("");
+	atom.set_value("id", atom_id);
+	atom.set_value("label_asym_id", asym_id);
+	atom.set_value("auth_asym_id", asym_id);
+	atom.set_value("label_entity_id", entity_id);
+	atom.set_value("auth_seq_id", std::to_string(auth_seq_id));
+	// The items above are always overwritten; the ones below go through set_value_if_empty.
+	atom.set_value_if_empty({"group_PDB", "HETATM"});
+	atom.set_value_if_empty({"label_comp_id", "HOH"});
+	atom.set_value_if_empty({"label_seq_id", "."});
+	atom.set_value_if_empty({"auth_comp_id", "HOH"});
+	atom.set_value_if_empty({"pdbx_PDB_model_num", 1});
+	atom.set_value_if_empty({"label_alt_id", ""});
+	atom.set_value_if_empty({"occupancy", 1.0, 2});
+
+	atom_site.emplace(atom.begin(), atom.end());
+	emplace_atom(std::make_shared<atom::atom_impl>(m_db, atom_id));
+	// Same guard as for auth_seq_id: without it an empty pdbx_nonpoly_scheme would produce a bogus ndb_seq_num.
+	auto &pdbx_nonpoly_scheme = m_db["pdbx_nonpoly_scheme"];
+	int ndb_nr = pdbx_nonpoly_scheme.find_max<int>("ndb_seq_num") + 1;
+	if (ndb_nr < 0)
+		ndb_nr = 1;
+	pdbx_nonpoly_scheme.emplace({
+		{"asym_id", asym_id},
+		{"entity_id", entity_id},
+		{"mon_id", "HOH"},
+		{"ndb_seq_num", ndb_nr},
+		{"pdb_seq_num", auth_seq_id},
+		{"auth_seq_num", auth_seq_id},
+		{"pdb_mon_id", "HOH"},
+		{"auth_mon_id", "HOH"},
+		{"pdb_strand_id", asym_id},
+		{"pdb_ins_code", "."},
+	});
+}
+
 branch &structure::create_branch()
 {
 	auto &entity = m_db["entity"];
@@ -2580,7 +2636,7 @@ std::string structure::create_entity_for_branch(branch &branch)
 		entityID = entity.get_unique_id("");

 		if (VERBOSE)
-			std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << std::endl;
+			std::cout << "Creating new entity " << entityID << " for branched sugar " << entityName << '\n';

 		entity.emplace({
 			{"id", entityID},
@@ -2605,10 +2661,10 @@ std::string structure::create_entity_for_branch(branch &branch)
 		{
 			auto l2 = s1.get_link();

-			if (not l2)
+			if (not l2 or l2.get_auth_seq_id().empty())
 				continue;

-			auto &s2 = branch.at(std::stoi(l2.get_auth_seq_id()) - 1);
+			auto &s2 = branch.at(stoi(l2.get_auth_seq_id()) - 1);
 			auto l1 = s2.get_atom_by_atom_id("C1");

 			pdbx_entity_branch_link.emplace({
@@ -2784,7 +2840,8 @@ void reconstruct_pdbx(datablock &db)
 	if (db.get("atom_site") == nullptr)
 		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
 	
-	
+	assert(false);
+	throw std::runtime_error("not implemented yet");
 }

 } // namespace pdbx
--- a/src/parser.cpp
+++ b/src/parser.cpp
@@ -24,15 +24,14 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/utilities.hpp>
-#include <cif++/forward_decl.hpp>
-#include <cif++/parser.hpp>
-#include <cif++/file.hpp>
+#include "cif++/utilities.hpp"
+#include "cif++/forward_decl.hpp"
+#include "cif++/parser.hpp"
+#include "cif++/file.hpp"

 #include <cassert>
 #include <iostream>
 #include <map>
-#include <regex>
 #include <stack>

 namespace cif
@@ -40,13 +39,152 @@ namespace cif

 // --------------------------------------------------------------------

+class reserved_words_automaton
+{
+  public:
+	// Recognises the reserved words data_, global_, loop_, save_ and stop_
+	// one character at a time; letters are compared case-insensitively.
+	reserved_words_automaton() {}
+
+	enum move_result
+	{
+		undefined,  // no verdict yet, more input is needed
+		no_keyword, // the input is not a reserved word
+		data,       // data_ followed by one or more non-blank characters
+		global,     // global_
+		loop,       // loop_
+		save,       // save_ without anything following it
+		save_plus,  // save_ followed by one or more non-blank characters
+		stop        // stop_
+	};
+
+	constexpr bool finished() const // state 0 (not a keyword) or a keyword state (< 0)
+	{
+		return m_state <= 0;
+	}
+
+	constexpr bool matched() const // currently in one of the keyword states -1 .. -5
+	{
+		return m_state < 0;
+	}
+
+	// Feed the next character; returns undefined until a verdict is reached, after which the state is reset to 0.
+	constexpr move_result move(int ch)
+	{
+		move_result result = undefined;
+		switch (m_state)
+		{
+			case 0:
+				break;
+
+			case -1:		// data_
+				if (sac_parser::is_non_blank(ch))
+					m_seen_trailing_chars = true;
+				else if (m_seen_trailing_chars)
+					result = data;
+				else
+					result = no_keyword;
+				break;
+
+			case -2:		// global_
+				result = sac_parser::is_non_blank(ch) ? no_keyword : global;
+				break;
+
+			case -3:		// loop_
+				result = sac_parser::is_non_blank(ch) ? no_keyword : loop;
+				break;
+
+			case -4:		// save_
+				if (sac_parser::is_non_blank(ch))
+					m_seen_trailing_chars = true;
+				else if (m_seen_trailing_chars)
+					result = save_plus;
+				else
+					result = save;
+				break;
+
+			case -5:		// stop_
+				result = sac_parser::is_non_blank(ch) ? no_keyword : stop;
+				break;
+
+			default:
+				assert(m_state > 0 and m_state < NODE_COUNT);
+				// fold only a-z to upper case; masking with ~0x20 would also map DEL (0x7f) onto '_'
+				for (;;)
+				{
+					if (s_dag[m_state].ch == ((ch >= 'a' and ch <= 'z') ? ch - ('a' - 'A') : ch))
+					{
+						m_state = s_dag[m_state].next_match;
+						break;
+					}
+
+					m_state = s_dag[m_state].next_nomatch;
+					if (m_state == 0)
+					{
+						result = no_keyword;
+						break;
+					}
+				}
+				break;
+		}
+
+		if (result != undefined)
+			m_state = 0;
+		return result;
+	}
+
+  private:
+	static constexpr struct node
+	{
+		int16_t ch;           // upper case character expected at this node
+		int8_t next_match;    // state to enter when ch matches (negative: keyword complete)
+		int8_t next_nomatch;  // alternative node to try, 0 when there is none
+	} s_dag[] = {
+		{ 0 },          //  0: unused, state 0 means "not a keyword"
+		{ 'D',  5, 2 }, //  1: first character, alternatives D, G, L, S
+		{ 'G',  9, 3 },
+		{ 'L', 15, 4 },
+		{ 'S', 19, 0 },
+		{ 'A',  6, 0 }, //  5: dATA_
+		{ 'T',  7, 0 },
+		{ 'A',  8, 0 },
+		{ '_', -1, 0 },
+		{ 'L', 10, 0 }, //  9: gLOBAL_
+		{ 'O', 11, 0 },
+		{ 'B', 12, 0 },
+		{ 'A', 13, 0 },
+		{ 'L', 14, 0 },
+		{ '_', -2, 0 },
+		{ 'O', 16, 0 }, // 15: lOOP_
+		{ 'O', 17, 0 },
+		{ 'P', 18, 0 },
+		{ '_', -3, 0 },
+		{ 'A', 21, 20 }, // 19: sAVE_ or, via node 20, sTOP_
+		{ 'T', 24, 0 },
+		{ 'V', 22, 0 },
+		{ 'E', 23, 0 },
+		{ '_', -4, 0 },
+		{ 'O', 25, 0 }, // 24: stOP_
+		{ 'P', 26, 0 },
+		{ '_', -5, 0 },
+	};
+
+	static constexpr int NODE_COUNT = sizeof(s_dag) / sizeof(node);
+
+	int m_state = 1;                     // > 0: index into s_dag, 0: no keyword, < 0: keyword state
+	bool m_seen_trailing_chars = false;  // set once a non-blank character follows data_ or save_
+};
+
+// --------------------------------------------------------------------
+
 sac_parser::sac_parser(std::istream &is, bool init)
 	: m_source(*is.rdbuf())
 {
+	m_token_buffer.reserve(8192);
+
 	if (is.rdbuf() == nullptr)
 		throw std::runtime_error("Attempt to read from uninitialised stream");

-	m_validate = true;
 	m_line_nr = 1;
 	m_bol = true;

@@ -54,45 +192,54 @@ sac_parser::sac_parser(std::istream &is, bool init)
 		m_lookahead = get_next_token();
 }

+bool sac_parser::is_unquoted_string(std::string_view text)
+{
+	// Text may be written without quotes when it is empty or starts with an
+	// ordinary character, contains only characters accepted by is_non_blank
+	// and does not leave the reserved word automaton in a keyword state.
+	if (not text.empty() and not is_ordinary(text.front()))
+		return false;
+
+	reserved_words_automaton keywords;
+
+	for (char c : text)
+	{
+		// anything is_non_blank() rejects forces quoting
+		if (not is_non_blank(c))
+			return false;
+
+		// only the final state matters, the per-character verdict is ignored
+		keywords.move(c);
+	}
+
+	// matched() reports that the automaton ended in a keyword state, e.g.
+	// after loop_ or after data_ followed by a name
+	return not keywords.matched();
+}
+
 // get_next_char takes a char from the buffer, or if it is empty
 // from the istream. This function also does carriage/linefeed
 // translation.
 int sac_parser::get_next_char()
 {
-	int result = std::char_traits<char>::eof();
-
-	if (m_buffer.empty())
-		result = m_source.sbumpc();
-	else
-	{
-		result = m_buffer.back();
-		m_buffer.pop_back();
-	}
-
-	// very simple CR/LF translation into LF
-	if (result == '\r')
-	{
-		int lookahead = m_source.sbumpc();
-		if (lookahead != '\n')
-			m_buffer.push_back(lookahead);
-		result = '\n';
-	}
+	int result = m_source.sbumpc();

 	if (result == std::char_traits<char>::eof())
-		m_token_value.push_back(0);
+		m_token_buffer.push_back(0);
 	else
-		m_token_value.push_back(std::char_traits<char>::to_char_type(result));
-
-	if (result == '\n')
-		++m_line_nr;
-
-	if (VERBOSE >= 6)
 	{
-		std::cerr << "get_next_char => ";
-		if (iscntrl(result) or not isprint(result))
-			std::cerr << int(result) << std::endl;
-		else
-			std::cerr << char(result) << std::endl;
+		if (result == '\r')
+		{
+			if (m_source.sgetc() == '\n')
+				m_source.sbumpc();
+
+			++m_line_nr;
+			result = '\n';
+		}
+		else if (result == '\n')
+			++m_line_nr;
+		
+		m_token_buffer.push_back(std::char_traits<char>::to_char_type(result));
 	}

 	return result;
@@ -100,44 +247,22 @@ int sac_parser::get_next_char()

 void sac_parser::retract()
 {
-	assert(not m_token_value.empty());
+	assert(not m_token_buffer.empty()); // retract() takes back the last character stored in the token buffer

-	char ch = m_token_value.back();
+	char ch = m_token_buffer.back();
 	if (ch == '\n')
 		--m_line_nr;

-	m_buffer.push_back(ch == 0 ? std::char_traits<char>::eof() : std::char_traits<char>::to_int_type(ch));
-	m_token_value.pop_back();
-}
-
-int sac_parser::restart(int start)
-{
-	int result = 0;
-
-	while (not m_token_value.empty())
-		retract();
-
-	switch (start)
+	if (ch != 0) // a 0 in the token buffer stands for EOF, there is nothing to give back then
 	{
-		case State::Start:
-			result = State::Float;
-			break;
+		// step back one position with sungetc(): unlike sputbackc(ch) it does not require ch to equal
+		// the stored byte, which is '\r' when get_next_char translated a lone carriage return to '\n'.

-		case State::Float:
-			result = State::Int;
-			break;
-
-		case State::Int:
-			result = State::Value;
-			break;
-
-		default:
-			error("Invalid state in SacParser");
+		if (m_source.sungetc() == std::char_traits<char>::eof())
+			throw std::runtime_error("putback failure");
 	}

-	m_bol = false;
-
-	return result;
+	m_token_buffer.pop_back();
 }

 sac_parser::CIFToken sac_parser::get_next_token()
@@ -146,11 +271,13 @@ sac_parser::CIFToken sac_parser::get_next_token()

 	CIFToken result = CIFToken::Unknown;
 	int quoteChar = 0;
-	int state = State::Start, start = State::Start;
+	State state = State::Start;
 	m_bol = false;

-	m_token_value.clear();
-	mTokenType = CIFValue::Unknown;
+	m_token_buffer.clear();
+	m_token_value = {};
+
+	reserved_words_automaton dag;

 	while (result == CIFToken::Unknown)
 	{
@@ -174,23 +301,27 @@ sac_parser::CIFToken sac_parser::get_next_token()
 					state = State::Tag;
 				else if (ch == ';' and m_bol)
 					state = State::TextField;
+				else if (ch == '?')
+					state = State::QuestionMark;
 				else if (ch == '\'' or ch == '"')
 				{
 					quoteChar = ch;
 					state = State::QuotedString;
 				}
+				else if (dag.move(ch) == reserved_words_automaton::undefined)
+					state = State::Reserved;
 				else
-					state = start = restart(start);
+					state = State::Value;
 				break;

 			case State::White:
 				if (ch == kEOF)
 					result = CIFToken::Eof;
-				else if (not isspace(ch))
+				else if (not is_space(ch))
 				{
 					state = State::Start;
 					retract();
-					m_token_value.clear();
+					m_token_buffer.clear();
 				}
 				else
 					m_bol = (ch == '\n');
@@ -201,38 +332,40 @@ sac_parser::CIFToken sac_parser::get_next_token()
 				{
 					state = State::Start;
 					m_bol = true;
-					m_token_value.clear();
+					m_token_buffer.clear();
 				}
 				else if (ch == kEOF)
 					result = CIFToken::Eof;
 				else if (not is_any_print(ch))
 					error("invalid character in comment");
 				break;
+			
+			case State::QuestionMark:
+				if (not is_non_blank(ch))
+				{
+					retract();
+					result = CIFToken::Value;
+				}
+				else
+					state = State::Value;
+				break;

 			case State::TextField:
 				if (ch == '\n')
-					state = State::TextField + 1;
+					state = State::TextFieldNL;
 				else if (ch == kEOF)
 					error("unterminated textfield");
-				// else if (ch == '\\')
-				// 	state = State::Esc;
 				else if (not is_any_print(ch) and cif::VERBOSE > 2)
 					warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
 				break;

-			// case State::Esc:
-			// 	if (ch == '\n')
-
-			// 	break;
-
-			case State::TextField + 1:
+			case State::TextFieldNL:
 				if (is_text_lead(ch) or ch == ' ' or ch == '\t')
 					state = State::TextField;
 				else if (ch == ';')
 				{
-					assert(m_token_value.length() >= 2);
-					m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
-					mTokenType = CIFValue::TextField;
+					assert(m_token_buffer.size() >= 2);
+					m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3);
 					result = CIFToken::Value;
 				}
 				else if (ch == kEOF)
@@ -255,12 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token()
 				{
 					retract();
 					result = CIFToken::Value;
-					mTokenType = CIFValue::String;
-
-					if (m_token_value.length() < 2)
+					if (m_token_buffer.size() < 2)
 						error("Invalid quoted string token");

-					m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
+					m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 2);
 				}
 				else if (ch == quoteChar)
 					;
@@ -277,149 +408,68 @@ sac_parser::CIFToken sac_parser::get_next_token()
 				{
 					retract();
 					result = CIFToken::Tag;
+					m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
 				}
 				break;

-			case State::Float:
-				if (ch == '+' or ch == '-')
+			case State::Reserved:
+				switch (dag.move(ch))
 				{
-					state = State::Float + 1;
+					case reserved_words_automaton::undefined:
+						break;
+
+					case reserved_words_automaton::no_keyword:
+						if (not is_non_blank(ch))
+						{
+							retract();
+							result = CIFToken::Value;
+							m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
+						}
+						else
+							state = State::Value;
+						break;
+
+					case reserved_words_automaton::data:
+						retract();
+						m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
+						result = CIFToken::DATA;
+						break;
+
+					case reserved_words_automaton::global:
+						retract();
+						result = CIFToken::GLOBAL;
+						break;
+
+					case reserved_words_automaton::loop:
+						retract();
+						result = CIFToken::LOOP;
+						break;
+
+					case reserved_words_automaton::save:
+						retract();
+						result = CIFToken::SAVE_;
+						break;
+
+					case reserved_words_automaton::save_plus:
+						retract();
+						m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
+						result = CIFToken::SAVE_NAME;
+						break;
+
+					case reserved_words_automaton::stop:
+						retract();
+						result = CIFToken::STOP;
+						break;
 				}
-				else if (isdigit(ch))
-					state = State::Float + 1;
-				else
-					state = start = restart(start);
-				break;
-
-			case State::Float + 1:
-				//				if (ch == '(')	// numeric???
-				//					mState = State::NumericSuffix;
-				//				else
-				if (ch == '.')
-					state = State::Float + 2;
-				else if (tolower(ch) == 'e')
-					state = State::Float + 3;
-				else if (is_white(ch) or ch == kEOF)
-				{
-					retract();
-					result = CIFToken::Value;
-					mTokenType = CIFValue::Int;
-				}
-				else
-					state = start = restart(start);
-				break;
-
-			// parsed '.'
-			case State::Float + 2:
-				if (tolower(ch) == 'e')
-					state = State::Float + 3;
-				else if (is_white(ch) or ch == kEOF)
-				{
-					retract();
-					result = CIFToken::Value;
-					mTokenType = CIFValue::Float;
-				}
-				else
-					state = start = restart(start);
-				break;
-
-			// parsed 'e'
-			case State::Float + 3:
-				if (ch == '-' or ch == '+')
-					state = State::Float + 4;
-				else if (isdigit(ch))
-					state = State::Float + 5;
-				else
-					state = start = restart(start);
-				break;
-
-			case State::Float + 4:
-				if (isdigit(ch))
-					state = State::Float + 5;
-				else
-					state = start = restart(start);
-				break;
-
-			case State::Float + 5:
-				if (is_white(ch) or ch == kEOF)
-				{
-					retract();
-					result = CIFToken::Value;
-					mTokenType = CIFValue::Float;
-				}
-				else
-					state = start = restart(start);
-				break;
-
-			case State::Int:
-				if (isdigit(ch) or ch == '+' or ch == '-')
-					state = State::Int + 1;
-				else
-					state = start = restart(start);
-				break;
-
-			case State::Int + 1:
-				if (is_white(ch) or ch == kEOF)
-				{
-					retract();
-					result = CIFToken::Value;
-					mTokenType = CIFValue::Int;
-				}
-				else
-					state = start = restart(start);
 				break;

 			case State::Value:
-				if (ch == '_')
-				{
-					std::string s = to_lower_copy(m_token_value);
-
-					if (s == "data_")
-					{
-						state = State::DATA;
-						continue;
-					}
-					
-					if (s == "save_")
-					{
-						state = State::SAVE;
-						continue;
-					}
-				}
-
-				if (result == CIFToken::Unknown and not is_non_blank(ch))
-				{
-					retract();
-					result = CIFToken::Value;
-
-					if (m_token_value == ".")
-						mTokenType = CIFValue::Inapplicable;
-					else if (iequals(m_token_value, "global_"))
-						result = CIFToken::GLOBAL;
-					else if (iequals(m_token_value, "stop_"))
-						result = CIFToken::STOP;
-					else if (iequals(m_token_value, "loop_"))
-						result = CIFToken::LOOP;
-					else if (m_token_value == "?")
-					{
-						mTokenType = CIFValue::Unknown;
-						m_token_value.clear();
-					}
-				}
-				break;
-
-			case State::DATA:
-			case State::SAVE:
 				if (not is_non_blank(ch))
 				{
 					retract();
-
-					if (state == State::DATA)
-						result = CIFToken::DATA;
-					else
-						result = CIFToken::SAVE;
-
-					m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
+					result = CIFToken::Value;
+					m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
+					break;
 				}
 				break;

@@ -433,11 +483,9 @@ sac_parser::CIFToken sac_parser::get_next_token()
 	if (VERBOSE >= 5)
 	{
 		std::cerr << get_token_name(result);
-		if (mTokenType != CIFValue::Unknown)
-			std::cerr << ' ' << get_value_name(mTokenType);
 		if (result != CIFToken::Eof)
 			std::cerr << " " << std::quoted(m_token_value);
-		std::cerr << std::endl;
+		std::cerr << '\n';
 	}

 	return result;
@@ -506,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
 				break;

 			case string_quote:
-				if (std::isspace(ch))
+				if (is_space(ch))
 					state = start;
 				else
 					state = string;
@@ -518,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
 				break;

 			case data:
-				if (isspace(ch) and dblk[si] == 0)
+				if (is_space(ch) and dblk[si] == 0)
 					found = true;
 				else if (dblk[si++] != ch)
 					state = start;
@@ -596,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
 				break;

 			case string_quote:
-				if (std::isspace(ch))
+				if (is_space(ch))
 					state = start;
 				else
 					state = string;
@@ -620,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
 			case data_name:
 				if (is_non_blank(ch))
 					datablock.insert(datablock.end(), char(ch));
-				else if (isspace(ch))
+				else if (is_space(ch))
 				{
 					if (not datablock.empty())
 						index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in);
@@ -696,7 +744,7 @@ void sac_parser::parse_datablock()
 	static const std::string kUnitializedCategory("<invalid>");
 	std::string cat = kUnitializedCategory;	// intial value acts as a guard for empty category names

-	while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
+	while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE_NAME)
 	{
 		switch (m_lookahead)
 		{
@@ -761,7 +809,7 @@ void sac_parser::parse_datablock()
 				break;
 			}

-			case CIFToken::SAVE:
+			case CIFToken::SAVE_NAME:
 				parse_save_frame();
 				break;

@@ -779,19 +827,19 @@ void sac_parser::parse_save_frame()

 // --------------------------------------------------------------------

-void parser::produce_datablock(const std::string &name)
+void parser::produce_datablock(std::string_view name)
 {
 	if (VERBOSE >= 4)
-		std::cerr << "producing data_" << name << std::endl;
+		std::cerr << "producing data_" << name << '\n';

 	const auto &[iter, ignore] = m_file.emplace(name);
 	m_datablock = &(*iter);
 }

-void parser::produce_category(const std::string &name)
+void parser::produce_category(std::string_view name)
 {
 	if (VERBOSE >= 4)
-		std::cerr << "producing category " << name << std::endl;
+		std::cerr << "producing category " << name << '\n';

 	const auto &[cat, ignore] = m_datablock->emplace(name);
 	m_category = &*cat;
@@ -800,7 +848,7 @@ void parser::produce_category(const std::string &name)
 void parser::produce_row()
 {
 	if (VERBOSE >= 4 and m_category != nullptr)
-		std::cerr << "producing row for category " << m_category->name() << std::endl;
+		std::cerr << "producing row for category " << m_category->name() << '\n';

 	if (m_category == nullptr)
 		error("inconsistent categories in loop_");
@@ -810,10 +858,10 @@ void parser::produce_row()
 	// m_row.lineNr(m_line_nr);
 }

-void parser::produce_item(const std::string &category, const std::string &item, const std::string &value)
+void parser::produce_item(std::string_view category, std::string_view item, std::string_view value)
 {
 	if (VERBOSE >= 4)
-		std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
+		std::cerr << "producing _" << category << '.' << item << " -> " << value << '\n';

 	if (m_category == nullptr or not iequals(category, m_category->name()))
 		error("inconsistent categories in loop_");
@@ -821,4 +869,4 @@ void parser::produce_item(const std::string &category, const std::string &item,
 	m_row[item] = m_token_value;
 }

-} // namespace cif
+} // namespace cif
--- a/src/pdb/cif2pdb.cpp
+++ b/src/pdb/cif2pdb.cpp
--- a/src/pdb/pdb2cif.cpp
+++ b/src/pdb/pdb2cif.cpp
@@ -24,10 +24,9 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++.hpp>
-#include <cif++/pdb/pdb2cif.hpp>
-#include <cif++/pdb/pdb2cif_remark_3.hpp>
-#include <cif++/gzio.hpp>
+#include "pdb2cif_remark_3.hpp"
+
+#include "cif++.hpp"

 #include <iomanip>
 #include <map>
@@ -38,10 +37,8 @@ using cif::category;
 using cif::datablock;
 using cif::iequals;
 using cif::key;
-// using cif::row;
 using cif::to_lower;
 using cif::to_lower_copy;
-// using cif::compound_factory;

 // --------------------------------------------------------------------
 // attempt to come up with better error handling
@@ -265,7 +262,7 @@ int PDBRecord::vI(int columnFirst, int columnLast)
 	catch (const std::exception &ex)
 	{
 		if (cif::VERBOSE >= 0)
-			std::cerr << "Trying to parse '" << std::string(mValue + columnFirst - 7, mValue + columnLast - 7) << '\'' << std::endl;
+			std::cerr << "Trying to parse '" << std::string(mValue + columnFirst - 7, mValue + columnLast - 7) << '\'' << '\n';
 		throw;
 	}

@@ -335,7 +332,7 @@ std::tuple<std::string, std::string> SpecificationListParser::GetNextSpecificati
 				else if (not isspace(ch))
 				{
 					if (cif::VERBOSE > 0)
-						std::cerr << "skipping invalid character in SOURCE ID: " << ch << std::endl;
+						std::cerr << "skipping invalid character in SOURCE ID: " << ch << '\n';
 				}
 				break;

@@ -352,7 +349,7 @@ std::tuple<std::string, std::string> SpecificationListParser::GetNextSpecificati
 				if (ch == ';')
 				{
 					if (cif::VERBOSE > 0)
-						std::cerr << "Empty value for SOURCE: " << id << std::endl;
+						std::cerr << "Empty value for SOURCE: " << id << '\n';
 					state = eStart;
 				}
 				else if (not isspace(ch))
@@ -416,7 +413,7 @@ std::tuple<std::string, std::string> SpecificationListParser::GetNextSpecificati
 				if (ch == ';')
 				{
 					if (cif::VERBOSE > 0)
-						std::cerr << "Skipping invalid header line: '" << std::string(start, mP) << std::endl;
+						std::cerr << "Skipping invalid header line: '" << std::string(start, mP) << '\n';
 					state = eStart;
 				}
 				break;
@@ -830,7 +827,7 @@ class PDBFileParser
 		{
 			ec = error::make_error_code(error::pdbErrors::residueNotFound);
 			if (cif::VERBOSE > 0)
-				std::cerr << "Residue " << chainID << resSeq << iCode << " could not be mapped" << std::endl;
+				std::cerr << "Residue " << chainID << resSeq << iCode << " could not be mapped\n";
 		}
 		else
 			result = mChainSeq2AsymSeq.at(key);
@@ -925,7 +922,7 @@ class PDBFileParser
 		catch (const std::exception &ex)
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << ex.what() << std::endl;
+				std::cerr << ex.what() << '\n';
 			ec = error::make_error_code(error::pdbErrors::invalidDate);
 		}

@@ -937,7 +934,7 @@ class PDBFileParser
 		std::error_code ec;
 		auto result = pdb2cifDate(s, ec);
 		if (ec and cif::VERBOSE > 0)
-			std::cerr << "Invalid date(" << s << "): " << ec.message() << std::endl;
+			std::cerr << "Invalid date(" << s << "): " << ec.message() << '\n';
 		return result;
 	}

@@ -1158,7 +1155,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 				break;

 			if (cif::VERBOSE > 0)
-				std::cerr << "Line number " << lineNr << " is empty!" << std::endl;
+				std::cerr << "Line number " << lineNr << " is empty!\n";

 			getline(is, lookahead);
 			++lineNr;
@@ -1170,6 +1167,8 @@ void PDBFileParser::PreParseInput(std::istream &is)
 		std::string value;
 		if (lookahead.length() > 6)
 			value = cif::trim_right_copy(lookahead.substr(6));
+		
+		lookahead.clear();

 		uint32_t curLineNr = lineNr;
 		getline(is, lookahead);
@@ -1277,7 +1276,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 				catch (const std::exception &ex)
 				{
 					if (cif::VERBOSE >= 0)
-						std::cerr << "Dropping FORMUL line (" << (lineNr - 1) << ") with invalid component number '" << value.substr(1, 3) << '\'' << std::endl;
+						std::cerr << "Dropping FORMUL line (" << (lineNr - 1) << ") with invalid component number '" << value.substr(1, 3) << '\'' << '\n';
 					continue;
 					// throw_with_nested(std::runtime_error("Invalid component number '" + value.substr(1, 3) + '\''));
 				}
@@ -1305,7 +1304,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 			catch (const std::exception &ex)
 			{
 				if (cif::VERBOSE >= 0)
-					std::cerr << "Error parsing FORMUL at line " << lineNr << std::endl;
+					std::cerr << "Error parsing FORMUL at line " << lineNr << '\n';
 				throw;
 			}
 		}
@@ -1404,7 +1403,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 				auto f = cur->vF(74, 78);
 				auto r = cif::from_chars(f.data(), f.data() + f.length(), link.distance);
 				if (r.ec != std::errc() and cif::VERBOSE > 0)
-					std::cerr << "Error parsing link distance at line " << cur->mLineNr << std::endl;
+					std::cerr << "Error parsing link distance at line " << cur->mLineNr << '\n';
 			}
 			//	74 – 78         Real(5.2)      Length          Link distance

@@ -1418,7 +1417,7 @@ void PDBFileParser::PreParseInput(std::istream &is)
 	if (not dropped.empty())
 	{
 		if (cif::VERBOSE >= 0)
-			std::cerr << "Dropped unsupported records: " << cif::join(dropped, ", ") << std::endl;
+			std::cerr << "Dropped unsupported records: " << cif::join(dropped, ", ") << '\n';
 	}

 	if (mData == nullptr)
@@ -1447,7 +1446,7 @@ void PDBFileParser::Match(const std::string &expected, bool throwIfMissing)
 		if (throwIfMissing)
 			throw std::runtime_error("Expected record " + expected + " but found " + mRec->mName);
 		if (cif::VERBOSE > 0)
-			std::cerr << "Expected record " + expected + " but found " + mRec->mName << std::endl;
+			std::cerr << "Expected record " + expected + " but found " + mRec->mName << '\n';
 	}
 }

@@ -1583,7 +1582,7 @@ void PDBFileParser::ParseTitle()
 			if (not iequals(key, "MOL_ID") and mCompounds.empty())
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Ignoring invalid COMPND record" << std::endl;
+					std::cerr << "Ignoring invalid COMPND record\n";
 				break;
 			}

@@ -1631,7 +1630,7 @@ void PDBFileParser::ParseTitle()
 		//			if (colon == std::string::npos)
 		//			{
 		//				if (cif::VERBOSE > 0)
-		//					std::cerr << "invalid source field, missing colon (" << s << ')' << std::endl;
+		//					std::cerr << "invalid source field, missing colon (" << s << ')' << '\n';
 		//				continue;
 		//			}
 		SpecificationListParser p(vS(11));
@@ -1719,7 +1718,7 @@ void PDBFileParser::ParseTitle()
 	if (mRec->is("NUMMDL"))
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "skipping unimplemented NUMMDL record" << std::endl;
+			std::cerr << "skipping unimplemented NUMMDL record\n";
 		GetNextRecord();
 	}

@@ -1824,7 +1823,7 @@ void PDBFileParser::ParseTitle()
 	if (mRec->is("SPRSDE"))
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "skipping unimplemented SPRSDE record" << std::endl;
+			std::cerr << "skipping unimplemented SPRSDE record\n";
 		GetNextRecord();
 	}

@@ -2112,7 +2111,7 @@ void PDBFileParser::ParseRemarks()

 					while (mRec->is("REMARK 400"))
 					{
-						s << vS(12) << std::endl;
+						s << vS(12) << '\n';
 						GetNextRecord();
 					}

@@ -2129,7 +2128,7 @@ void PDBFileParser::ParseRemarks()

 					while (mRec->is("REMARK 450"))
 					{
-						s << vS(12) << std::endl;
+						s << vS(12) << '\n';
 						GetNextRecord();
 					}

@@ -2349,7 +2348,7 @@ void PDBFileParser::ParseRemarks()
 									catch (const std::exception &ex)
 									{
 										if (cif::VERBOSE > 0)
-											std::cerr << "Dropping REMARK 500 at line " << mRec->mLineNr << " due to invalid symmetry operation" << std::endl;
+											std::cerr << "Dropping REMARK 500 at line " << mRec->mLineNr << " due to invalid symmetry operation\n";
 										continue;
 									}

@@ -2755,7 +2754,7 @@ void PDBFileParser::ParseRemarks()

 					while (mRec->is("REMARK 999"))
 					{
-						s << vS(12) << std::endl;
+						s << vS(12) << '\n';
 						GetNextRecord();
 					}

@@ -2779,12 +2778,12 @@ void PDBFileParser::ParseRemarks()
 					std::stringstream s;

 					if (not mRec->vS(11).empty())
-						s << mRec->vS(11) << std::endl;
+						s << mRec->vS(11) << '\n';
 					GetNextRecord();

 					while (mRec->is(skipped.c_str()))
 					{
-						s << mRec->vS(11) << std::endl;
+						s << mRec->vS(11) << '\n';
 						GetNextRecord();
 					}

@@ -2921,7 +2920,7 @@ void PDBFileParser::ParseRemark200()
 		if (ec)
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << ec.message() << " for pdbx_collection_date" << std::endl;
+				std::cerr << ec.message() << " for pdbx_collection_date\n";

 			// The date field can become truncated when multiple values are available
 			if (diffrnNr != 1)
@@ -3042,7 +3041,7 @@ void PDBFileParser::ParseRemark200()
 				 "R MERGE FOR SHELL (I)", "R SYM FOR SHELL (I)", "<I/SIGMA(I)> FOR SHELL", "DATA REDUNDANCY IN SHELL" }))
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "Not writing reflns_shell record since d_res_high is missing" << std::endl;
+			std::cerr << "Not writing reflns_shell record since d_res_high is missing\n";
 	}
 }

@@ -3622,8 +3621,8 @@ void PDBFileParser::ConstructEntities()

 				if (cif::VERBOSE > 0)
 				{
-					std::cerr << "Detected residues that cannot be aligned to SEQRES" << std::endl
-							  << "First residue is " << chain.mDbref.chainID << ':' << r.mSeqNum << r.mIcode << std::endl;
+					std::cerr << "Detected residues that cannot be aligned to SEQRES\n"
+							  << "First residue is " << chain.mDbref.chainID << ':' << r.mSeqNum << r.mIcode << '\n';
 				}

 				chain.mTerIndex = lastResidueIndex + 1;
@@ -4053,7 +4052,7 @@ void PDBFileParser::ConstructEntities()
 					if (ec)
 					{
 						if (cif::VERBOSE > 0)
-							std::cerr << "dropping unmatched SEQADV record" << std::endl;
+							std::cerr << "dropping unmatched SEQADV record\n";
 						continue;
 					}

@@ -4375,7 +4374,7 @@ void PDBFileParser::ConstructEntities()
 		if (ec) // no need to write a modres if it could not be found
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "dropping unmapped MODRES record" << std::endl;
+				std::cerr << "dropping unmapped MODRES record\n";
 			continue;
 		}

@@ -4472,7 +4471,7 @@ void PDBFileParser::ConstructEntities()
 		if (ec)
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "error mapping unobserved residue" << std::endl;
+				std::cerr << "error mapping unobserved residue\n";
 			continue;
 		}

@@ -4738,7 +4737,7 @@ void PDBFileParser::ParseSecondaryStructure()
 		if (ec)
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "Could not map residue for HELIX " << vI(8, 10) << std::endl;
+				std::cerr << "Could not map residue for HELIX " << vI(8, 10) << '\n';
 		}
 		else
 		{
@@ -4856,7 +4855,7 @@ void PDBFileParser::ParseSecondaryStructure()
 		if (ec)
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "Dropping SHEET record " << vI(8, 10) << std::endl;
+				std::cerr << "Dropping SHEET record " << vI(8, 10) << '\n';
 		}
 		else
 		{
@@ -4892,7 +4891,7 @@ void PDBFileParser::ParseSecondaryStructure()
 				if (ec)
 				{
 					if (cif::VERBOSE > 0)
-						std::cerr << "skipping unmatched pdbx_struct_sheet_hbond record" << std::endl;
+						std::cerr << "skipping unmatched pdbx_struct_sheet_hbond record\n";
 				}
 				else
 					getCategory("pdbx_struct_sheet_hbond")->emplace({
@@ -4991,7 +4990,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 			if (ec)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Dropping SSBOND " << vI(8, 10) << std::endl;
+					std::cerr << "Dropping SSBOND " << vI(8, 10) << '\n';
 				continue;
 			}

@@ -5012,7 +5011,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 			catch (const std::exception &ex)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Dropping SSBOND " << vI(8, 10) << " due to invalid symmetry operation" << std::endl;
+					std::cerr << "Dropping SSBOND " << vI(8, 10) << " due to invalid symmetry operation\n";
 				continue;
 			}

@@ -5057,7 +5056,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 		if (mRec->is("LINK  ") or mRec->is("LINKR "))
 		{
 			if (cif::VERBOSE > 0 and mRec->is("LINKR "))
-				std::cerr << "Accepting non-standard LINKR record, but ignoring extra information" << std::endl;
+				std::cerr << "Accepting non-standard LINKR record, but ignoring extra information\n";

 			//	 1 -  6         Record name    "LINK  "
 			std::string name1 = vS(13, 16);    //	13 - 16         Atom           name1           Atom name.
@@ -5110,7 +5109,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 			if (ec)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Dropping LINK record at line " << mRec->mLineNr << std::endl;
+					std::cerr << "Dropping LINK record at line " << mRec->mLineNr << '\n';
 				continue;
 			}

@@ -5125,7 +5124,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 				if (r.ec != std::errc())
 				{
 					if (cif::VERBOSE > 0)
-						std::cerr << "Distance value '" << distance << "' is not a valid float in LINK record" << std::endl;
+						std::cerr << "Distance value '" << distance << "' is not a valid float in LINK record\n";
 					swap(ccp4LinkID, distance); // assume this is a ccp4_link_id... oh really?
 				}
 			}
@@ -5141,7 +5140,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 			catch (const std::exception &ex)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Dropping LINK record at line " << mRec->mLineNr << " due to invalid symmetry operation" << std::endl;
+					std::cerr << "Dropping LINK record at line " << mRec->mLineNr << " due to invalid symmetry operation\n";
 				continue;
 			}

@@ -5213,7 +5212,7 @@ void PDBFileParser::ParseConnectivtyAnnotation()
 			if (ec)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Dropping CISPEP record at line " << mRec->mLineNr << std::endl;
+					std::cerr << "Dropping CISPEP record at line " << mRec->mLineNr << '\n';
 				continue;
 			}

@@ -5280,7 +5279,7 @@ void PDBFileParser::ParseMiscellaneousFeatures()
 			if (ec)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "skipping struct_site_gen record" << std::endl;
+					std::cerr << "skipping struct_site_gen record\n";
 			}
 			else
 				cat->emplace({
@@ -5610,7 +5609,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
 			if (groupPDB == "HETATM")
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Changing atom from HETATM to ATOM at line " << mRec->mLineNr << std::endl;
+					std::cerr << "Changing atom from HETATM to ATOM at line " << mRec->mLineNr << '\n';
 				groupPDB = "ATOM";
 			}
 		}
@@ -5619,7 +5618,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
 			if (groupPDB == "ATOM")
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Changing atom from ATOM to HETATM at line " << mRec->mLineNr << std::endl;
+					std::cerr << "Changing atom from ATOM to HETATM at line " << mRec->mLineNr << '\n';
 				groupPDB = "HETATM";
 			}
 		}
@@ -5803,7 +5802,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
 		catch (const std::exception &ex)
 		{
 			if (cif::VERBOSE >= 0)
-				std::cerr << "Error parsing REMARK 3" << std::endl;
+				std::cerr << "Error parsing REMARK 3\n";
 			throw;
 		}
 		//
@@ -5863,12 +5862,12 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
 						(z1 - z2) * (z1 - z2)
 					);
 				else if (cif::VERBOSE > 0)
-					std::cerr << "Cannot calculate distance for link since one of the atoms is in another dimension" << std::endl;
+					std::cerr << "Cannot calculate distance for link since one of the atoms is in another dimension\n";
 			}
 			catch (std::exception &ex)
 			{
 				if (cif::VERBOSE > 0)
-					std::cerr << "Error finding atom for LINK distance calculation: " << ex.what() << std::endl;
+					std::cerr << "Error finding atom for LINK distance calculation: " << ex.what() << '\n';
 			}

 			r["pdbx_dist_value"] = distance;
@@ -5883,7 +5882,7 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
 			std::cerr << "Error parsing PDB";
 			if (mRec != nullptr)
 				std::cerr << " at line " << mRec->mLineNr;
-			std::cerr << std::endl;
+			std::cerr << '\n';
 		}
 		throw;
 	}
@@ -6051,9 +6050,9 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()
 	// C++ is getting closer to Pascal :-)
 	auto printAlignment = [&tb, highX, highY, &rx, &ry, this]()
 	{
-		std::cerr << std::string(cif::get_terminal_width(), '-') << std::endl
-				  << "Alignment for chain " << mDbref.chainID << std::endl
-				  << std::endl;
+		std::cerr << std::string(22, '-') << '\n'
+				  << "Alignment for chain " << mDbref.chainID << '\n'
+				  << '\n';
 		std::vector<std::pair<std::string, std::string>> alignment;

 		int x = highX;
@@ -6095,9 +6094,9 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()

 		reverse(alignment.begin(), alignment.end());
 		for (auto a : alignment)
-			std::cerr << "  " << a.first << " -- " << a.second << std::endl;
+			std::cerr << "  " << a.first << " -- " << a.second << '\n';

-		std::cerr << std::endl;
+		std::cerr << '\n';
 	};

 	if (cif::VERBOSE > 1)
@@ -6118,7 +6117,7 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()

 				case 1:
 					if (cif::VERBOSE > 3)
-						std::cerr << "Missing residue in ATOM records: " << rx[x].mMonID << " at " << rx[x].mSeqNum << std::endl;
+						std::cerr << "Missing residue in ATOM records: " << rx[x].mMonID << " at " << rx[x].mSeqNum << '\n';

 					--x;
 					break;
@@ -6126,7 +6125,7 @@ int PDBFileParser::PDBChain::AlignResToSeqRes()
 				case 0:
 					if (rx[x].mMonID != ry[y].mMonID)
 					{
-						std::cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonID << '/' << ry[y].mMonID << ") SEQRES does not agree with ATOM records" << std::endl;
+						std::cerr << "Warning, unaligned residues at " << x << "/" << y << "(" << rx[x].mMonID << '/' << ry[y].mMonID << ") SEQRES does not agree with ATOM records\n";
 						rx[x].mMonID = ry[y].mMonID;
 					}

@@ -6183,7 +6182,7 @@ bool PDBFileParser::PDBChain::SameSequence(const PDBChain &rhs) const

 // --------------------------------------------------------------------

-void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile)
+void read_pdb_file(std::istream &pdbFile, cif::file &cifFile)
 {
 	PDBFileParser p;

@@ -6192,7 +6191,7 @@ void ReadPDBFile(std::istream &pdbFile, cif::file &cifFile)
 	p.Parse(pdbFile, cifFile);

 	if (not cifFile.is_valid() and cif::VERBOSE >= 0)
-		std::cerr << "Resulting mmCIF file is not valid!" << std::endl;
+		std::cerr << "Resulting mmCIF file is not valid!\n";
 }

 // --------------------------------------------------------------------
@@ -6211,7 +6210,7 @@ file read(std::istream &is)
 		// is 'H'. It is as simple as that.

 		if (ch == 'h' or ch == 'H')
-			ReadPDBFile(is, result);
+			read_pdb_file(is, result);
 		else
 		{
 			try
--- a/src/pdb/pdb2cif_remark_3.cpp
+++ b/src/pdb/pdb2cif_remark_3.cpp
@@ -24,8 +24,9 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

+#include "pdb2cif_remark_3.hpp"
+
 #include <cif++.hpp>
-#include <cif++/pdb/pdb2cif_remark_3.hpp>

 #include <map>
 #include <set>
@@ -1042,7 +1043,7 @@ std::string Remark3Parser::nextLine()
 	}

 	if (cif::VERBOSE >= 2)
-		std::cerr << "RM3: " << mLine << std::endl;
+		std::cerr << "RM3: " << mLine << '\n';

 	return mLine;
 }
@@ -1056,7 +1057,11 @@ bool Remark3Parser::match(const char *expr, int nextState)
 	if (result)
 		mState = nextState;
 	else if (cif::VERBOSE >= 3)
-		std::cerr << cif::coloured("No match:", cif::scWHITE, cif::scRED) << " '" << expr << '\'' << std::endl;
+	{
+		using namespace colour;
+
+		std::cerr << coloured("No match:", white, red, bold) << " '" << expr << '\'' << '\n';
+	}

 	return result;
 }
@@ -1116,7 +1121,11 @@ float Remark3Parser::parse()
 		}

 		if (cif::VERBOSE >= 2)
-			std::cerr << cif::coloured("Dropping line:", cif::scWHITE, cif::scRED) << " '" << mLine << '\'' << std::endl;
+		{
+			using namespace colour;
+
+			std::cerr << coloured("Dropping line:", white, red, bold) << " '" << mLine << '\'' << '\n';
+		}

 		++dropped;
 	}
@@ -1168,7 +1177,7 @@ void Remark3Parser::storeCapture(const char *category, std::initializer_list<con
 			continue;

 		if (cif::VERBOSE >= 3)
-			std::cerr << "storing: '" << value << "' in _" << category << '.' << item << std::endl;
+			std::cerr << "storing: '" << value << "' in _" << category << '.' << item << '\n';

 		auto &cat = mDb[category];
 		if (cat.empty() or createNew)
@@ -1233,7 +1242,9 @@ void Remark3Parser::storeCapture(const char *category, std::initializer_list<con
 			{
 				cat.emplace({ // #warning("crystal id, diffrn id, what should be put here?")
 					{ "crystal_id", 1 },
-					{ "diffrn_id", 1 } });
+					{ "diffrn_id", 1 },
+					{ "operator", "" },
+					{ "fraction", 0.f } });
 			}
 			else if (iequals(category, "reflns"))
 				cat.emplace({ { "pdbx_ordinal", cat.size() + 1 },
@@ -1327,7 +1338,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 	if (line != "REFINEMENT.")
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "Unexpected data in REMARK 3" << std::endl;
+			std::cerr << "Unexpected data in REMARK 3\n";
 		return false;
 	}

@@ -1339,7 +1350,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 	if (not std::regex_match(line, m, rxp))
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "Expected valid PROGRAM line in REMARK 3" << std::endl;
+			std::cerr << "Expected valid PROGRAM line in REMARK 3\n";
 		return false;
 	}

@@ -1378,13 +1389,13 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 		catch (const std::exception &e)
 		{
 			if (cif::VERBOSE >= 0)
-				std::cerr << "Error parsing REMARK 3 with " << parser->program() << std::endl
+				std::cerr << "Error parsing REMARK 3 with " << parser->program() << '\n'
 						  << e.what() << '\n';
 			score = 0;
 		}

 		if (cif::VERBOSE >= 2)
-			std::cerr << "Score for " << parser->program() << ": " << score << std::endl;
+			std::cerr << "Score for " << parser->program() << ": " << score << '\n';

 		if (score > 0)
 		{
@@ -1420,7 +1431,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 		else if (cif::starts_with(program, "X-PLOR"))
 			tryParser(new XPLOR_Remark3Parser(program, expMethod, r, db));
 		else if (cif::VERBOSE > 0)
-			std::cerr << "Skipping unknown program (" << program << ") in REMARK 3" << std::endl;
+			std::cerr << "Skipping unknown program (" << program << ") in REMARK 3\n";
 	}

 	sort(scores.begin(), scores.end());
@@ -1429,7 +1440,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 	if (guessProgram)
 	{
 		if (cif::VERBOSE > 0)
-			std::cerr << "Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match" << std::endl;
+			std::cerr << "Unknown or untrusted program in REMARK 3, trying all parsers to see if there is a match\n";

 		tryParser(new BUSTER_TNT_Remark3Parser("BUSTER-TNT", expMethod, r, db));
 		tryParser(new CNS_Remark3Parser("CNS", expMethod, r, db));
@@ -1454,7 +1465,7 @@ bool Remark3Parser::parse(const std::string &expMethod, PDBRecord *r, cif::datab
 		auto &best = scores.front();

 		if (cif::VERBOSE > 0)
-			std::cerr << "Choosing " << best.parser->program() << " version '" << best.parser->version() << "' as refinement program. Score = " << best.score << std::endl;
+			std::cerr << "Choosing " << best.parser->program() << " version '" << best.parser->version() << "' as refinement program. Score = " << best.score << '\n';

 		auto &software = db["software"];
 		std::string program = best.parser->program();
--- a/include/cif++/pdb/pdb2cif_remark_3.hpp
+++ b/include/cif++/pdb/pdb2cif_remark_3.hpp
@@ -26,7 +26,7 @@

 #pragma once

-#include <cif++/pdb/pdb2cif.hpp>
+#include "pdb_record.hpp"

 // --------------------------------------------------------------------

--- a/src/pdb/pdb_record.hpp
+++ b/src/pdb/pdb_record.hpp
@@ -0,0 +1,63 @@
+/*-
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+ * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include "cif++/file.hpp"
+
+/// \file pdb_record.hpp
+
+namespace cif::pdb
+{
+
+// --------------------------------------------------------------------
+
+struct PDBRecord // One parsed record (line) of a PDB file: a record name plus its value text.
+{
+	PDBRecord *mNext; // Pointer to another record; presumably the next one in a singly linked list -- confirm in the parser
+	uint32_t mLineNr; // Line number of this record; the parser prints it in its "at line N" diagnostics
+	char mName[11];   // Record name buffer, e.g. "REMARK 400" (10 characters; presumably NUL-terminated -- confirm)
+	size_t mVlen;     // Presumably the number of characters stored in mValue -- confirm in the constructor
+	char mValue[1];   // NOTE(review): looks like the first byte of variable-length storage allocated via operator new(size, vLen); should stay the last member
+
+	PDBRecord(uint32_t lineNr, const std::string &name, const std::string &value); // Builds a record from a line number, a name and a value
+	~PDBRecord();
+
+	void *operator new(size_t); // Class-specific allocation without a value length
+	void *operator new(size_t size, size_t vLen); // Placement form taking a value length; presumably reserves room for vLen value characters -- confirm
+
+	void operator delete(void *p); // Class-specific deallocation matching operator new(size_t)
+	void operator delete(void *p, size_t vLen); // Counterpart of operator new(size_t, size_t)
+
+	bool is(const char *name) const; // Tests this record against a record name; callers pass e.g. "SPRSDE" or "LINK  "
+
+	char vC(size_t column); // Single character of the value at the given column
+	std::string vS(size_t columnFirst, size_t columnLast = std::numeric_limits<size_t>::max()); // Text of a column range; by default runs to the largest size_t, presumably the end of the value
+	int vI(int columnFirst, int columnLast); // Column range returned as an int
+	std::string vF(size_t columnFirst, size_t columnLast); // Column range holding a floating point field, returned as a string rather than a number
+};
+
+} // namespace cif::pdb
--- a/src/pdb/tls.cpp
+++ b/src/pdb/tls.cpp
--- a/src/point.cpp
+++ b/src/point.cpp
@@ -24,7 +24,8 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/point.hpp>
+#include "cif++/point.hpp"
+#include "cif++/matrix.hpp"

 #include <cassert>
 #include <random>
@@ -32,245 +33,6 @@
 namespace cif
 {

-// --------------------------------------------------------------------
-// We're using expression templates here
-
-template <typename M>
-class MatrixExpression
-{
-  public:
-	uint32_t dim_m() const { return static_cast<const M &>(*this).dim_m(); }
-	uint32_t dim_n() const { return static_cast<const M &>(*this).dim_n(); }
-
-	double &operator()(uint32_t i, uint32_t j)
-	{
-		return static_cast<M &>(*this).operator()(i, j);
-	}
-
-	double operator()(uint32_t i, uint32_t j) const
-	{
-		return static_cast<const M &>(*this).operator()(i, j);
-	}
-};
-
-// --------------------------------------------------------------------
-// matrix is m x n, addressing i,j is 0 <= i < m and 0 <= j < n
-// element m i,j is mapped to [i * n + j] and thus storage is row major
-
-class Matrix : public MatrixExpression<Matrix>
-{
-  public:
-	template <typename M2>
-	Matrix(const MatrixExpression<M2> &m)
-		: m_m(m.dim_m())
-		, m_n(m.dim_n())
-		, m_data(m_m * m_n)
-	{
-		for (uint32_t i = 0; i < m_m; ++i)
-		{
-			for (uint32_t j = 0; j < m_n; ++j)
-				operator()(i, j) = m(i, j);
-		}
-	}
-
-	Matrix(size_t m, size_t n, double v = 0)
-		: m_m(m)
-		, m_n(n)
-		, m_data(m_m * m_n)
-	{
-		std::fill(m_data.begin(), m_data.end(), v);
-	}
-
-	Matrix() = default;
-	Matrix(Matrix &&m) = default;
-	Matrix(const Matrix &m) = default;
-	Matrix &operator=(Matrix &&m) = default;
-	Matrix &operator=(const Matrix &m) = default;
-
-	size_t dim_m() const { return m_m; }
-	size_t dim_n() const { return m_n; }
-
-	double operator()(size_t i, size_t j) const
-	{
-		assert(i < m_m);
-		assert(j < m_n);
-		return m_data[i * m_n + j];
-	}
-
-	double &operator()(size_t i, size_t j)
-	{
-		assert(i < m_m);
-		assert(j < m_n);
-		return m_data[i * m_n + j];
-	}
-
-  private:
-	size_t m_m = 0, m_n = 0;
-	std::vector<double> m_data;
-};
-
-// --------------------------------------------------------------------
-
-class SymmetricMatrix : public MatrixExpression<SymmetricMatrix>
-{
-  public:
-	SymmetricMatrix(uint32_t n, double v = 0)
-		: m_n(n)
-		, m_data((m_n * (m_n + 1)) / 2)
-	{
-		std::fill(m_data.begin(), m_data.end(), v);
-	}
-
-	SymmetricMatrix() = default;
-	SymmetricMatrix(SymmetricMatrix &&m) = default;
-	SymmetricMatrix(const SymmetricMatrix &m) = default;
-	SymmetricMatrix &operator=(SymmetricMatrix &&m) = default;
-	SymmetricMatrix &operator=(const SymmetricMatrix &m) = default;
-
-	uint32_t dim_m() const { return m_n; }
-	uint32_t dim_n() const { return m_n; }
-
-	double operator()(uint32_t i, uint32_t j) const
-	{
-		return i < j
-		           ? m_data[(j * (j + 1)) / 2 + i]
-		           : m_data[(i * (i + 1)) / 2 + j];
-	}
-
-	double &operator()(uint32_t i, uint32_t j)
-	{
-		if (i > j)
-			std::swap(i, j);
-		assert(j < m_n);
-		return m_data[(j * (j + 1)) / 2 + i];
-	}
-
-  private:
-	uint32_t m_n;
-	std::vector<double> m_data;
-};
-
-class IdentityMatrix : public MatrixExpression<IdentityMatrix>
-{
-  public:
-	IdentityMatrix(uint32_t n)
-		: m_n(n)
-	{
-	}
-
-	uint32_t dim_m() const { return m_n; }
-	uint32_t dim_n() const { return m_n; }
-
-	double operator()(uint32_t i, uint32_t j) const
-	{
-		return i == j ? 1 : 0;
-	}
-
-  private:
-	uint32_t m_n;
-};
-
-// --------------------------------------------------------------------
-// matrix functions, implemented as expression templates
-
-template <typename M1, typename M2>
-class MatrixSubtraction : public MatrixExpression<MatrixSubtraction<M1, M2>>
-{
-  public:
-	MatrixSubtraction(const M1 &m1, const M2 &m2)
-		: m_m1(m1)
-		, m_m2(m2)
-	{
-		assert(m_m1.dim_m() == m_m2.dim_m());
-		assert(m_m1.dim_n() == m_m2.dim_n());
-	}
-
-	uint32_t dim_m() const { return m_m1.dim_m(); }
-	uint32_t dim_n() const { return m_m1.dim_n(); }
-
-	double operator()(uint32_t i, uint32_t j) const
-	{
-		return m_m1(i, j) - m_m2(i, j);
-	}
-
-  private:
-	const M1 &m_m1;
-	const M2 &m_m2;
-};
-
-template <typename M1, typename M2>
-MatrixSubtraction<M1, M2> operator-(const MatrixExpression<M1> &m1, const MatrixExpression<M2> &m2)
-{
-	return MatrixSubtraction(*static_cast<const M1 *>(&m1), *static_cast<const M2 *>(&m2));
-}
-
-template <typename M>
-class MatrixMultiplication : public MatrixExpression<MatrixMultiplication<M>>
-{
-  public:
-	MatrixMultiplication(const M &m, double v)
-		: m_m(m)
-		, m_v(v)
-	{
-	}
-
-	uint32_t dim_m() const { return m_m.dim_m(); }
-	uint32_t dim_n() const { return m_m.dim_n(); }
-
-	double operator()(uint32_t i, uint32_t j) const
-	{
-		return m_m(i, j) * m_v;
-	}
-
-  private:
-	const M &m_m;
-	double m_v;
-};
-
-template <typename M>
-MatrixMultiplication<M> operator*(const MatrixExpression<M> &m, double v)
-{
-	return MatrixMultiplication(*static_cast<const M *>(&m), v);
-}
-
-// --------------------------------------------------------------------
-
-template <class M1>
-Matrix Cofactors(const M1 &m)
-{
-	Matrix cf(m.dim_m(), m.dim_m());
-
-	const size_t ixs[4][3] = {
-		{ 1, 2, 3 },
-		{ 0, 2, 3 },
-		{ 0, 1, 3 },
-		{ 0, 1, 2 }
-	};
-
-	for (size_t x = 0; x < 4; ++x)
-	{
-		const size_t *ix = ixs[x];
-
-		for (size_t y = 0; y < 4; ++y)
-		{
-			const size_t *iy = ixs[y];
-
-			cf(x, y) =
-				m(ix[0], iy[0]) * m(ix[1], iy[1]) * m(ix[2], iy[2]) +
-				m(ix[0], iy[1]) * m(ix[1], iy[2]) * m(ix[2], iy[0]) +
-				m(ix[0], iy[2]) * m(ix[1], iy[0]) * m(ix[2], iy[1]) -
-				m(ix[0], iy[2]) * m(ix[1], iy[1]) * m(ix[2], iy[0]) -
-				m(ix[0], iy[1]) * m(ix[1], iy[0]) * m(ix[2], iy[2]) -
-				m(ix[0], iy[0]) * m(ix[1], iy[2]) * m(ix[2], iy[1]);
-
-			if ((x + y) % 2 == 1)
-				cf(x, y) *= -1;
-		}
-	}
-
-	return cf;
-}
-
 // --------------------------------------------------------------------

 template<typename T>
@@ -299,13 +61,14 @@ quaternion_type<T> normalize(quaternion_type<T> q)

 quaternion construct_from_angle_axis(float angle, point axis)
 {
-	auto q = std::cos((angle * kPI / 180) / 2);
-	auto s = std::sqrt(1 - q * q);
+	angle = static_cast<float>((angle * kPI / 180) / 2);
+	auto s = std::sin(angle);
+	auto c = std::cos(angle);

 	axis.normalize();

 	return normalize(quaternion{
-		static_cast<float>(q),
+		static_cast<float>(c),
 		static_cast<float>(s * axis.m_x),
 		static_cast<float>(s * axis.m_y),
 		static_cast<float>(s * axis.m_z) });
@@ -356,7 +119,7 @@ point center_points(std::vector<point> &Points)
 }

 quaternion construct_for_dihedral_angle(point p1, point p2, point p3, point p4,
-	float angle, float esd)
+	float angle, float /*esd*/)
 {
 	p1 -= p3;
 	p2 -= p3;
@@ -364,33 +127,10 @@ quaternion construct_for_dihedral_angle(point p1, point p2, point p3, point p4,
 	p3 -= p3;

 	quaternion q;
-	auto axis = p2;
+	auto axis = -p2;

 	float dh = dihedral_angle(p1, p2, p3, p4);
-	for (int iteration = 0; iteration < 100; ++iteration)
-	{
-		float delta = std::fmod(angle - dh, 360.0f);
-
-		if (delta < -180)
-			delta += 360;
-		if (delta > 180)
-			delta -= 360;
-
-		if (std::abs(delta) < esd)
-			break;
-
-		// if (iteration > 0)
-		// 	std::cout << cif::coloured(("iteration " + std::to_string(iteration)).c_str(), cif::scBLUE, cif::scBLACK) << " delta: " << delta << std::endl;
-
-		auto q2 = construct_from_angle_axis(delta, axis);
-		q = iteration == 0 ? q2 : q * q2;
-			
-		p4.rotate(q2);
-
-		dh = dihedral_angle(p1, p2, p3, p4);
-	}
-
-	return q;
+	return construct_from_angle_axis(angle - dh, axis);
 }

 point centroid(const std::vector<point> &pts)
@@ -465,8 +205,8 @@ double LargestDepressedQuarticSolution(double a, double b, double c)

 quaternion align_points(const std::vector<point> &pa, const std::vector<point> &pb)
 {
-	// First calculate M, a 3x3 Matrix containing the sums of products of the coordinates of A and B
-	Matrix M(3, 3, 0);
+	// First calculate M, a 3x3 matrix containing the sums of products of the coordinates of A and B
+	matrix3x3<double> M;

 	for (uint32_t i = 0; i < pa.size(); ++i)
 	{
@@ -484,8 +224,8 @@ quaternion align_points(const std::vector<point> &pa, const std::vector<point> &
 		M(2, 2) += a.m_z * b.m_z;
 	}

-	// Now calculate N, a symmetric 4x4 Matrix
-	SymmetricMatrix N(4);
+	// Now calculate N, a symmetric 4x4 matrix
+	symmetric_matrix4x4<double> N(4);

 	N(0, 0) = M(0, 0) + M(1, 1) + M(2, 2);
 	N(0, 1) = M(1, 2) - M(2, 1);
@@ -534,16 +274,22 @@ quaternion align_points(const std::vector<point> &pa, const std::vector<point> &
 	double lambda = LargestDepressedQuarticSolution(C, D, E);

 	// calculate t = (N - λI)
-	Matrix t = N - IdentityMatrix(4) * lambda;
+	matrix<double> t(N - identity_matrix(4) * lambda);

-	// calculate a Matrix of cofactors for t
-	Matrix cf = Cofactors(t);
+	// calculate a matrix of cofactors for t
+	auto cf = matrix_cofactors(t);

 	int maxR = 0;
+	double maxCF = std::abs(cf(0, 0));
+
 	for (int r = 1; r < 4; ++r)
 	{
-		if (std::abs(cf(r, 0)) > std::abs(cf(maxR, 0)))
+		auto cfr = std::abs(cf(r, 0));
+		if (maxCF < cfr)
+		{
+			maxCF = cfr;
 			maxR = r;
+		}
 	}

 	quaternion q(
--- a/src/row.cpp
+++ b/src/row.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/category.hpp>
+#include "cif++/category.hpp"

 namespace cif
 {
--- a/src/symmetry.cpp
+++ b/src/symmetry.cpp
@@ -1,17 +1,17 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- * 
+ *
 * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
- * 
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- * 
+ *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- * 
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -24,23 +24,317 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/symmetry.hpp>
+#include "cif++/symmetry.hpp"
+#include "cif++/datablock.hpp"
+#include "cif++/point.hpp"

 #include <stdexcept>

-#include "./symop_table_data.hpp"
+#include "symop_table_data.hpp"
+
+#include <Eigen/Eigenvalues>

 namespace cif
 {

 // --------------------------------------------------------------------
-// Unfortunately, clipper has a different numbering scheme than PDB
-// for rotation numbers. So we created a table to map those.
-// Perhaps a bit over the top, but hey....
+
+cell::cell(float a, float b, float c, float alpha, float beta, float gamma)
+	: m_a(a) // the three lengths are stored as given
+	, m_b(b)
+	, m_c(c)
+	, m_alpha(alpha) // the three angles are in degrees; init() converts them to radians
+	, m_beta(beta)
+	, m_gamma(gamma)
+{
+	init(); // fills m_orthogonal and m_fractional from the six parameters
+}
+
+cell::cell(const datablock &db)
+{
+	auto &_cell = db["cell"];
+	// Read the six cell parameters from the first row of the "cell" category.
+	tie(m_a, m_b, m_c, m_alpha, m_beta, m_gamma) =
+		_cell.front().get("length_a", "length_b", "length_c", "angle_alpha", "angle_beta", "angle_gamma");
+	// NOTE(review): front() is used without checking that the category has a row -- confirm callers guarantee one
+	init();
+}
+
+void cell::init()
+{
+	// Build m_orthogonal from the cell lengths and angles, and m_fractional as its inverse.
+	auto alpha = (m_alpha * kPI) / 180; // degrees -> radians
+	auto beta = (m_beta * kPI) / 180;
+	auto gamma = (m_gamma * kPI) / 180;
+	// NOTE(review): divides by sin(beta) * sin(gamma); an angle of 0 or 180 degrees is not guarded against here
+	auto alpha_star = std::acos((std::cos(gamma) * std::cos(beta) - std::cos(alpha)) / (std::sin(beta) * std::sin(gamma)));
+	// Start from identity_matrix(3); only the six elements assigned below are overwritten.
+	m_orthogonal = identity_matrix(3);
+	m_orthogonal(0, 0) = static_cast<float>(m_a);
+	m_orthogonal(0, 1) = static_cast<float>(m_b * std::cos(gamma));
+	m_orthogonal(0, 2) = static_cast<float>(m_c * std::cos(beta));
+	m_orthogonal(1, 1) = static_cast<float>(m_b * std::sin(gamma));
+	m_orthogonal(1, 2) = static_cast<float>(-m_c * std::sin(beta) * std::cos(alpha_star));
+	m_orthogonal(2, 2) = static_cast<float>(m_c * std::sin(beta) * std::sin(alpha_star));
+	// The fractional matrix is the inverse of the orthogonal one.
+	m_fractional = inverse(m_orthogonal);
+}
+
+float cell::get_volume() const
+{
+	// Volume computed from the three cell lengths and the three cell angles (stored in degrees).
+	auto alpha = (m_alpha * kPI) / 180; // degrees -> radians
+	auto beta = (m_beta * kPI) / 180;
+	auto gamma = (m_gamma * kPI) / 180;
+	auto cos_alpha = std::cos(alpha);
+	auto cos_beta = std::cos(beta);
+	auto cos_gamma = std::cos(gamma);
+	// a * b * c * sqrt(1 - cos^2(alpha) - cos^2(beta) - cos^2(gamma) + 2 cos(alpha) cos(beta) cos(gamma))
+	auto vol = m_a * m_b * m_c;
+	vol *= std::sqrt(1.0f - cos_alpha * cos_alpha - cos_beta * cos_beta - cos_gamma * cos_gamma + 2.0f * cos_alpha * cos_beta * cos_gamma);
+	// NOTE(review): a negative radicand (inconsistent angles) yields NaN; this is not checked here
+	return vol;
+}

 // --------------------------------------------------------------------

-int get_space_group_number(std::string spacegroup)
+sym_op::sym_op(std::string_view s)
+{
+	// Parse "<nr>_<a><b><c>" (e.g. "1_555"); throws std::invalid_argument when s does not start with that form.
+	auto e = s.data() + s.length();
+	int rnri = 256;	// default to unexisting number
+	auto r = std::from_chars(s.data(), e, rnri);
+
+	// Validate the remaining length and the three digits before reading them: a short string must never be read past its end.
+	auto is_digit = [](char ch) { return ch >= '0' and ch <= '9'; };
+	if (r.ec != std::errc() or rnri < 0 or rnri > 192 or e - r.ptr < 4 or r.ptr[0] != '_' or not (is_digit(r.ptr[1]) and is_digit(r.ptr[2]) and is_digit(r.ptr[3])))
+		throw std::invalid_argument("Could not convert string into sym_op");
+	m_nr = static_cast<uint8_t>(rnri); // safe: 0 <= rnri <= 192 was checked above
+	m_ta = r.ptr[1] - '0';
+	m_tb = r.ptr[2] - '0';
+	m_tc = r.ptr[3] - '0';
+}
+
+std::string sym_op::string() const
+{
+	// Format as "<nr>_<a><b><c>": the number, an underscore and one character per translation value.
+	char b[9]; // at most 4 characters for the number (checked below) + '_' + 3 characters + terminating 0
+	auto r = std::to_chars(b, b + sizeof(b), m_nr);
+	if (r.ec != std::errc() or r.ptr > b + 4)
+		throw std::runtime_error("Could not write out symmetry operation to string");
+	// Append the separator and the three translation values as characters relative to '0'.
+	*r.ptr++ = '_';
+	*r.ptr++ = '0' + m_ta;
+	*r.ptr++ = '0' + m_tb;
+	*r.ptr++ = '0' + m_tc;
+	*r.ptr = 0;
+	return { b, static_cast<size_t>(r.ptr - b) };
+}
+
+// --------------------------------------------------------------------
+
+transformation::transformation(const symop_data &data)
+{
+	// Build the transformation from the 15 integers of a symop_data: d[0..8] fill the 3x3 rotation row by row.
+	const auto &d = data.data();
+	m_rotation(0, 0) = static_cast<float>(d[0]);
+	m_rotation(0, 1) = static_cast<float>(d[1]);
+	m_rotation(0, 2) = static_cast<float>(d[2]);
+	m_rotation(1, 0) = static_cast<float>(d[3]);
+	m_rotation(1, 1) = static_cast<float>(d[4]);
+	m_rotation(1, 2) = static_cast<float>(d[5]);
+	m_rotation(2, 0) = static_cast<float>(d[6]);
+	m_rotation(2, 1) = static_cast<float>(d[7]);
+	m_rotation(2, 2) = static_cast<float>(d[8]);
+	// Derive m_q from the rotation just stored (this may leave m_q untouched, see try_create_quaternion).
+	try_create_quaternion();
+	// d[9..14] are three numerator/denominator pairs; a zero numerator skips the division altogether.
+	m_translation.m_x = static_cast<float>(d[9] == 0 ? 0 : 1.0 * d[9] / d[10]);
+	m_translation.m_y = static_cast<float>(d[11] == 0 ? 0 : 1.0 * d[11] / d[12]);
+	m_translation.m_z = static_cast<float>(d[13] == 0 ? 0 : 1.0 * d[13] / d[14]);
+}
+
+transformation::transformation(const matrix3x3<float> &r, const cif::point &t)
+	: m_rotation(r) // rotation and translation are copied as given
+	, m_translation(t)
+{
+	try_create_quaternion(); // derive m_q from m_rotation when possible
+}
+
+void transformation::try_create_quaternion()
+{
+	// Try to set m_q from m_rotation; m_q is left untouched when no eigenvalue close to 1 is found.
+	float Qxx = m_rotation(0, 0);
+	float Qxy = m_rotation(0, 1);
+	float Qxz = m_rotation(0, 2);
+	float Qyx = m_rotation(1, 0);
+	float Qyy = m_rotation(1, 1);
+	float Qyz = m_rotation(1, 2);
+	float Qzx = m_rotation(2, 0);
+	float Qzy = m_rotation(2, 1);
+	float Qzz = m_rotation(2, 2);
+	// Symmetric 4x4 matrix assembled from the nine rotation elements.
+	Eigen::Matrix4f em;
+	em << Qxx - Qyy - Qzz, Qyx + Qxy, Qzx + Qxz, Qzy - Qyz,
+			Qyx + Qxy, Qyy - Qxx - Qzz, Qzy + Qyz, Qxz - Qzx,
+			Qzx + Qxz, Qzy + Qyz, Qzz - Qxx - Qyy, Qyx - Qxy,
+			Qzy - Qyz, Qxz - Qzx, Qyx - Qxy, Qxx + Qyy + Qzz;
+
+	Eigen::EigenSolver<Eigen::Matrix4f> es(em / 3); // eigen-decomposition of the matrix scaled by 1/3
+
+	auto ev = es.eigenvalues();
+	// Use the first eigenvector whose eigenvalue has a real part within 0.01 of 1.
+	for (size_t j = 0; j < 4; ++j)
+	{
+		if (std::abs(ev[j].real() - 1) > 0.01)
+			continue;
+		
+		auto col = es.eigenvectors().col(j);
+		// Component 3 is passed first -- presumably the scalar part of cif::quaternion; verify against its constructor.
+		m_q = normalize(cif::quaternion{
+			static_cast<float>(col(3).real()),
+			static_cast<float>(col(0).real()),
+			static_cast<float>(col(1).real()),
+			static_cast<float>(col(2).real()) });
+
+		break;
+	}
+}
+
+transformation operator*(const transformation &lhs, const transformation &rhs)
+{
+	// Combine two transformations: rotation = lhs.R * rhs.R, translation = lhs.R * rhs.T + lhs.T.
+	auto r = lhs.m_rotation * rhs.m_rotation;
+	auto t = lhs.m_rotation * rhs.m_translation;
+	t = t + lhs.m_translation;
+	return transformation(r, t);
+}
+
+transformation inverse(const transformation &t)
+{
+	auto inv_matrix = inverse(t.m_rotation); // matrix inverse of the rotation part
+	return { inv_matrix, -(inv_matrix * t.m_translation) }; // translation becomes -(R^-1 * T)
+}
+
+// --------------------------------------------------------------------
+
+spacegroup::spacegroup(int nr)
+	: m_nr(nr)
+{
+	// Binary search for the first kSymopNrTable entry whose spacegroup() is not less than m_nr (assumes the table is ordered by that number -- TODO confirm).
+	const size_t N = kSymopNrTableSize;
+	int32_t L = 0, R = static_cast<int32_t>(N - 1);
+	while (L <= R)
+	{
+		int32_t i = (L + R) / 2;
+		if (kSymopNrTable[i].spacegroup() < m_nr)
+			L = i + 1;
+		else
+			R = i - 1;
+	}
+	m_index = L;
+	// Collect a transformation for every consecutive entry that belongs to this space group; none are added when nr is not in the table.
+	for (size_t i = L; i < N and kSymopNrTable[i].spacegroup() == m_nr; ++i)
+		emplace_back(kSymopNrTable[i].symop().data());
+}
+
+std::string spacegroup::get_name() const
+{
+	// Linear scan of kSpaceGroups for the first entry with this number; throws std::runtime_error when there is none.
+	for (auto &s : kSpaceGroups)
+	{
+		if (s.nr == m_nr)
+			return s.name;
+	}
+	throw std::runtime_error("Spacegroup has an invalid number: " + std::to_string(m_nr));
+}
+
+point offsetToOrigin(const cell &c, const point &p)
+{
+	// Returns the offset d, a whole multiple of the cell lengths per axis, that brings p + d into [-a, a] x [-b, b] x [-c, c].
+	if (c.get_a() <= 0 or c.get_b() <= 0 or c.get_c() <= 0) // the loops below cannot terminate with a non-positive step
+		throw std::runtime_error("Invalid cell, contains a dimension that is not positive");
+	point d{};
+	while (p.m_x + d.m_x < -(c.get_a()))
+		d.m_x += c.get_a();
+	while (p.m_x + d.m_x > (c.get_a()))
+		d.m_x -= c.get_a();
+	while (p.m_y + d.m_y < -(c.get_b()))
+		d.m_y += c.get_b();
+	while (p.m_y + d.m_y > (c.get_b()))
+		d.m_y -= c.get_b();
+	while (p.m_z + d.m_z < -(c.get_c()))
+		d.m_z += c.get_c();
+	while (p.m_z + d.m_z > (c.get_c()))
+		d.m_z -= c.get_c();
+
+	return d;
+}
+
+point offsetToOriginFractional(const point &p)
+{
+	// Returns the whole-number offset d for which every component of p + d lies in [-0.5, 0.5].
+	// Computed in closed form: stepping by 1 in a loop takes O(|p|) iterations and never
+	// terminates for infinite or very large coordinates. Values already in range (and NaN) give 0.
+	auto shift = [](float v) -> float
+	{
+		if (v > 0.5f)
+			return -static_cast<float>(std::ceil(static_cast<double>(v) - 0.5));
+		if (v < -0.5f)
+			return static_cast<float>(std::ceil(-static_cast<double>(v) - 0.5));
+		return 0.0f;
+	};
+
+	point d{};
+	d.m_x = shift(p.m_x);
+	d.m_y = shift(p.m_y);
+	d.m_z = shift(p.m_z);
+
+	return d;
+}
+
+point spacegroup::operator()(const point &pt, const cell &c, sym_op symop) const
+{
+	// Apply symmetry operation symop to pt (orthogonal coordinates in, orthogonal coordinates out).
+	if (symop.m_nr < 1 or symop.m_nr > size())
+		throw std::out_of_range("symmetry operator number out of range");
+	// The operator number is 1-based, at() takes a 0-based index.
+	transformation t = at(symop.m_nr - 1);
+	// The translation values are stored with an offset of 5, so a value of 5 adds no extra translation.
+	t.m_translation.m_x += symop.m_ta - 5;
+	t.m_translation.m_y += symop.m_tb - 5;
+	t.m_translation.m_z += symop.m_tc - 5;
+	// Work in fractional coordinates, on the point shifted by the offset from offsetToOriginFractional.
+	auto fpt = fractional(pt, c);
+	auto o = offsetToOriginFractional(fpt);
+	auto spt = t(fpt + o) - o; // transform the shifted point, then undo the shift
+
+	return orthogonal(spt, c);
+}
+
+point spacegroup::inverse(const point &pt, const cell &c, sym_op symop) const
+{
+	// Apply the inverse of symmetry operation symop to pt (orthogonal coordinates in and out).
+	if (symop.m_nr < 1 or symop.m_nr > size())
+		throw std::out_of_range("symmetry operator number out of range");
+	// The operator number is 1-based, at() takes a 0-based index.
+	transformation t = at(symop.m_nr - 1);
+	// The translation values are stored with an offset of 5, so a value of 5 adds no extra translation.
+	t.m_translation.m_x += symop.m_ta - 5;
+	t.m_translation.m_y += symop.m_tb - 5;
+	t.m_translation.m_z += symop.m_tc - 5;
+	// Work in fractional coordinates, on the point shifted by the offset from offsetToOriginFractional.
+	auto fpt = fractional(pt, c);
+	auto o = offsetToOriginFractional(fpt);
+
+	auto it = cif::inverse(t); // invert the complete transformation, including the added translation
+	auto spt = it(fpt + o) - o; // transform the shifted point, then undo the shift
+	return orthogonal(spt, c);
+}
+
+// --------------------------------------------------------------------
+
+int get_space_group_number(std::string_view spacegroup)
 {
 	if (spacegroup == "P 21 21 2 A")
 		spacegroup = "P 21 21 2 (a)";
@@ -73,7 +367,7 @@ int get_space_group_number(std::string spacegroup)
 	{
 		for (size_t i = 0; i < kNrOfSpaceGroups; ++i)
 		{
-			auto& sp = kSpaceGroups[i];
+			auto &sp = kSpaceGroups[i];
 			if (sp.xHM == spacegroup)
 			{
 				result = sp.nr;
@@ -83,14 +377,14 @@ int get_space_group_number(std::string spacegroup)
 	}

 	if (result == 0)
-		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
-	
+		throw std::runtime_error("Spacegroup name " + std::string(spacegroup) + " was not found in table");
+
 	return result;
 }

 // --------------------------------------------------------------------

-int get_space_group_number(std::string spacegroup, space_group_name type)
+int get_space_group_number(std::string_view spacegroup, space_group_name type)
 {
 	if (spacegroup == "P 21 21 2 A")
 		spacegroup = "P 21 21 2 (a)";
@@ -145,9 +439,99 @@ int get_space_group_number(std::string spacegroup, space_group_name type)

 	// not found, see if we can find a match based on xHM name
 	if (result == 0)
-		throw std::runtime_error("Spacegroup name " + spacegroup + " was not found in table");
-	
+		throw std::runtime_error("Spacegroup name " + std::string(spacegroup) + " was not found in table");
+
 	return result;
 }

+int get_space_group_number(const datablock &db)
+{
+	// Return the "Int_Tables_number" of the single row in the "symmetry" category; throws when there is not exactly one row.
+	auto &_symmetry = db["symmetry"];
+	if (_symmetry.size() != 1)
+		throw std::runtime_error("Could not find a unique symmetry in this mmCIF file");
+	
+	return _symmetry.front().get<int>("Int_Tables_number");
 }
+
+// --------------------------------------------------------------------
+
+std::tuple<float,point,sym_op> crystal::closest_symmetry_copy(point a, point b) const
+{
+	// Find the symmetry copy of b closest to a; returns { distance, position of that copy, the symmetry operation that produced it }.
+	if (m_cell.get_a() == 0 or m_cell.get_b() == 0 or m_cell.get_c() == 0)
+		throw std::runtime_error("Invalid cell, contains a dimension that is zero");
+	point result_fsb;
+	float result_d = std::numeric_limits<float>::max();
+	sym_op result_s;
+	// Work in fractional coordinates, with both points shifted by the offset o computed for a.
+	auto fa = fractional(a, m_cell);
+	auto fb = fractional(b, m_cell);
+
+	auto o = offsetToOriginFractional(fa);
+
+	fa = fa + o;
+	fb = fb + o;
+
+	a = orthogonal(fa, m_cell);
+	// Try every operation of the space group; sym_op numbers are 1-based.
+	for (size_t i = 0; i < m_spacegroup.size(); ++i)
+	{
+		sym_op s(static_cast<uint8_t>(i + 1));
+		auto &t = m_spacegroup[i];
+
+		auto fsb = t(fb);
+		// Bring each coordinate of the copy within 0.5 of fa by steps of 1, recording every step in s.
+		while (fsb.m_x - 0.5f > fa.m_x)
+		{
+			fsb.m_x -= 1;
+			s.m_ta -= 1;
+		}
+
+		while (fsb.m_x + 0.5f < fa.m_x)
+		{
+			fsb.m_x += 1;
+			s.m_ta += 1;			
+		}
+
+		while (fsb.m_y - 0.5f > fa.m_y)
+		{
+			fsb.m_y -= 1;
+			s.m_tb -= 1;
+		}
+
+		while (fsb.m_y + 0.5f < fa.m_y)
+		{
+			fsb.m_y += 1;
+			s.m_tb += 1;			
+		}
+
+		while (fsb.m_z - 0.5f > fa.m_z)
+		{
+			fsb.m_z -= 1;
+			s.m_tc -= 1;
+		}
+
+		while (fsb.m_z + 0.5f < fa.m_z)
+		{
+			fsb.m_z += 1;
+			s.m_tc += 1;			
+		}
+		// Compare squared distances in orthogonal space and keep the best candidate so far.
+		auto p = orthogonal(fsb, m_cell);
+		auto dsq = distance_squared(a, p);
+
+		if (result_d > dsq)
+		{
+			result_d = dsq;
+			result_fsb = fsb;
+			result_s = s;
+		}
+	}
+	// Undo the shift o before converting the winning copy back to orthogonal coordinates.
+	auto p = orthogonal(result_fsb - o, m_cell);
+
+	return { std::sqrt(result_d), p, result_s };
+}
+
+} // namespace cif
--- a/src/symop-map-generator.cpp
+++ b/src/symop-map-generator.cpp
@@ -27,6 +27,7 @@
 #include <cassert>

 #include <array>
+#include <charconv>
 #include <iostream>
 #include <iomanip>
 #include <fstream>
@@ -169,7 +170,7 @@ class SymopParser
 	}

 	Token m_lookahead;
-	int m_nr;
+	int m_nr = -1;

 	std::string m_s;
 	std::string::const_iterator m_p, m_e;
@@ -230,14 +231,15 @@ int main(int argc, char* const argv[])

 	try
 	{
-		if (argc != 3)
+		if (argc != 4)
 		{
-			std::cerr << "Usage symop-map-generator <input-file> <output-file>" << std::endl;
+			std::cerr << "Usage symop-map-generator <syminfo.lib-file> <symop.lib-file> < <output-file>\n";
 			exit(1);
 		}

-		fs::path input(argv[1]);
-		fs::path output(argv[2]);
+		fs::path syminfolib(argv[1]);
+		fs::path symoplib(argv[2]);
+		fs::path output(argv[3]);
 		
 		tmpFile = output.parent_path() / (output.filename().string() + ".tmp");

@@ -261,22 +263,51 @@ int main(int argc, char* const argv[])
 		};

 		std::map<int,SymInfoBlock> symInfo;
-		int symopnr, mysymnr = 10000;

-		std::ifstream file(input);
+		std::ifstream file(symoplib);
+		if (not file.is_open())
+			throw std::runtime_error("Could not open symop.lib file");
+
+		std::string line;
+		int sgnr = 0;
+		int rnr = 0;
+
+		while (getline(file, line))
+		{
+			if (line.empty())
+				continue;
+			
+			if (std::isdigit(line[0]))	// start of new spacegroup
+			{
+				auto r = std::from_chars(line.data(), line.data() + line.length(), sgnr);
+				if (r.ec != std::errc())
+					throw std::runtime_error("Error parsing symop.lib file");
+				rnr = 1;
+				continue;
+			}
+
+			if (not std::isspace(line[0]) or sgnr == 0)
+				throw std::runtime_error("Error parsing symop.lib file");
+			
+			SymopParser p;
+			data.emplace_back(sgnr, rnr, p.parse(line));
+			++rnr;
+		}
+
+		file.close();
+
+		file.open(syminfolib);
 		if (not file.is_open())
 			throw std::runtime_error("Could not open syminfo.lib file");

 		enum class State { skip, spacegroup } state = State::skip;

-		std::string line;
-
 		const std::regex rx(R"(^symbol +(Hall|xHM|old) +'(.+?)'(?: +'(.+?)')?$)"),
 			rx2(R"(symbol ccp4 (\d+))");;

 		SymInfoBlock cur = {};

-		std::vector<std::array<int,15>> symops, cenops;
+		// std::vector<std::array<int,15>> symops, cenops;

 		while (getline(file, line))
 		{
@@ -286,9 +317,7 @@ int main(int argc, char* const argv[])
 					if (line == "begin_spacegroup")
 					{
 						state = State::spacegroup;
-						symopnr = 1;
-						++mysymnr;
-						cur = { mysymnr };
+						cur = {};
 					}
 					break;
 				
@@ -314,34 +343,34 @@ int main(int argc, char* const argv[])
 						if (nr != 0)
 							cur.nr = nr;
 					}
-					else if (line.compare(0, 6, "symop ") == 0)
-					{
-						SymopParser p;
-						symops.emplace_back(p.parse(line.substr(6)));
-					}
-					else if (line.compare(0, 6, "cenop ") == 0)
-					{
-						SymopParser p;
-						cenops.emplace_back(p.parse(line.substr(6)));
-					}
+					// else if (line.compare(0, 6, "symop ") == 0)
+					// {
+					// 	SymopParser p;
+					// 	symops.emplace_back(p.parse(line.substr(6)));
+					// }
+					// else if (line.compare(0, 6, "cenop ") == 0)
+					// {
+					// 	SymopParser p;
+					// 	cenops.emplace_back(p.parse(line.substr(6)));
+					// }
 					else if (line == "end_spacegroup")
 					{
-						for (auto& cenop: cenops)
-						{
-							for (auto symop: symops)
-							{
-								symop = move_symop(symop, cenop);
+					// 	for (auto& cenop: cenops)
+					// 	{
+					// 		for (auto symop: symops)
+					// 		{
+					// 			symop = move_symop(symop, cenop);

-								data.emplace_back(cur.nr, symopnr, symop);
-								++symopnr;
-							}
-						}
+					// 			data.emplace_back(cur.nr, symopnr, symop);
+					// 			++symopnr;
+					// 		}
+					// 	}

 						symInfo.emplace(cur.nr, cur);
 						state = State::skip;

-						symops.clear();
-						cenops.clear();
+					// 	symops.clear();
+					// 	cenops.clear();
 					}
 					break;
 				}
@@ -358,7 +387,7 @@ int main(int argc, char* const argv[])
 // and $CLIBD/syminfo.lib using symop-map-generator,
 // part of the PDB-REDO suite of programs.

-#include <cif++/symmetry.hpp>
+#include "cif++/symmetry.hpp"

 namespace cif
 {
@@ -383,15 +412,15 @@ const space_group kSpaceGroups[] =
 			old = '"' + old + '"' + std::string(20 - old.length(), ' ');
 			xHM = '"' + xHM + '"' + std::string(30 - xHM.length(), ' ');

-			for (std::string::size_type p = Hall.length(); p > 0; --p)
+			for (auto p = Hall.begin(); p != Hall.end(); ++p)
 			{
-				if (Hall[p - 1] == '"')
-					Hall.insert(p - 1, "\\", 1);
+				if (*p == '"')
+					p = Hall.insert(p, '\\') + 1;
 			}

 			Hall = '"' + Hall + '"' + std::string(40 - Hall.length(), ' ');

-			out << "\t{ " << old << ", " << xHM << ", " << Hall << ", " << nr << " }," << std::endl;
+			out << "\t{ " << old << ", " << xHM << ", " << Hall << ", " << nr << " },\n";
 		}

 out << R"(
@@ -400,7 +429,7 @@ out << R"(
 const size_t kNrOfSpaceGroups = sizeof(kSpaceGroups) / sizeof(space_group);

 const symop_datablock kSymopNrTable[] = {
-)" << std::endl;
+)";

 		int spacegroupNr = 0;
 		for (auto& sd: data)
@@ -409,14 +438,14 @@ const symop_datablock kSymopNrTable[] = {
 			std::tie(sp, o, std::ignore) = sd;

 			if (sp > spacegroupNr)
-				out << "    // " << symInfo[sp].xHM << std::endl;
+				out << "    // " << symInfo[sp].xHM << '\n';
 			spacegroupNr = sp;

 			out << "    { " << std::setw(3) << sp
 					<< ", " << std::setw(3) << o << ", { ";
 			for (auto& i: std::get<2>(sd))
 				out << std::setw(2) << i << ',';
-			out << " } }," << std::endl;
+			out << " } },\n";
 		}

 		out << R"(};
@@ -424,16 +453,16 @@ const symop_datablock kSymopNrTable[] = {
 const size_t kSymopNrTableSize = sizeof(kSymopNrTable) / sizeof(symop_datablock);

 } // namespace mmcif
-)" << std::endl;
+)";

 		out.close();
 		fs::rename(tmpFile, output);
 	}
 	catch (const std::exception& ex)
 	{
-		std::cerr << std::endl
-			 << "Program terminated due to error:" << std::endl
-			 << ex.what() << std::endl;
+		std::cerr << '\n'
+			 << "Program terminated due to error:\n"
+			 << ex.what() << '\n';
 	}
 	
 	return 0;
--- a/src/symop_table_data.hpp
+++ b/src/symop_table_data.hpp
--- a/src/text.cpp
+++ b/src/text.cpp
@@ -24,7 +24,7 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/text.hpp>
+#include "cif++/text.hpp"

 #include <algorithm>
 #include <cassert>
@@ -236,28 +236,19 @@ std::string cif_id_for_number(int number)
 {
 	std::string result;

-	if (number >= 26 * 26 * 26)
-		result = 'L' + std::to_string(number);
-	else
+	do
 	{
-		if (number >= 26 * 26)
-		{
-			int v = number / (26 * 26);
-			result += char('A' - 1 + v);
-			number %= (26 * 26);
-		}
+		int r = number % 26;
+		result += static_cast<char>('A' + r);

-		if (number >= 26)
-		{
-			int v = number / 26;
-			result += char('A' - 1 + v);
-			number %= 26;
-		}
-
-		result += char('A' + number);
+		number = (number - r) / 26 - 1;
 	}
+	while (number >= 0);
+
+	std::reverse(result.begin(), result.end());

 	assert(not result.empty());
+
 	return result;
 }

--- a/src/utilities.cpp
+++ b/src/utilities.cpp
@@ -24,12 +24,14 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/utilities.hpp>
+#include "cif++/utilities.hpp"
+
 #include "revision.hpp"

 #include <atomic>
 #include <cassert>
 #include <cmath>
+#include <condition_variable>
 #include <cstring>
 #include <deque>
 #include <fstream>
@@ -38,15 +40,9 @@
 #include <iostream>
 #include <map>
 #include <mutex>
-#include <regex>
 #include <sstream>
 #include <thread>

-#if not defined(_MSC_VER)
-#include <sys/ioctl.h>
-#include <termios.h>
-#endif
-
 namespace fs = std::filesystem;

 // --------------------------------------------------------------------
@@ -67,7 +63,7 @@ std::string get_version_nr()

 // --------------------------------------------------------------------

-#ifdef _MSC_VER
+#ifdef _WIN32
 }
 #include <Windows.h>
 #include <libloaderapi.h>
@@ -85,25 +81,10 @@ uint32_t get_terminal_width()
    return csbi.srWindow.Right - csbi.srWindow.Left + 1;
 }

-std::string GetExecutablePath()
-{
-	WCHAR buffer[4096];
-
-	DWORD n = ::GetModuleFileNameW(nullptr, buffer, sizeof(buffer) / sizeof(WCHAR));
-	if (n == 0)
-		throw std::runtime_error("could not get exe path");
-
-	std::wstring ws(buffer);
-
-	// convert from utf16 to utf8
-	std::wstring_convert<std::codecvt_utf8<wchar_t>> conv1;
-	std::string u8str = conv1.to_bytes(ws);
-
-	return u8str;
-}
-
 #else

+#include <sys/ioctl.h>
+#include <termios.h>
 #include <limits.h>

 uint32_t get_terminal_width()
@@ -119,56 +100,59 @@ uint32_t get_terminal_width()
 	return result;
 }

-std::string get_executable_path()
-{
-	using namespace std::literals;
-
-	// This used to be PATH_MAX, but lets simply assume 1024 is enough...
-	char path[1024] = "";
-	if (readlink("/proc/self/exe", path, sizeof(path)) == -1)
-		throw std::runtime_error("could not get exe path "s + strerror(errno));
-	return {path};
-}
-
 #endif

 // --------------------------------------------------------------------

-struct ProgressImpl
+struct progress_bar_impl
 {
-	ProgressImpl(int64_t inMax, const std::string &inAction)
-		: mMax(inMax)
-		, mConsumed(0)
-		, mAction(inAction)
-		, mMessage(inAction)
-		, mThread(std::bind(&ProgressImpl::Run, this))
+	progress_bar_impl(int64_t inMax, const std::string &inAction)
+		: m_max_value(inMax)
+		, m_consumed(0)
+		, m_action(inAction)
+		, m_message(inAction)
+		, m_thread(std::bind(&progress_bar_impl::run, this))
 	{
 	}

-	void Run();
-	void Stop()
-	{
-		mStop = true;
-		if (mThread.joinable())
-			mThread.join();
-	}
+	progress_bar_impl(const progress_bar_impl&) = delete;
+	progress_bar_impl &operator=(const progress_bar_impl &) = delete;

-	void PrintProgress();
-	void PrintDone();
+	~progress_bar_impl();

-	int64_t mMax;
-	std::atomic<int64_t> mConsumed;
-	int64_t mLastConsumed = 0;
-	int mSpinnerIndex = 0;
-	std::string mAction, mMessage;
-	std::mutex mMutex;
-	std::thread mThread;
-	std::chrono::time_point<std::chrono::system_clock>
-		mStart = std::chrono::system_clock::now();
-	bool mStop = false;
+	void run();
+
+	void consumed(int64_t n);
+	void progress(int64_t p);
+	void message(const std::string &msg);
+
+	void print_progress();
+	void print_done();
+
+	using time_point = std::chrono::time_point<std::chrono::system_clock>;
+
+	int64_t m_max_value;
+	std::atomic<int64_t> m_consumed;
+	int64_t m_last_consumed = 0;
+	int m_spinner_index = 0;
+	std::string m_action, m_message;
+	std::mutex m_mutex;
+	std::thread m_thread;
+	time_point m_start = std::chrono::system_clock::now();
+	time_point m_last = std::chrono::system_clock::now();
+	bool m_stop = false;
 };

-void ProgressImpl::Run()
+progress_bar_impl::~progress_bar_impl()
+{
+	// Ask the loop in run() to exit, then wait for the worker thread to finish.
+	m_stop = true;
+	// A real check instead of an assert: join() on a non-joinable thread throws, and an assert is compiled out under NDEBUG.
+	if (m_thread.joinable())
+		m_thread.join();
+}
+
+void progress_bar_impl::run()
 {
 	using namespace std::literals;

@@ -176,22 +160,25 @@ void ProgressImpl::Run()

 	try
 	{
-		for (;;)
+		while (not m_stop)
 		{
-			std::this_thread::sleep_for(2s);
+			auto now = std::chrono::system_clock::now();

-			std::unique_lock lock(mMutex);
-
-			if (mStop or mConsumed == mMax)
-				break;
-
-			auto elapsed = std::chrono::system_clock::now() - mStart;
-
-			if (elapsed < std::chrono::seconds(5))
+			if (now - m_start < 2s or now - m_last < 100ms)
+			{
+				std::this_thread::sleep_for(10ms);
 				continue;
+			}
+
+			std::lock_guard lock(m_mutex);
+
+			if (not printedAny and isatty(STDOUT_FILENO))
+				std::cout << "\x1b[?25l";
+
+			print_progress();

-			PrintProgress();
 			printedAny = true;
+			m_last = std::chrono::system_clock::now();
 		}
 	}
 	catch (...)
@@ -199,93 +186,98 @@ void ProgressImpl::Run()
 	}

 	if (printedAny)
-		PrintDone();
+	{
+		print_done();
+		if (isatty(STDOUT_FILENO))
+			std::cout << "\x1b[?25h";
+	}
 }

-void ProgressImpl::PrintProgress()
+void progress_bar_impl::consumed(int64_t n)
 {
-	//	const char* kBlocks[] = {
-	//		" ",				// 0
-	//		u8"\u258F",			// 1
-	//		u8"\u258E",			// 2
-	//		u8"\u258D",			// 3
-	//		u8"\u258C",			// 4
-	//		u8"\u258B",			// 5
-	//		u8"\u258A",			// 6
-	//		u8"\u2589",			// 7
-	//		u8"\u2588",			// 8
-	//	};
+	m_consumed += n; // add n to the counter (m_consumed is a std::atomic<int64_t>, no lock needed)
+}

+void progress_bar_impl::progress(int64_t p)
+{
+	m_consumed = p; // overwrite the counter with an absolute value instead of adding to it
+}
+
+void progress_bar_impl::message(const std::string &msg)
+{
+	std::unique_lock lock(m_mutex); // run() holds the same mutex while print_progress() reads m_message
+	m_message = msg;
+}
+
+const char* kSpinner[] = {
+	// ".", "o", "O", "0", "O", "o", ".", " "
+	// "⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"
+	 ".", "o", "O", "0", "@", "*", " "
+};
+
+const size_t kSpinnerCount = sizeof(kSpinner) / sizeof(char*);
+
+const int kSpinnerTimeInterval = 100;
+
+const uint32_t kMinBarWidth = 40, kMinMsgWidth = 12;
+
+void progress_bar_impl::print_progress() // draw one line: message, bar, percentage and spinner
+{
 	const char *kBlocks[] = {
-		" ", // 0
-		" ", // 1
-		" ", // 2
-		"-", // 3
-		"-", // 4
-		"-", // 5
-		"=", // 6
-		"=", // 7
-		"=", // 8
+		// "▯", // 0
+		// "▮", // 1
+		"=",
+		"-"
 	};
 
 	uint32_t width = get_terminal_width();
 
-	std::string msg;
-	msg.reserve(width + 1);
-	if (mMessage.length() <= 20)
-	{
-		msg = mMessage;
-		if (msg.length() < 20)
-			msg.append(20 - msg.length(), ' ');
-	}
+	float progress = static_cast<float>(m_consumed) / m_max_value;
+	// Terminals narrower than kMinBarWidth only get a plain percentage line.
+	if (width < kMinBarWidth)
+		std::cout << (100 * progress) << '%' << std::endl;
 	else
-		msg = mMessage.substr(0, 17) + "...";
-
-	msg += " |";
-
-	int64_t consumed = mConsumed;
-	float progress = static_cast<float>(consumed) / mMax;
-	int pi = static_cast<int>(std::ceil(progress * 33 * 8));
-	//	int tw = width - 28;
-	//	int twd = static_cast<int>(tw * progress + 0.5f);
-	//	msg.append(twd, '=');
-	//	msg.append(tw - twd, ' ');
-
-	for (int i = 0; i < 33; ++i)
 	{
-		if (pi <= 0)
-			msg += kBlocks[0];
-		else if (pi >= 8)
-			msg += kBlocks[8];
+		uint32_t bar_width = 7 * width / 10;
+		uint32_t pct_width = 7;
+		uint32_t msg_width = width - bar_width - pct_width - 1;
+		// The line is msg_width + bar_width + 8 columns wide, so a wider message column must be paid for by the bar.
+		if (msg_width < kMinMsgWidth)
+		{
+			bar_width -= kMinMsgWidth - msg_width;
+			msg_width = kMinMsgWidth;
+		}
+
+		std::ostringstream msg;
+		// Pad the message to msg_width, or truncate it and end it with "...".
+		if (m_message.length() <= msg_width)
+		{
+			msg << m_message;
+			if (m_message.length() < msg_width)
+				msg << std::string(msg_width - m_message.length(), ' ');
+		}
 		else
-			msg += kBlocks[pi];
-		pi -= 8;
+			msg << m_message.substr(0, msg_width - 3) << "...";
+
+		msg << ' ';
+
+		uint32_t pi = static_cast<uint32_t>(std::ceil(progress * bar_width));
+
+		for (uint32_t i = 0; i < bar_width; ++i)
+			msg << kBlocks[i > pi ? 1 : 0];
+
+		msg << ' ';
+
+		msg << std::setw(3) << static_cast<int>(std::ceil(progress * 100)) << "% ";
+		// The spinner frame depends on the elapsed time only, not on the amount consumed.
+		auto now = std::chrono::system_clock::now();
+		m_spinner_index = (std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start).count() / kSpinnerTimeInterval) % kSpinnerCount;
+
+		msg << kSpinner[m_spinner_index];
+
+		std::cout << '\r' << msg.str();
+		std::cout.flush();
 	}
-
-	msg.append("| ");
-
-	const char kSpinner[] = {' ', '.', 'o', 'O', '0', 'O', 'o', '.'};
-	const size_t kSpinnerCount = sizeof(kSpinner);
-
-	if (mLastConsumed < consumed)
-	{
-		mLastConsumed = consumed;
-		mSpinnerIndex = (mSpinnerIndex + 1) % kSpinnerCount;
-	}
-
-	const char spinner[2] = {kSpinner[mSpinnerIndex], 0};
-	msg.append(spinner);
-
-	//	int perc = static_cast<int>(100 * progress);
-	//	if (perc < 100)
-	//		msg += ' ';
-	//	if (perc < 10)
-	//		msg += ' ';
-	//	msg += to_string(perc);
-	//	msg += '%';
-
-	std::cout << '\r' << msg;
-	std::cout.flush();
 }

 namespace
@@ -324,12 +316,12 @@ namespace

 } // namespace

-void ProgressImpl::PrintDone()
+void progress_bar_impl::print_done()
 {
-	std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - mStart;
+	std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - m_start;

 	std::ostringstream msgstr;
-	msgstr << mAction << " done in " << elapsed << " seconds";
+	msgstr << m_action << " done in " << elapsed << " seconds";
 	auto msg = msgstr.str();

 	uint32_t width = get_terminal_width();
@@ -340,46 +332,34 @@ void ProgressImpl::PrintDone()
 	std::cout << '\r' << msg << std::endl;
 }

-Progress::Progress(int64_t inMax, const std::string &inAction)
+progress_bar::progress_bar(int64_t inMax, const std::string &inAction) // m_impl stays null unless stdout is a terminal and VERBOSE >= 0
 	: m_impl(nullptr)
 {
 	if (isatty(STDOUT_FILENO) and VERBOSE >= 0)
-		m_impl = new ProgressImpl(inMax, inAction);
+		m_impl = new progress_bar_impl(inMax, inAction); // owned by this object, released in the destructor
 }

-Progress::~Progress()
+progress_bar::~progress_bar() // deleting m_impl runs ~progress_bar_impl, which stops and joins the worker thread
 {
-	if (m_impl != nullptr)
-		m_impl->Stop();
-
 	delete m_impl;
 }

-void Progress::consumed(int64_t inConsumed)
-{
-	if (m_impl != nullptr and
-		(m_impl->mConsumed += inConsumed) >= m_impl->mMax)
-	{
-		m_impl->Stop();
-	}
-}
-
-void Progress::progress(int64_t inProgress)
-{
-	if (m_impl != nullptr and
-		(m_impl->mConsumed = inProgress) >= m_impl->mMax)
-	{
-		m_impl->Stop();
-	}
-}
-
-void Progress::message(const std::string &inMessage)
+void progress_bar::consumed(int64_t inConsumed) // no-op when no implementation was created
 {
 	if (m_impl != nullptr)
-	{
-		std::unique_lock lock(m_impl->mMutex);
-		m_impl->mMessage = inMessage;
-	}
+		m_impl->consumed(inConsumed); // adds inConsumed to the running total
+}
+
+void progress_bar::progress(int64_t inProgress) // no-op when no implementation was created
+{
+	if (m_impl != nullptr)
+		m_impl->progress(inProgress); // sets the running total to an absolute value
+}
+
+void progress_bar::message(const std::string &inMessage) // no-op when no implementation was created
+{
+	if (m_impl != nullptr)
+		m_impl->message(inMessage); // replaces the text shown next to the bar
 }

 } // namespace cif
@@ -405,11 +385,19 @@ struct rsrc_imp
 };
 } // namespace mrsrc

-#if _MSC_VER
+#if _WIN32

-extern "C" CIFPP_EXPORT const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
-extern "C" CIFPP_EXPORT const char *gResourceDataDefault[1] = {};
-extern "C" CIFPP_EXPORT const char *gResourceNameDefault[1] = {};
+#if __MINGW32__
+
+extern "C" __attribute__((weak, alias("gResourceIndexDefault"))) const mrsrc::rsrc_imp gResourceIndex[];
+extern "C" __attribute__((weak, alias("gResourceDataDefault"))) const char gResourceData[];
+extern "C" __attribute__((weak, alias("gResourceNameDefault"))) const char gResourceName[];
+
+#else
+
+extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
+extern "C" const char *gResourceDataDefault[1] = {};
+extern "C" const char *gResourceNameDefault[1] = {};

 extern "C" const mrsrc::rsrc_imp gResourceIndex[];
 extern "C" const char gResourceData[];
@@ -419,6 +407,8 @@ extern "C" const char gResourceName[];
 #pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
 #pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")

+#endif
+
 #else
 extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];
 extern const __attribute__((weak)) char gResourceData[];
@@ -821,12 +811,12 @@ namespace cif

 // --------------------------------------------------------------------

-class ResourcePool
+class resource_pool
 {
  public:
-	static ResourcePool &instance()
+	static resource_pool &instance()
 	{
-		static std::unique_ptr<ResourcePool> s_instance(new ResourcePool);
+		static std::unique_ptr<resource_pool> s_instance(new resource_pool);
 		return *s_instance;
 	}

@@ -856,7 +846,7 @@ class ResourcePool
 	std::unique_ptr<std::istream> load(fs::path name);

  private:
-	ResourcePool();
+	resource_pool();

 	std::unique_ptr<std::ifstream> open(fs::path &p)
 	{
@@ -882,7 +872,7 @@ class ResourcePool
 	std::deque<fs::path> mDirs;
 };

-ResourcePool::ResourcePool()
+resource_pool::resource_pool()
 {
 #if defined(DATA_DIR)
 	pushDir(DATA_DIR);
@@ -899,7 +889,7 @@ ResourcePool::ResourcePool()
 #endif
 }

-std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
+std::unique_ptr<std::istream> resource_pool::load(fs::path name)
 {
 	std::unique_ptr<std::istream> result;
 	std::error_code ec;
@@ -909,6 +899,9 @@ std::unique_ptr<std::istream> ResourcePool::load(fs::path name)
 	if (mLocalResources.count(name.string()))
 		result = open(mLocalResources[name.string()]);

+	if (fs::exists(p, ec) and not ec)
+		result = open(p);
+
 	for (auto di = mDirs.begin(); not result and di != mDirs.end(); ++di)
 	{
 		auto p2 = *di / p;
@@ -931,17 +924,17 @@ std::unique_ptr<std::istream> ResourcePool::load(fs::path name)

 void add_data_directory(std::filesystem::path dataDir)
 {
-	ResourcePool::instance().pushDir(dataDir);
+	resource_pool::instance().pushDir(dataDir);
 }

 void add_file_resource(const std::string &name, std::filesystem::path dataFile)
 {
-	ResourcePool::instance().pushAlias(name, dataFile);
+	resource_pool::instance().pushAlias(name, dataFile);
 }

 std::unique_ptr<std::istream> load_resource(std::filesystem::path name)
 {
-	return ResourcePool::instance().load(name);
+	return resource_pool::instance().load(name);
 }

 } // namespace cif
--- a/src/validate.cpp
+++ b/src/validate.cpp
@@ -24,10 +24,10 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include <cif++/dictionary_parser.hpp>
-#include <cif++/validate.hpp>
-#include <cif++/utilities.hpp>
-#include <cif++/gzio.hpp>
+#include "cif++/validate.hpp"
+#include "cif++/dictionary_parser.hpp"
+#include "cif++/gzio.hpp"
+#include "cif++/utilities.hpp"

 #include <cassert>
 #include <fstream>
@@ -97,24 +97,6 @@ type_validator::~type_validator()
 	delete m_rx;
 }

-template <typename T>
-struct my_from_chars
-{
-	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
-	{
-		return cif::from_chars(a, b, d);
-	}
-};
-
-template <typename T>
-struct std_from_chars
-{
-	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
-	{
-		return std::from_chars(a, b, d);
-	}
-};
-
 int type_validator::compare(std::string_view a, std::string_view b) const
 {
 	int result = 0;
@@ -260,7 +242,7 @@ void category_validator::addItemValidator(item_validator &&v)

 	auto r = m_item_validators.insert(std::move(v));
 	if (not r.second and VERBOSE >= 4)
-		std::cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << std::endl;
+		std::cout << "Could not add validator for item " << v.m_tag << " to category " << m_name << '\n';
 }

 const item_validator *category_validator::get_validator_for_item(std::string_view tag) const
@@ -270,7 +252,7 @@ const item_validator *category_validator::get_validator_for_item(std::string_vie
 	if (i != m_item_validators.end())
 		result = &*i;
 	else if (VERBOSE > 4)
-		std::cout << "No validator for tag " << tag << std::endl;
+		std::cout << "No validator for tag " << tag << '\n';
 	return result;
 }

@@ -280,7 +262,7 @@ void validator::add_type_validator(type_validator &&v)
 {
 	auto r = m_type_validators.insert(std::move(v));
 	if (not r.second and VERBOSE > 4)
-		std::cout << "Could not add validator for type " << v.m_name << std::endl;
+		std::cout << "Could not add validator for type " << v.m_name << '\n';
 }

 const type_validator *validator::get_validator_for_type(std::string_view typeCode) const
@@ -291,7 +273,7 @@ const type_validator *validator::get_validator_for_type(std::string_view typeCod
 	if (i != m_type_validators.end())
 		result = &*i;
 	else if (VERBOSE > 4)
-		std::cout << "No validator for type " << typeCode << std::endl;
+		std::cout << "No validator for type " << typeCode << '\n';
 	return result;
 }

@@ -299,7 +281,7 @@ void validator::add_category_validator(category_validator &&v)
 {
 	auto r = m_category_validators.insert(std::move(v));
 	if (not r.second and VERBOSE > 4)
-		std::cout << "Could not add validator for category " << v.m_name << std::endl;
+		std::cout << "Could not add validator for category " << v.m_name << '\n';
 }

 const category_validator *validator::get_validator_for_category(std::string_view category) const
@@ -309,7 +291,7 @@ const category_validator *validator::get_validator_for_category(std::string_view
 	if (i != m_category_validators.end())
 		result = &*i;
 	else if (VERBOSE > 4)
-		std::cout << "No validator for category " << category << std::endl;
+		std::cout << "No validator for category " << category << '\n';
 	return result;
 }

@@ -325,7 +307,7 @@ item_validator *validator::get_validator_for_item(std::string_view tag) const
 		result = const_cast<item_validator *>(cv->get_validator_for_item(item));

 	if (result == nullptr and VERBOSE > 4)
-		std::cout << "No validator for item " << tag << std::endl;
+		std::cout << "No validator for item " << tag << '\n';

 	return result;
 }
@@ -394,99 +376,106 @@ void validator::report_error(const std::string &msg, bool fatal) const
 	if (m_strict or fatal)
 		throw validation_error(msg);
 	else if (VERBOSE > 0)
-		std::cerr << msg << std::endl;
+		std::cerr << msg << '\n';
 }

 // --------------------------------------------------------------------

 const validator &validator_factory::operator[](std::string_view dictionary_name)
 {
-	std::lock_guard lock(m_mutex);
-
-	for (auto &validator : m_validators)
+	try
 	{
-		if (iequals(validator.name(), dictionary_name))
-			return validator;
-	}
-
-	// not found, try to see if it helps if we tweak the name a little
-
-	// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
-	std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());
-
-	if (dictionary.extension() != ".dic")
-	{
-		auto dict_name = dictionary.filename().string() + ".dic";
+		std::lock_guard lock(m_mutex);

 		for (auto &validator : m_validators)
 		{
-			if (iequals(validator.name(), dict_name))
+			if (iequals(validator.name(), dictionary_name))
 				return validator;
 		}
-	}

-	// not found, add it
+		// not found, try to see if it helps if we tweak the name a little

+		// too bad clang version 10 did not have a constructor for std::filesystem::path that accepts a std::string_view
+		std::filesystem::path dictionary(dictionary_name.data(), dictionary_name.data() + dictionary_name.length());

-	auto data = load_resource(dictionary_name);
-
-	if (not data and dictionary.extension().string() != ".dic")
-		data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));
-
-	if (data)
-		construct_validator(dictionary_name, *data);
-	else
-	{
-		std::error_code ec;
-
-		// might be a compressed dictionary on disk
-		std::filesystem::path p = dictionary;
-		if (p.extension() == ".dic")
-			p = p.parent_path() / (p.filename().string() + ".gz");
-		else
-			p = p.parent_path() / (p.filename().string() + ".dic.gz");
-
-#if defined(CACHE_DIR) or defined(DATA_DIR)
-		if (not std::filesystem::exists(p, ec) or ec)
+		if (dictionary.extension() != ".dic")
 		{
-			for (const char *dir : {
-#if defined(CACHE_DIR)
-					 CACHE_DIR,
-#endif
-#if defined(DATA_DIR)
-						 DATA_DIR
-#endif
-				 })
+			auto dict_name = dictionary.filename().string() + ".dic";
+
+			for (auto &validator : m_validators)
 			{
-				auto p2 = std::filesystem::path(dir) / p;
-				if (std::filesystem::exists(p2, ec) and not ec)
-				{
-					swap(p, p2);
-					break;
-				}
+				if (iequals(validator.name(), dict_name))
+					return validator;
 			}
 		}
+
+		// not found, add it
+		auto data = load_resource(dictionary_name);
+
+		if (not data and dictionary.extension().string() != ".dic")
+			data = load_resource(dictionary.parent_path() / (dictionary.filename().string() + ".dic"));
+
+		if (data)
+			construct_validator(dictionary_name, *data);
+		else
+		{
+			std::error_code ec;
+
+			// might be a compressed dictionary on disk
+			std::filesystem::path p = dictionary;
+			if (p.extension() == ".dic")
+				p = p.parent_path() / (p.filename().string() + ".gz");
+			else
+				p = p.parent_path() / (p.filename().string() + ".dic.gz");
+
+#if defined(CACHE_DIR) or defined(DATA_DIR)
+			if (not std::filesystem::exists(p, ec) or ec)
+			{
+				for (const char *dir : {
+#if defined(CACHE_DIR)
+						 CACHE_DIR,
+#endif
+#if defined(DATA_DIR)
+							 DATA_DIR
+#endif
+					 })
+				{
+					auto p2 = std::filesystem::path(dir) / p;
+					if (std::filesystem::exists(p2, ec) and not ec)
+					{
+						swap(p, p2);
+						break;
+					}
+				}
+			}
 #endif

-		if (std::filesystem::exists(p, ec) and not ec)
-		{
-			gzio::ifstream in(p);
+			if (std::filesystem::exists(p, ec) and not ec)
+			{
+				gzio::ifstream in(p);

-			if (not in.is_open())
-				throw std::runtime_error("Could not open dictionary (" + p.string() + ")");
+				if (not in.is_open())
+					throw std::runtime_error("Could not open dictionary (" + p.string() + ")");

-			construct_validator(dictionary_name, in);
+				construct_validator(dictionary_name, in);
+			}
+			else
+				throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
 		}
-		else
-			throw std::runtime_error("Dictionary not found or defined (" + dictionary.string() + ")");
-	}

-	return m_validators.back();
+		return m_validators.back();
+	}
+	catch (const std::exception &ex)
+	{
+		std::string msg = "Error while loading dictionary ";
+		msg += dictionary_name;
+		std::throw_with_nested(std::runtime_error(msg));
+	}
 }

-void validator_factory::construct_validator(std::string_view name, std::istream &is)
+const validator &validator_factory::construct_validator(std::string_view name, std::istream &is)
 {
-	m_validators.emplace_back(parse_dictionary(name, is));
+	return m_validators.emplace_back(parse_dictionary(name, is));
 }

 } // namespace cif
--- a/test/.1juh.cif.gz.swp
+++ b/test/.1juh.cif.gz.swp
--- a/test/2bi3.cif.gz
+++ b/test/2bi3.cif.gz
--- a/test/3bwh.cif.gz
+++ b/test/3bwh.cif.gz
--- a/test/4wvp.cif.gz
+++ b/test/4wvp.cif.gz
--- a/test/REA_v2.cif
+++ b/test/REA_v2.cif
@@ -0,0 +1,187 @@
+data_REA_v2
+# 
+_chem_comp.id                                    REA_v2 
+_chem_comp.name                                  "RETINOIC ACID" 
+_chem_comp.type                                  NON-POLYMER 
+_chem_comp.pdbx_type                             HETAIN 
+_chem_comp.formula                               "C20 H28 O2" 
+_chem_comp.mon_nstd_parent_comp_id               ? 
+_chem_comp.pdbx_synonyms                         ? 
+_chem_comp.pdbx_formal_charge                    0 
+_chem_comp.pdbx_initial_date                     1999-07-08 
+_chem_comp.pdbx_modified_date                    2016-10-18 
+_chem_comp.pdbx_ambiguous_flag                   N 
+_chem_comp.pdbx_release_status                   REL 
+_chem_comp.pdbx_replaced_by                      ? 
+_chem_comp.pdbx_replaces                         3KV 
+_chem_comp.formula_weight                        300.435 
+_chem_comp.one_letter_code                       ? 
+_chem_comp.pdbx_model_coordinates_details        ? 
+_chem_comp.pdbx_model_coordinates_missing_flag   N 
+_chem_comp.pdbx_ideal_coordinates_details        Corina 
+_chem_comp.pdbx_ideal_coordinates_missing_flag   N 
+_chem_comp.pdbx_model_coordinates_db_code        1CBS 
+_chem_comp.pdbx_subcomponent_list                ? 
+_chem_comp.pdbx_processing_site                  RCSB 
+# 
+loop_
+_chem_comp_atom.comp_id 
+_chem_comp_atom.atom_id 
+_chem_comp_atom.alt_atom_id 
+_chem_comp_atom.type_symbol 
+_chem_comp_atom.charge 
+_chem_comp_atom.pdbx_align 
+_chem_comp_atom.pdbx_aromatic_flag 
+_chem_comp_atom.pdbx_leaving_atom_flag 
+_chem_comp_atom.pdbx_stereo_config 
+_chem_comp_atom.model_Cartn_x 
+_chem_comp_atom.model_Cartn_y 
+_chem_comp_atom.model_Cartn_z 
+_chem_comp_atom.pdbx_model_Cartn_x_ideal 
+_chem_comp_atom.pdbx_model_Cartn_y_ideal 
+_chem_comp_atom.pdbx_model_Cartn_z_ideal 
+_chem_comp_atom.pdbx_component_atom_id 
+_chem_comp_atom.pdbx_component_comp_id 
+_chem_comp_atom.pdbx_ordinal 
+REA_v2 C1   C1   C 0 1 N N N 21.972 29.831 16.739 -4.684 0.932  -0.497 C1   REA_v2 1  
+REA_v2 C2   C2   C 0 1 N N N 20.921 30.524 15.841 -5.837 0.190  -1.176 C2   REA_v2 2  
+REA_v2 C3   C3   C 0 1 N N N 20.245 29.635 14.848 -6.441 -0.798 -0.171 C3   REA_v2 3  
+REA_v2 C4   C4   C 0 1 N N N 19.555 28.479 15.488 -5.418 -1.903 0.100  C4   REA_v2 4  
+REA_v2 C5   C5   C 0 1 N N N 20.389 27.812 16.587 -4.082 -1.301 0.429  C5   REA_v2 5  
+REA_v2 C6   C6   C 0 1 N N N 21.425 28.446 17.218 -3.756 -0.048 0.161  C6   REA_v2 6  
+REA_v2 C7   C7   C 0 1 N N N 22.242 27.851 18.297 -2.457 0.396  0.516  C7   REA_v2 7  
+REA_v2 C8   C8   C 0 1 N N N 21.868 26.977 19.240 -1.363 -0.229 0.007  C8   REA_v2 8  
+REA_v2 C9   C9   C 0 1 N N N 22.705 26.434 20.286 -0.076 0.257  0.298  C9   REA_v2 9  
+REA_v2 C10  C10  C 0 1 N N N 22.159 25.536 21.131 1.022  -0.370 -0.213 C10  REA_v2 10 
+REA_v2 C11  C11  C 0 1 N N N 22.875 24.924 22.234 2.306  0.115  0.077  C11  REA_v2 11 
+REA_v2 C12  C12  C 0 1 N N N 22.237 24.026 22.990 3.405  -0.513 -0.435 C12  REA_v2 12 
+REA_v2 C13  C13  C 0 1 N N N 22.856 23.377 24.125 4.689  -0.028 -0.144 C13  REA_v2 13 
+REA_v2 C14  C14  C 0 1 N N N 22.135 22.473 24.834 5.787  -0.655 -0.656 C14  REA_v2 14 
+REA_v2 C15  C15  C 0 1 N N N 22.563 21.710 26.016 7.077  -0.265 -0.244 C15  REA_v2 15 
+REA_v2 C16  C16  C 0 1 N N N 22.238 30.737 17.948 -5.246 1.886  0.559  C16  REA_v2 16 
+REA_v2 C17  C17  C 0 1 N N N 23.292 29.620 15.948 -3.911 1.737  -1.544 C17  REA_v2 17 
+REA_v2 C18  C18  C 0 1 N N N 19.791 26.449 16.947 -3.056 -2.175 1.103  C18  REA_v2 18 
+REA_v2 C19  C19  C 0 1 N N N 24.181 26.841 20.385 0.090  1.471  1.175  C19  REA_v2 19 
+REA_v2 C20  C20  C 0 1 N N N 24.303 23.747 24.489 4.855  1.186  0.733  C20  REA_v2 20 
+REA_v2 O1   O1   O 0 1 N N N 23.640 21.075 25.978 7.210  0.553  0.648  O1   REA_v2 21 
+REA_v2 O2   O2   O 0 1 N N N 21.840 21.712 27.037 8.166  -0.798 -0.840 O2   REA_v2 22 
+REA_v2 H21  H21  H 0 1 N N N 20.147 30.955 16.494 -6.598 0.905  -1.490 H21  REA_v2 23 
+REA_v2 H22  H22  H 0 1 N N N 21.425 31.330 15.288 -5.462 -0.353 -2.044 H22  REA_v2 24 
+REA_v2 H31  H31  H 0 1 N N N 19.501 30.227 14.295 -6.673 -0.278 0.759  H31  REA_v2 25 
+REA_v2 H32  H32  H 0 1 N N N 21.001 29.250 14.148 -7.349 -1.234 -0.586 H32  REA_v2 26 
+REA_v2 H41  H41  H 0 1 N N N 18.613 28.835 15.931 -5.756 -2.511 0.938  H41  REA_v2 27 
+REA_v2 H42  H42  H 0 1 N N N 19.335 27.730 14.713 -5.322 -2.531 -0.786 H42  REA_v2 28 
+REA_v2 H7   H7   H 0 1 N N N 23.276 28.162 18.329 -2.337 1.230  1.191  H7   REA_v2 29 
+REA_v2 H8   H8   H 0 1 N N N 20.840 26.645 19.217 -1.482 -1.100 -0.622 H8   REA_v2 30 
+REA_v2 H10  H10  H 0 1 N N N 21.127 25.256 20.977 0.903  -1.241 -0.842 H10  REA_v2 31 
+REA_v2 H11  H11  H 0 1 N N N 23.902 25.189 22.440 2.425  0.985  0.706  H11  REA_v2 32 
+REA_v2 H12  H12  H 0 1 N N N 21.216 23.774 22.743 3.286  -1.383 -1.063 H12  REA_v2 33 
+REA_v2 H14  H14  H 0 1 N N N 21.127 22.292 24.490 5.667  -1.451 -1.376 H14  REA_v2 34 
+REA_v2 H161 H161 H 0 0 N N N 22.984 30.265 18.604 -5.802 1.316  1.303  H161 REA_v2 35 
+REA_v2 H162 H162 H 0 0 N N N 22.618 31.709 17.601 -4.426 2.415  1.044  H162 REA_v2 36 
+REA_v2 H163 H163 H 0 0 N N N 21.302 30.887 18.506 -5.911 2.605  0.081  H163 REA_v2 37 
+REA_v2 H171 H171 H 0 0 N N N 24.033 29.127 16.595 -4.598 2.394  -2.077 H171 REA_v2 38 
+REA_v2 H172 H172 H 0 0 N N N 23.095 28.989 15.069 -3.146 2.335  -1.050 H172 REA_v2 39 
+REA_v2 H173 H173 H 0 0 N N N 23.683 30.595 15.620 -3.439 1.054  -2.251 H173 REA_v2 40 
+REA_v2 H181 H181 H 0 0 N N N 20.397 25.979 17.736 -3.448 -3.187 1.201  H181 REA_v2 41 
+REA_v2 H182 H182 H 0 0 N N N 18.761 26.584 17.308 -2.145 -2.194 0.503  H182 REA_v2 42 
+REA_v2 H183 H183 H 0 0 N N N 19.786 25.804 16.056 -2.831 -1.775 2.092  H183 REA_v2 43 
+REA_v2 H191 H191 H 0 0 N N N 24.647 26.327 21.238 0.171  1.159  2.216  H191 REA_v2 44 
+REA_v2 H192 H192 H 0 0 N N N 24.702 26.559 19.458 0.993  2.008  0.885  H192 REA_v2 45 
+REA_v2 H193 H193 H 0 0 N N N 24.252 27.929 20.529 -0.774 2.125  1.058  H193 REA_v2 46 
+REA_v2 H201 H201 H 0 0 N N N 24.620 23.168 25.369 5.026  0.871  1.762  H201 REA_v2 47 
+REA_v2 H202 H202 H 0 0 N N N 24.965 23.516 23.641 5.707  1.771  0.386  H202 REA_v2 48 
+REA_v2 H203 H203 H 0 0 N N N 24.360 24.822 24.717 3.952  1.795  0.685  H203 REA_v2 49 
+REA_v2 HO2  HO2  H 0 1 N N N 22.244 21.180 27.713 9.006  -0.469 -0.490 HO2  REA_v2 50 
+# 
+loop_
+_chem_comp_bond.comp_id 
+_chem_comp_bond.atom_id_1 
+_chem_comp_bond.atom_id_2 
+_chem_comp_bond.value_order 
+_chem_comp_bond.pdbx_aromatic_flag 
+_chem_comp_bond.pdbx_stereo_config 
+_chem_comp_bond.pdbx_ordinal 
+REA_v2 C1  C2   SING N N 1  
+REA_v2 C1  C6   SING N N 2  
+REA_v2 C1  C16  SING N N 3  
+REA_v2 C1  C17  SING N N 4  
+REA_v2 C2  C3   SING N N 5  
+REA_v2 C2  H21  SING N N 6  
+REA_v2 C2  H22  SING N N 7  
+REA_v2 C3  C4   SING N N 8  
+REA_v2 C3  H31  SING N N 9  
+REA_v2 C3  H32  SING N N 10 
+REA_v2 C4  C5   SING N N 11 
+REA_v2 C4  H41  SING N N 12 
+REA_v2 C4  H42  SING N N 13 
+REA_v2 C5  C6   DOUB N N 14 
+REA_v2 C5  C18  SING N N 15 
+REA_v2 C6  C7   SING N N 16 
+REA_v2 C7  C8   DOUB N E 17 
+REA_v2 C7  H7   SING N N 18 
+REA_v2 C8  C9   SING N N 19 
+REA_v2 C8  H8   SING N N 20 
+REA_v2 C9  C10  DOUB N E 21 
+REA_v2 C9  C19  SING N N 22 
+REA_v2 C10 C11  SING N N 23 
+REA_v2 C10 H10  SING N N 24 
+REA_v2 C11 C12  DOUB N E 25 
+REA_v2 C11 H11  SING N N 26 
+REA_v2 C12 C13  SING N N 27 
+REA_v2 C12 H12  SING N N 28 
+REA_v2 C13 C14  DOUB N E 29 
+REA_v2 C13 C20  SING N N 30 
+REA_v2 C14 C15  SING N N 31 
+REA_v2 C14 H14  SING N N 32 
+REA_v2 C15 O1   DOUB N N 33 
+REA_v2 C15 O2   SING N N 34 
+REA_v2 C16 H161 SING N N 35 
+REA_v2 C16 H162 SING N N 36 
+REA_v2 C16 H163 SING N N 37 
+REA_v2 C17 H171 SING N N 38 
+REA_v2 C17 H172 SING N N 39 
+REA_v2 C17 H173 SING N N 40 
+REA_v2 C18 H181 SING N N 41 
+REA_v2 C18 H182 SING N N 42 
+REA_v2 C18 H183 SING N N 43 
+REA_v2 C19 H191 SING N N 44 
+REA_v2 C19 H192 SING N N 45 
+REA_v2 C19 H193 SING N N 46 
+REA_v2 C20 H201 SING N N 47 
+REA_v2 C20 H202 SING N N 48 
+REA_v2 C20 H203 SING N N 49 
+REA_v2 O2  HO2  SING N N 50 
+# 
+loop_
+_pdbx_chem_comp_descriptor.comp_id 
+_pdbx_chem_comp_descriptor.type 
+_pdbx_chem_comp_descriptor.program 
+_pdbx_chem_comp_descriptor.program_version 
+_pdbx_chem_comp_descriptor.descriptor 
+REA_v2 SMILES           ACDLabs              12.01 "C1(CCCC(=C1\C=C\C(=C\C=C\C(=C\C(=O)O)C)C)C)(C)C"                                                                                                     
+REA_v2 InChI            InChI                1.03  "InChI=1S/C20H28O2/c1-15(8-6-9-16(2)14-19(21)22)11-12-18-17(3)10-7-13-20(18,4)5/h6,8-9,11-12,14H,7,10,13H2,1-5H3,(H,21,22)/b9-6+,12-11+,15-8+,16-14+" 
+REA_v2 InChIKey         InChI                1.03  SHGAZHPCJJPHSC-YCNIQYBTSA-N                                                                                                                           
+REA_v2 SMILES_CANONICAL CACTVS               3.385 "CC1=C(\C=C\C(C)=C\C=C\C(C)=C\C(O)=O)C(C)(C)CCC1"                                                                                                     
+REA_v2 SMILES           CACTVS               3.385 "CC1=C(C=CC(C)=CC=CC(C)=CC(O)=O)C(C)(C)CCC1"                                                                                                          
+REA_v2 SMILES_CANONICAL "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)/C=C/C(=C/C=C/C(=C/C(=O)O)/C)/C"                                                                                                   
+REA_v2 SMILES           "OpenEye OEToolkits" 1.7.6 "CC1=C(C(CCC1)(C)C)C=CC(=CC=CC(=CC(=O)O)C)C"                                                                                                          
+# 
+loop_
+_pdbx_chem_comp_identifier.comp_id 
+_pdbx_chem_comp_identifier.type 
+_pdbx_chem_comp_identifier.program 
+_pdbx_chem_comp_identifier.program_version 
+_pdbx_chem_comp_identifier.identifier 
+REA_v2 "SYSTEMATIC NAME" ACDLabs              12.01 "retinoic acid"                                                                             
+REA_v2 "SYSTEMATIC NAME" "OpenEye OEToolkits" 1.7.6 "(2E,4E,6E,8E)-3,7-dimethyl-9-(2,6,6-trimethylcyclohexen-1-yl)nona-2,4,6,8-tetraenoic acid" 
+# 
+loop_
+_pdbx_chem_comp_audit.comp_id 
+_pdbx_chem_comp_audit.action_type 
+_pdbx_chem_comp_audit.date 
+_pdbx_chem_comp_audit.processing_site 
+REA_v2 "Create component"   1999-07-08 RCSB 
+REA_v2 "Modify descriptor"  2011-06-04 RCSB 
+REA_v2 "Other modification" 2016-10-18 RCSB 
+# 
--- a/test/format-test.cpp
+++ b/test/format-test.cpp
@@ -82,4 +82,19 @@ BOOST_AUTO_TEST_CASE(fmt_1)

 	BOOST_CHECK_EQUAL(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI).str(),
 		"Hello, world     , the magic number is 42 and pi is 3.14159");
+}
+
+// --------------------------------------------------------------------
+
+// Print "Hello, world!" with several colour and style combinations. Nothing
+// is asserted here, the output is meant to be inspected by eye.
+BOOST_AUTO_TEST_CASE(clr_1)
+{
+	namespace clr = cif::colour;
+
+	// explicit style argument
+	std::cout << "Hello, " << cif::coloured("world!", clr::white, clr::red, clr::regular) << '\n';
+	std::cout << "Hello, " << cif::coloured("world!", clr::white, clr::red, clr::bold) << '\n';
+
+	// style argument left out
+	std::cout << "Hello, " << cif::coloured("world!", clr::black, clr::red) << '\n';
+	std::cout << "Hello, " << cif::coloured("world!", clr::white, clr::green) << '\n';
+	std::cout << "Hello, " << cif::coloured("world!", clr::white, clr::blue) << '\n';
+	std::cout << "Hello, " << cif::coloured("world!", clr::blue, clr::white) << '\n';
+
+	std::cout << "Hello, " << cif::coloured("world!", clr::red, clr::white, clr::bold) << '\n';
 }
--- a/test/io-test.cpp
+++ b/test/io-test.cpp
@@ -0,0 +1,39 @@
+#include <cif++.hpp>
+
+#include <iostream>
+
+/// A cif::sac_parser whose produce_* callbacks are all empty, so everything
+/// that is parsed is discarded.
+class dummy_parser : public cif::sac_parser
+{
+  public:
+	/// Hand the input stream @a is to the base class.
+	dummy_parser(std::istream &is)
+		: cif::sac_parser(is)
+	{
+	}
+
+	/// Intentionally empty.
+	void produce_datablock(std::string_view /*name*/) override {}
+
+	/// Intentionally empty.
+	void produce_category(std::string_view /*name*/) override {}
+
+	/// Intentionally empty.
+	void produce_row() override {}
+
+	/// Intentionally empty.
+	void produce_item(std::string_view /*category*/, std::string_view /*item*/, std::string_view /*value*/) override {}
+};
+
+
+/// Run dummy_parser over one gzip-compressed mmCIF file.
+///
+/// The file is taken from the first command line argument. Without an
+/// argument the previously hard-coded location is used, so existing
+/// invocations behave as before.
+///
+/// @param argc  number of command line arguments
+/// @param argv  argv[1], when present, is the path of the file to parse
+/// @return      0 on success, 1 when the input file could not be opened
+int main(int argc, char *argv[])
+{
+	const char *input = argc > 1 ? argv[1] : "/srv/data/pdb/mmCIF/gl/8glv.cif.gz";
+
+	cif::gzio::ifstream in(input);
+
+	// Report a missing or unreadable file instead of parsing a dead stream
+	if (not in.is_open())
+	{
+		std::cerr << "Could not open input file " << input << '\n';
+		return 1;
+	}
+
+	dummy_parser parser(in);
+	parser.parse_file();
+
+	return 0;
+}
--- a/test/model-test.cpp
+++ b/test/model-test.cpp
@@ -194,12 +194,129 @@ _atom_type.symbol   C
 	if (not (expected.front() == structure.get_datablock()))
 	{
 		BOOST_TEST(false);
-		std::cout << expected.front() << std::endl
-				<< std::endl
-				<< structure.get_datablock() << std::endl;
+		std::cout << expected.front() << '\n'
+				<< '\n'
+				<< structure.get_datablock() << '\n';
 	}
 }

+// --------------------------------------------------------------------
+
+// Create a HEM non-polymer in an otherwise empty datablock, using the first
+// four atoms listed in HEM.cif, and compare the resulting datablock with a
+// reference written out below.
+BOOST_AUTO_TEST_CASE(create_nonpoly_2)
+{
+	cif::VERBOSE = 1;
+
+	cif::file file;
+	file.load_dictionary("mmcif_pdbx.dic");
+	file.emplace("TEST");	// create a datablock
+
+	cif::mm::structure structure(file);
+
+	cif::file lig(gTestDir / "HEM.cif");
+	auto &chem_comp_atom = lig["HEM"]["chem_comp_atom"];
+
+	std::vector<cif::row_initializer> atoms;
+
+	// Copy atoms from the compound file, stopping after the fourth one
+	for (const auto &[type_symbol, label_atom_id, Cartn_x, Cartn_y, Cartn_z] :
+		chem_comp_atom.rows<std::string,std::string,float,float,float>(
+				"type_symbol", "atom_id", "model_Cartn_x", "model_Cartn_y", "model_Cartn_z"))
+	{
+		atoms.emplace_back(cif::row_initializer{
+			{ "type_symbol", type_symbol },
+			{ "label_atom_id", label_atom_id },
+			{ "auth_atom_id", label_atom_id },
+			{ "Cartn_x", Cartn_x },
+			{ "Cartn_y", Cartn_y },
+			{ "Cartn_z", Cartn_z }
+		});
+
+		if (atoms.size() == 4)
+			break;
+	}
+
+	std::string entity_id = structure.create_non_poly_entity("HEM");
+	structure.create_non_poly(entity_id, atoms);
+
+	auto expected = R"(
+data_TEST
+# 
+_pdbx_nonpoly_scheme.asym_id         A 
+_pdbx_nonpoly_scheme.ndb_seq_num     1 
+_pdbx_nonpoly_scheme.entity_id       1 
+_pdbx_nonpoly_scheme.mon_id          HEM 
+_pdbx_nonpoly_scheme.pdb_seq_num     1 
+_pdbx_nonpoly_scheme.auth_seq_num    1 
+_pdbx_nonpoly_scheme.pdb_mon_id      HEM 
+_pdbx_nonpoly_scheme.auth_mon_id     HEM 
+_pdbx_nonpoly_scheme.pdb_strand_id   A 
+_pdbx_nonpoly_scheme.pdb_ins_code    . 
+#
+loop_
+_atom_site.id
+_atom_site.auth_asym_id
+_atom_site.label_alt_id
+_atom_site.label_asym_id
+_atom_site.label_atom_id
+_atom_site.label_comp_id
+_atom_site.label_entity_id
+_atom_site.label_seq_id
+_atom_site.type_symbol
+_atom_site.group_PDB
+_atom_site.pdbx_PDB_ins_code
+_atom_site.Cartn_x
+_atom_site.Cartn_y
+_atom_site.Cartn_z
+_atom_site.occupancy
+_atom_site.pdbx_formal_charge
+_atom_site.auth_seq_id
+_atom_site.auth_comp_id
+_atom_site.auth_atom_id
+_atom_site.pdbx_PDB_model_num
+1 A ? A CHA HEM 1 . C HETATM ? 2.748 -19.531 39.896 1.00 ? 1 HEM CHA 1
+2 A ? A CHB HEM 1 . C HETATM ? 3.258 -17.744 35.477 1.00 ? 1 HEM CHB 1
+3 A ? A CHC HEM 1 . C HETATM ? 1.703 -21.9   33.637 1.00 ? 1 HEM CHC 1
+4 A ? A CHD HEM 1 . C HETATM ? 1.149 -23.677 38.059 1.00 ? 1 HEM CHD 1
+#
+_chem_comp.id               HEM
+_chem_comp.type             NON-POLYMER
+_chem_comp.name             'PROTOPORPHYRIN IX CONTAINING FE'
+_chem_comp.formula          'C34 H32 Fe N4 O4'
+_chem_comp.formula_weight   616.487000
+#
+_pdbx_entity_nonpoly.entity_id   1
+_pdbx_entity_nonpoly.name        'PROTOPORPHYRIN IX CONTAINING FE'
+_pdbx_entity_nonpoly.comp_id     HEM
+#
+_entity.id                 1
+_entity.type               non-polymer
+_entity.pdbx_description   'PROTOPORPHYRIN IX CONTAINING FE'
+_entity.formula_weight     616.487000
+#
+_struct_asym.id                            A
+_struct_asym.entity_id                     1
+_struct_asym.pdbx_blank_PDB_chainid_flag   N
+_struct_asym.pdbx_modified                 N
+_struct_asym.details                       ?
+#
+_atom_type.symbol   C
+)"_cf;
+
+	expected.load_dictionary("mmcif_pdbx.dic");
+
+	if (not (expected.front() == structure.get_datablock()))
+	{
+		BOOST_TEST(false);
+		std::cout << expected.front() << '\n'
+				<< '\n'
+				<< structure.get_datablock() << '\n';
+
+		// Keep both versions around for inspection. The platform's temporary
+		// directory is used because a fixed /tmp does not exist on Windows.
+		const auto tmp_dir = std::filesystem::temp_directory_path();
+		expected.save((tmp_dir / "a").string());
+		file.save((tmp_dir / "b").string());
+	}
+}
+
+
 // --------------------------------------------------------------------

 BOOST_AUTO_TEST_CASE(test_atom_id)
--- a/test/rename-compound-test.cpp
+++ b/test/rename-compound-test.cpp
@@ -72,7 +72,7 @@ int main(int argc, char* argv[])
 	}
 	catch (const std::exception& e)
 	{
-		std::cerr << e.what() << std::endl;
+		std::cerr << e.what() << '\n';
 		exit(1);
 	}
 	
--- a/test/spinner-test.cpp
+++ b/test/spinner-test.cpp
@@ -0,0 +1,49 @@
+#include "cif++/utilities.hpp"
+
+#include <random>
+#include <thread>
+
+// Step a ten-item progress bar to completion, sleeping a random 100 to 1000
+// milliseconds before each step and setting a fresh message every time.
+void test_one()
+{
+	std::mt19937 rng(std::random_device{}());
+	std::uniform_int_distribution<> delay_ms(100, 1000);
+
+	cif::progress_bar bar(10, "test");
+
+	int step = 0;
+	while (step < 10)
+	{
+		const std::chrono::milliseconds pause(delay_ms(rng));
+		std::this_thread::sleep_for(pause);
+
+		bar.message("step " + std::to_string(step));
+		bar.consumed(1);
+
+		++step;
+	}
+}
+
+// Consume only five of the ten items of a progress bar and let it go out of
+// scope unfinished.
+void test_two()
+{
+	cif::progress_bar bar(10, "test");
+
+	int remaining = 5;
+	while (remaining-- > 0)
+		bar.consumed(1);
+}
+
+// Consume all ten items of a progress bar in a single call, then keep the
+// bar alive for another 100 milliseconds before it is destroyed.
+void test_three()
+{
+	cif::progress_bar bar(10, "test");
+	bar.consumed(10);
+
+	std::this_thread::sleep_for(std::chrono::milliseconds(100));
+}
+
+// Run the three progress bar scenarios one after another.
+int main()
+{
+	void (*const scenarios[])() = { test_one, test_two, test_three };
+
+	for (auto scenario : scenarios)
+		scenario();
+
+	return 0;
+}
--- a/test/unit-3d-test.cpp
+++ b/test/unit-3d-test.cpp
@@ -31,10 +31,10 @@

 #include <cif++.hpp>

-#include <cif++/dictionary_parser.hpp>
-#include <cif++/parser.hpp>
+#include <Eigen/Eigenvalues>

 namespace tt = boost::test_tools;
+namespace utf = boost::unit_test;

 std::filesystem::path gTestDir = std::filesystem::current_path(); // filled in first test

@@ -124,7 +124,7 @@ BOOST_AUTO_TEST_CASE(t1)

 	BOOST_TEST(rmsd < 1e-5);

-	// std::cout << "rmsd: " << RMSd(p1, p2) << std::endl;
+	// std::cout << "rmsd: " << RMSd(p1, p2) << '\n';
 }

 BOOST_AUTO_TEST_CASE(t2)
@@ -161,13 +161,53 @@ BOOST_AUTO_TEST_CASE(t3)
 	v.rotate(q);
 	v += p[0];

-	std::cout << v << std::endl;
+	std::cout << v << '\n';

 	double a = cif::angle(v, p[0], p[1]);

 	BOOST_TEST(a == 45, tt::tolerance(0.01));
 }

+// Rotate the point (1, 1, 0) by 90 about the x axis and back again by -90.
+// After each step the coordinates of the point are checked, as well as the
+// dihedral angle it makes with three fixed points.
+BOOST_AUTO_TEST_CASE(dh_q_0)
+{
+	cif::point x_axis(1, 0, 0);
+
+	// the three fixed points of the dihedral, p below is the fourth
+	cif::point ref1{ 0, 1, 0 };
+	cif::point ref2{ 0, 0, 0 };
+	cif::point ref3{ 1, 0, 0 };
+
+	cif::point p(1, 1, 0);
+
+	// starting position
+	auto a = cif::dihedral_angle(ref1, ref2, ref3, p);
+	BOOST_TEST(a == 0, tt::tolerance(0.01f));
+
+	// rotate by 90
+	auto quarter_turn = cif::construct_from_angle_axis(90, x_axis);
+	p.rotate(quarter_turn);
+
+	BOOST_TEST(p.m_x == 1, tt::tolerance(0.01f));
+	BOOST_TEST(p.m_y == 0, tt::tolerance(0.01f));
+	BOOST_TEST(p.m_z == 1, tt::tolerance(0.01f));
+
+	a = cif::dihedral_angle(ref1, ref2, ref3, p);
+	BOOST_TEST(a == 90, tt::tolerance(0.01f));
+
+	// and rotate back by -90
+	auto reverse_turn = cif::construct_from_angle_axis(-90, x_axis);
+	p.rotate(reverse_turn);
+
+	BOOST_TEST(p.m_x == 1, tt::tolerance(0.01f));
+	BOOST_TEST(p.m_y == 1, tt::tolerance(0.01f));
+	BOOST_TEST(p.m_z == 0, tt::tolerance(0.01f));
+
+	a = cif::dihedral_angle(ref1, ref2, ref3, p);
+	BOOST_TEST(a == 0, tt::tolerance(0.01f));
+}
+
 BOOST_AUTO_TEST_CASE(dh_q_1)
 {
 	struct
@@ -204,11 +244,358 @@ BOOST_AUTO_TEST_CASE(dh_q_1)
 	{
 		auto q = cif::construct_for_dihedral_angle(pts[0], pts[1], pts[2], pts[3], angle, 1);

-		pts[3] -= pts[2];
-		pts[3].rotate(q);
-		pts[3] += pts[2];
+		pts[3].rotate(q, pts[2]);

 		auto dh = cif::dihedral_angle(pts[0], pts[1], pts[2], pts[3]);
 		BOOST_TEST(dh == angle, tt::tolerance(0.1f));
 	}
-}
+}
+
+// --------------------------------------------------------------------
+
+BOOST_AUTO_TEST_CASE(m2q_0, *utf::tolerance(0.001f))
+{
+	for (size_t i = 0; i < cif::kSymopNrTableSize; ++i)
+	{
+		auto d = cif::kSymopNrTable[i].symop().data();
+		// the first nine values of d fill the 3x3 matrix rot row by row
+		cif::matrix3x3<float> rot;
+		float Qxx = rot(0, 0) = d[0];
+		float Qxy = rot(0, 1) = d[1];
+		float Qxz = rot(0, 2) = d[2];
+		float Qyx = rot(1, 0) = d[3];
+		float Qyy = rot(1, 1) = d[4];
+		float Qyz = rot(1, 2) = d[5];
+		float Qzx = rot(2, 0) = d[6];
+		float Qzy = rot(2, 1) = d[7];
+		float Qzz = rot(2, 2) = d[8];
+		// em is a symmetric 4x4 matrix assembled from the nine entries of rot
+		Eigen::Matrix4f em;
+
+		em << Qxx - Qyy - Qzz, Qyx + Qxy, Qzx + Qxz, Qzy - Qyz,
+		      Qyx + Qxy, Qyy - Qxx - Qzz, Qzy + Qyz, Qxz - Qzx,
+			  Qzx + Qxz, Qzy + Qyz, Qzz - Qxx - Qyy, Qyx - Qxy,
+			  Qzy - Qyz, Qxz - Qzx, Qyx - Qxy, Qxx + Qyy + Qzz;
+
+		Eigen::EigenSolver<Eigen::Matrix4f> es(em / 3);
+
+		auto ev = es.eigenvalues();
+		// pick the eigenvalue with the largest real part (bestEV) and remember its index (bestJ)
+		size_t bestJ = 0;
+		float bestEV = -1;
+
+		for (size_t j = 0; j < 4; ++j)
+		{
+			if (bestEV < ev[j].real())
+			{
+				bestEV = ev[j].real();
+				bestJ = j;
+			}
+		}
+
+		if (std::abs(bestEV - 1) > 0.01)
+			continue; // not a rotation matrix
+
+		auto col = es.eigenvectors().col(bestJ);
+		// eigenvector component 3 is passed first to the quaternion, followed by components 0, 1 and 2
+		auto q = normalize(cif::quaternion{
+			static_cast<float>(col(3).real()),
+			static_cast<float>(col(0).real()),
+			static_cast<float>(col(1).real()),
+			static_cast<float>(col(2).real()) });
+
+		cif::point p1{ 1, 1, 1 };
+		cif::point p2 = p1;
+		p2.rotate(q);
+
+		cif::point p3 = rot * p1;
+		// rotating with q must give the same point as multiplying by rot
+		BOOST_TEST(p2.m_x == p3.m_x);
+		BOOST_TEST(p2.m_y == p3.m_y);
+		BOOST_TEST(p2.m_z == p3.m_z);
+	}
+}
+
+// BOOST_AUTO_TEST_CASE(m2q_1, *utf::tolerance(0.001f))
+// {
+// 	for (size_t i = 0; i < cif::kSymopNrTableSize; ++i)
+// 	{
+// 		auto d = cif::kSymopNrTable[i].symop().data();
+
+// 		cif::matrix3x3<float> rot;
+// 		float Qxx = rot(0, 0) = d[0];
+// 		float Qxy = rot(0, 1) = d[1];
+// 		float Qxz = rot(0, 2) = d[2];
+// 		float Qyx = rot(1, 0) = d[3];
+// 		float Qyy = rot(1, 1) = d[4];
+// 		float Qyz = rot(1, 2) = d[5];
+// 		float Qzx = rot(2, 0) = d[6];
+// 		float Qzy = rot(2, 1) = d[7];
+// 		float Qzz = rot(2, 2) = d[8];
+
+// 		cif::matrix4x4<float> m({
+// 			Qxx - Qyy - Qzz, Qyx + Qxy, Qzx + Qxz, Qzy - Qyz,
+// 			Qyx + Qxy, Qyy - Qxx - Qzz, Qzy + Qyz, Qxz - Qzx,
+// 			Qzx + Qxz, Qzy + Qyz, Qzz - Qxx - Qyy, Qyx - Qxy,
+// 			Qzy - Qyz, Qxz - Qzx, Qyx - Qxy, Qxx + Qyy + Qzz
+// 		});
+
+// 		auto &&[ev, em] = cif::eigen(m * (1/3.0f), false);
+
+// 		size_t bestJ = 0;
+// 		float bestEV = -1;
+
+// 		for (size_t j = 0; j < 4; ++j)
+// 		{
+// 			if (bestEV < ev[j])
+// 			{
+// 				bestEV = ev[j];
+// 				bestJ = j;
+// 			}
+// 		}
+
+// 		if (std::abs(bestEV - 1) > 0.01)
+// 			continue; // not a rotation matrix
+
+// 		auto q = normalize(cif::quaternion{
+// 			static_cast<float>(em(bestJ, 3)),
+// 			static_cast<float>(em(bestJ, 0)),
+// 			static_cast<float>(em(bestJ, 1)),
+// 			static_cast<float>(em(bestJ, 2)) });
+		
+// 		cif::point p1{ 1, 1, 1 };
+// 		cif::point p2 = p1;
+// 		p2.rotate(q);
+
+// 		cif::point p3 = rot * p1;
+
+// 		BOOST_TEST(p2.m_x == p3.m_x);
+// 		BOOST_TEST(p2.m_y == p3.m_y);
+// 		BOOST_TEST(p2.m_z == p3.m_z);
+// 	}
+// }
+
+// --------------------------------------------------------------------
+
+BOOST_AUTO_TEST_CASE(symm_1)
+{
+	cif::cell c(10, 10, 10);
+
+	cif::point p{ 1, 1, 1 };
+	// with the 10/10/10 cell above, fractional() of (1, 1, 1) is expected to be 0.1 on each axis
+	cif::point f = fractional(p, c);
+
+	BOOST_TEST(f.m_x == 0.1f, tt::tolerance(0.01));
+	BOOST_TEST(f.m_y == 0.1f, tt::tolerance(0.01));
+	BOOST_TEST(f.m_z == 0.1f, tt::tolerance(0.01));
+	// orthogonal() on that result is expected to give back the original (1, 1, 1)
+	cif::point o = orthogonal(f, c);
+
+	BOOST_TEST(o.m_x == 1.f, tt::tolerance(0.01));
+	BOOST_TEST(o.m_y == 1.f, tt::tolerance(0.01));
+	BOOST_TEST(o.m_z == 1.f, tt::tolerance(0.01));
+}
+
+BOOST_AUTO_TEST_CASE(symm_2)
+{
+	using namespace cif::literals; // needed for the _symop literal
+
+	auto symop = "1_555"_symop;
+	// the operator written as "1_555" must report itself as the identity
+	BOOST_TEST(symop.is_identity() == true);
+}
+
+BOOST_AUTO_TEST_CASE(symm_3)
+{
+	using namespace cif::literals; // NOTE(review): no user-defined literal is used in this body
+
+	cif::spacegroup sg(18);
+	// a spacegroup constructed from 18 is expected to have size 4 and the name "P 21 21 2"
+	BOOST_TEST(sg.size() == 4);
+	BOOST_TEST(sg.get_name() == "P 21 21 2");
+}
+
+BOOST_AUTO_TEST_CASE(symm_4, *utf::tolerance(0.1f))
+{
+	using namespace cif::literals;
+
+	// based on 2b8h
+	auto sg = cif::spacegroup(154); // p 32 2 1
+	auto c = cif::cell(107.516, 107.516, 338.487, 90.00, 90.00, 120.00);
+	// a and b are the input points; sb is the precomputed expected result for b under 4_565
+	cif::point a{   -8.688,  79.351, 10.439 }; // O6 NAG A 500
+	cif::point b{  -35.356,  33.693, -3.236 }; // CG2 THR D 400
+	cif::point sb(  -6.916,   79.34,   3.236); // 4_565 copy of b
+	// the 1_455, 1_545 and 1_554 copies of a are expected at a distance of get_a(), get_b() and get_c()
+	BOOST_TEST(distance(a, sg(a, c, "1_455"_symop)) == static_cast<float>(c.get_a()));
+	BOOST_TEST(distance(a, sg(a, c, "1_545"_symop)) == static_cast<float>(c.get_b()));
+	BOOST_TEST(distance(a, sg(a, c, "1_554"_symop)) == static_cast<float>(c.get_c()));
+	// the computed 4_565 copy of b must coincide with sb
+	auto sb2 = sg(b, c, "4_565"_symop);
+	BOOST_TEST(sb.m_x == sb2.m_x);
+	BOOST_TEST(sb.m_y == sb2.m_y);
+	BOOST_TEST(sb.m_z == sb2.m_z);
+
+	BOOST_TEST(distance(a, sb2) == 7.42f);
+}
+
+// --------------------------------------------------------------------
+
+BOOST_AUTO_TEST_CASE(symm_4wvp_1, *utf::tolerance(0.1f))
+{
+	using namespace cif::literals;
+
+	cif::file f(gTestDir / "4wvp.cif.gz");
+
+	auto &db = f.front();
+	cif::mm::structure s(db);
+
+	cif::crystal c(db);
+	// p holds the coordinates expected for the 2_565 copy of atom a (atom id "O", residue "A"/10)
+	cif::point p{ -78.722, 98.528,  11.994 };
+	auto a = s.get_residue("A", 10, "").get_atom_by_atom_id("O");
+
+	auto sp1 = c.symmetry_copy(a.get_location(), "2_565"_symop);
+	BOOST_TEST(sp1.m_x == p.m_x);
+	BOOST_TEST(sp1.m_y == p.m_y);
+	BOOST_TEST(sp1.m_z == p.m_z);
+	// closest_symmetry_copy(p, location of a) is expected to return a point equal to p, with d below 1
+	const auto &[d, sp2, so] = c.closest_symmetry_copy(p, a.get_location());
+
+	BOOST_TEST(d < 1);
+
+	BOOST_TEST(sp2.m_x == p.m_x);
+	BOOST_TEST(sp2.m_y == p.m_y);
+	BOOST_TEST(sp2.m_z == p.m_z);
+
+}
+
+BOOST_AUTO_TEST_CASE(symm_2bi3_1, *utf::tolerance(0.1f))
+{
+	cif::file f(gTestDir / "2bi3.cif.gz");
+
+	auto &db = f.front();
+	cif::mm::structure s(db);
+
+	cif::crystal c(db);
+	// visit every struct_conn row in which at least one partner's symmetry differs from "1_555"
+	auto struct_conn = db["struct_conn"];
+	for (const auto &[
+			asym1, seqid1, authseqid1, atomid1, symm1,
+			asym2, seqid2, authseqid2, atomid2, symm2,
+			dist] : struct_conn.find<
+				std::string,int,std::string,std::string,std::string,
+				std::string,int,std::string,std::string,std::string,
+				float>(
+			cif::key("ptnr1_symmetry") != "1_555" or cif::key("ptnr2_symmetry") != "1_555",
+			"ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id", "ptnr1_symmetry", 
+			"ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_auth_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry", 
+			"pdbx_dist_value"
+		))
+	{
+		auto &r1 = s.get_residue(asym1, seqid1, authseqid1);
+		auto &r2 = s.get_residue(asym2, seqid2, authseqid2);
+
+		auto a1 = r1.get_atom_by_atom_id(atomid1);
+		auto a2 = r2.get_atom_by_atom_id(atomid2);
+		// apply the operators recorded in the row; the resulting distance must match pdbx_dist_value
+		auto sa1 = c.symmetry_copy(a1.get_location(), cif::sym_op(symm1));
+		auto sa2 = c.symmetry_copy(a2.get_location(), cif::sym_op(symm2));
+
+		BOOST_TEST(cif::distance(sa1, sa2) == dist);
+
+		auto pa1 = a1.get_location();
+		// starting from the unmoved a1, closest_symmetry_copy is expected to reproduce sa2, dist and symm2
+		const auto &[d, p, so] = c.closest_symmetry_copy(pa1, a2.get_location());
+
+		BOOST_TEST(p.m_x == sa2.m_x);
+		BOOST_TEST(p.m_y == sa2.m_y);
+		BOOST_TEST(p.m_z == sa2.m_z);
+
+		BOOST_TEST(d == dist);
+		BOOST_TEST(so.string() == symm2);
+	}
+}
+
+BOOST_AUTO_TEST_CASE(symm_2bi3_1a, *utf::tolerance(0.1f))
+{
+	using namespace cif::literals;
+
+	cif::file f(gTestDir / "2bi3.cif.gz");
+
+	auto &db = f.front();
+	// unlike symm_2bi3_1, no cif::mm::structure is built: coordinates are read from atom_site directly
+	cif::crystal c(db);
+	auto struct_conn = db["struct_conn"];
+	auto atom_site = db["atom_site"];
+	// same row selection as symm_2bi3_1, but the two seq ids are requested as std::optional<int>
+	for (const auto &[
+			asym1, seqid1, authseqid1, atomid1, symm1,
+			asym2, seqid2, authseqid2, atomid2, symm2,
+			dist] : struct_conn.find<
+				std::string,std::optional<int>,std::string,std::string,std::string,
+				std::string,std::optional<int>,std::string,std::string,std::string,
+				float>(
+			cif::key("ptnr1_symmetry") != "1_555" or cif::key("ptnr2_symmetry") != "1_555",
+			"ptnr1_label_asym_id", "ptnr1_label_seq_id", "ptnr1_auth_seq_id", "ptnr1_label_atom_id", "ptnr1_symmetry", 
+			"ptnr2_label_asym_id", "ptnr2_label_seq_id", "ptnr2_auth_seq_id", "ptnr2_label_atom_id", "ptnr2_symmetry", 
+			"pdbx_dist_value"
+		))
+	{
+		cif::point p1 = atom_site.find1<float,float,float>(
+			"label_asym_id"_key == asym1 and "label_seq_id"_key == seqid1 and "auth_seq_id"_key == authseqid1 and "label_atom_id"_key == atomid1,
+			"cartn_x", "cartn_y", "cartn_z");
+		cif::point p2 = atom_site.find1<float,float,float>(
+			"label_asym_id"_key == asym2 and "label_seq_id"_key == seqid2 and "auth_seq_id"_key == authseqid2 and "label_atom_id"_key == atomid2,
+			"cartn_x", "cartn_y", "cartn_z");
+		// apply the operators recorded in the row; the resulting distance must match pdbx_dist_value
+		auto sa1 = c.symmetry_copy(p1, cif::sym_op(symm1));
+		auto sa2 = c.symmetry_copy(p2, cif::sym_op(symm2));
+
+		BOOST_TEST(cif::distance(sa1, sa2) == dist);
+		// starting from the unmoved p1, closest_symmetry_copy is expected to reproduce sa2, dist and symm2
+		const auto &[d, p, so] = c.closest_symmetry_copy(p1, p2);
+
+		BOOST_TEST(p.m_x == sa2.m_x);
+		BOOST_TEST(p.m_y == sa2.m_y);
+		BOOST_TEST(p.m_z == sa2.m_z);
+
+		BOOST_TEST(d == dist);
+		BOOST_TEST(so.string() == symm2);
+	}
+}
+
+BOOST_AUTO_TEST_CASE(symm_3bwh_1, *utf::tolerance(0.1f))
+{
+	cif::file f(gTestDir / "3bwh.cif.gz");
+
+	auto &db = f.front();
+
+	cif::crystal c(db);
+	cif::mm::structure s(db);
+	// nested loop over all ordered pairs of atoms in s; pairs with a1 == a2 are skipped
+	for (auto a1 : s.atoms())
+	{
+		for (auto a2 : s.atoms())
+		{
+			if (a1 == a2)
+				continue;
+			// the returned distance d must agree with the distance from a1 to the returned point p
+			const auto&[ d, p, so ] = c.closest_symmetry_copy(a1.get_location(), a2.get_location());
+
+			BOOST_TEST(d == distance(a1.get_location(), p));
+		}
+	}
+}
+
+BOOST_AUTO_TEST_CASE(volume_3bwh_1, *utf::tolerance(0.1f))
+{
+	cif::file f(gTestDir / "1juh.cif.gz"); // NOTE(review): the test name says 3bwh but the file loaded is 1juh
+
+	auto &db = f.front();
+
+	cif::crystal c(db);
+	// BOOST_TEST honours the utf::tolerance decorator (BOOST_CHECK_EQUAL compares with plain ==); assumes get_volume() yields float - TODO confirm
+	BOOST_TEST(c.get_cell().get_volume() == 741009.625f);
+}
+
--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
@@ -31,8 +31,8 @@

 #include <cif++.hpp>

-#include <cif++/dictionary_parser.hpp>
-#include <cif++/parser.hpp>
+#include "cif++/dictionary_parser.hpp"
+

 namespace tt = boost::test_tools;

@@ -75,6 +75,30 @@ bool init_unit_test()

 // --------------------------------------------------------------------

+BOOST_AUTO_TEST_CASE(id_1)
+{
+	BOOST_TEST(cif::cif_id_for_number(0) == "A");
+	BOOST_TEST(cif::cif_id_for_number(25) == "Z");
+	BOOST_TEST(cif::cif_id_for_number(26) == "AA"); // two-letter ids start at 26
+	BOOST_TEST(cif::cif_id_for_number(26 + 1) == "AB");
+
+	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 - 1) == "ZZ");
+	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26) == "AAA"); // three-letter ids start at 26 + 26 * 26
+	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 + 1) == "AAB");
+
+	std::set<std::string> testset;
+	// the ids for 0 .. 99999 must all be distinct: none may already be in the set when generated
+	for (int i = 0; i < 100000; ++i)
+	{
+		std::string id = cif::cif_id_for_number(i);
+		BOOST_TEST(testset.count(id) == 0);
+		testset.insert(id);
+	}
+	BOOST_TEST(testset.size() == 100000);
+}
+
+// --------------------------------------------------------------------
+
 BOOST_AUTO_TEST_CASE(cc_1)
 {
 	std::tuple<std::string_view, float, char> tests[] = {
@@ -126,6 +150,31 @@ BOOST_AUTO_TEST_CASE(cc_2)
 	}
 }

+BOOST_AUTO_TEST_CASE(cc_3)
+{
+	cif::category c("foo");
+	c.emplace({
+		{ "f-1", 1 },
+		{ "f-2", "-1" },
+		{ "f-3", "+1" },
+		{ "f-4", " 1" },
+		{ "f-5", " +1" },
+		{ "f-6", "1 " },
+	});
+	// f-1 is stored from an int, f-2 .. f-6 from strings; all six are read back with as<int>()
+	auto row = c.front();
+	BOOST_CHECK_EQUAL(row["f-1"].as<int>(), 1);
+	BOOST_CHECK_EQUAL(row["f-2"].as<int>(), -1);
+	BOOST_CHECK_EQUAL(row["f-3"].as<int>(), 1);
+	// the values with a leading or trailing space are expected to read as 0; the throwing variant is disabled
+	// BOOST_CHECK_THROW(row["f-4"].as<int>(), std::exception);
+	// BOOST_CHECK_THROW(row["f-5"].as<int>(), std::exception);
+	// BOOST_CHECK_THROW(row["f-6"].as<int>(), std::exception);
+	BOOST_CHECK_EQUAL(row["f-4"].as<int>(), 0);
+	BOOST_CHECK_EQUAL(row["f-5"].as<int>(), 0);
+	BOOST_CHECK_EQUAL(row["f-6"].as<int>(), 0);
+}
+
 BOOST_AUTO_TEST_CASE(item_1)
 {
 	using namespace cif;
@@ -155,6 +204,26 @@ BOOST_AUTO_TEST_CASE(item_1)
 	BOOST_CHECK(ci3.empty());
 }

+BOOST_AUTO_TEST_CASE(item_2)
+{
+	using namespace cif;
+	// an item constructed from a name only is expected to have the value "."
+	cif::item i0("test1");
+	BOOST_CHECK(i0.value() == ".");
+	// an empty optional is expected to give "?", an engaged one the formatted number
+	cif::item i1("test1", std:: optional<float>());
+	BOOST_CHECK(i1.value() == "?");
+
+	cif::item i2("test1", std::make_optional<float>(1));
+	BOOST_CHECK(i2.value() == "1");
+	// with a third argument of 2 the empty case is still "?", while the value 1 is expected as "1.00"
+	cif::item i3("test1", std::optional<float>(), 2);
+	BOOST_CHECK(i3.value() == "?");
+
+	cif::item i4("test1", std::make_optional<float>(1), 2);
+	BOOST_CHECK(i4.value() == "1.00");
+}
+
 // --------------------------------------------------------------------

 BOOST_AUTO_TEST_CASE(r_1)
@@ -1841,6 +1910,15 @@ _test.name
 	BOOST_TEST(db["test"].find_first<int>(cif::key("id") == 1, "id") == 1);
 	BOOST_TEST(db["test"].find_first<int>(cif::all(), "id") == 1);

+	std::optional<int> v;
+
+	v = db["test"].find_first<std::optional<int>>(cif::key("id") == 1, "id");
+	BOOST_TEST(v.has_value());
+	BOOST_TEST(*v == 1);
+
+	v = db["test"].find_first<std::optional<int>>(cif::key("id") == 6, "id");
+	BOOST_TEST(not v.has_value());
+
 	// find1 tests
 	BOOST_TEST(db["test"].find1<int>(cif::key("id") == 1, "id") == 1);
 	BOOST_CHECK_THROW(db["test"].find1<int>(cif::all(), "id"), cif::multiple_results_error);
@@ -1858,7 +1936,7 @@ BOOST_AUTO_TEST_CASE(r1)
 	    of pdbx_nonpoly_scheme which itself is a parent of pdbx_entity_nonpoly. If I want to rename a residue
 	    I cannot update pdbx_nonpoly_scheme since changing a parent changes children, but not vice versa.

-	    But if I change the comp_id in atom_site, the pdbx_nonpoly_scheme is update, that's good, and then
+	    But if I change the comp_id in atom_site, the pdbx_nonpoly_scheme is updated, that's good, and then
 	    pdbx_entity_nonpoly is updated and that's bad.

 	    The idea is now that if we update a parent and a child that must change as well, we first check
@@ -2144,6 +2222,228 @@ _cat_3.num
 	// f.save(std::cout);
 }

+BOOST_AUTO_TEST_CASE(pc_1)
+{
+	/*
+	    Parent/child tests
+
+		Note that the dictionary is different than the one in test r1
+	*/
+
+	const char dict[] = R"(
+data_test_dict.dic
+    _datablock.id	test_dict.dic
+    _datablock.description
+;
+    A test dictionary
+;
+    _dictionary.title           test_dict.dic
+    _dictionary.datablock_id    test_dict.dic
+    _dictionary.version         1.0
+
+     loop_
+    _item_type_list.code
+    _item_type_list.primitive_code
+    _item_type_list.construct
+               code      char
+               '[][_,.;:"&<>()/\{}'`~!@#$%A-Za-z0-9*|+-]*'
+
+               text      char
+               '[][ \n\t()_,.;:"&<>/\{}'`~!@#$%?+=*A-Za-z0-9|^-]*'
+
+               int       numb
+               '[+-]?[0-9]+'
+
+save_cat_1
+    _category.description     'A simple test category'
+    _category.id              cat_1
+    _category.mandatory_code  no
+    _category_key.name        '_cat_1.id'
+    save_
+
+save__cat_1.id
+    _item.name                '_cat_1.id'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_linked.child_name   '_cat_2.parent_id'
+    _item_linked.parent_name  '_cat_1.id'
+    _item_type.code           int
+    save_
+
+save__cat_1.name
+    _item.name                '_cat_1.name'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_1.desc
+    _item.name                '_cat_1.desc'
+    _item.category_id         cat_1
+    _item.mandatory_code      yes
+    _item_type.code           text
+    save_
+
+save_cat_2
+    _category.description     'A second simple test category'
+    _category.id              cat_2
+    _category.mandatory_code  no
+    _category_key.name        '_cat_2.id'
+    save_
+
+save__cat_2.id
+    _item.name                '_cat_2.id'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_2.name
+    _item.name                '_cat_2.name'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_2.num
+    _item.name                '_cat_2.num'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_2.desc
+    _item.name                '_cat_2.desc'
+    _item.category_id         cat_2
+    _item.mandatory_code      yes
+    _item_type.code           text
+    save_
+
+save_cat_3
+    _category.description     'A third simple test category'
+    _category.id              cat_3
+    _category.mandatory_code  no
+    _category_key.name        '_cat_3.id'
+    save_
+
+save__cat_3.id
+    _item.name                '_cat_3.id'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+save__cat_3.name
+    _item.name                '_cat_3.name'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           code
+    save_
+
+save__cat_3.num
+    _item.name                '_cat_3.num'
+    _item.category_id         cat_3
+    _item.mandatory_code      yes
+    _item_type.code           int
+    save_
+
+loop_
+_pdbx_item_linked_group_list.parent_category_id
+_pdbx_item_linked_group_list.link_group_id
+_pdbx_item_linked_group_list.parent_name
+_pdbx_item_linked_group_list.child_name
+_pdbx_item_linked_group_list.child_category_id
+cat_1 1 '_cat_1.name' '_cat_2.name' cat_2
+cat_2 1 '_cat_2.name' '_cat_3.name' cat_3
+cat_2 1 '_cat_2.num'  '_cat_3.num'  cat_3
+
+    )";
+
+	struct membuf : public std::streambuf
+	{
+		membuf(char *text, size_t length)
+		{
+			this->setg(text, text, text + length);
+		}
+	} buffer(const_cast<char *>(dict), sizeof(dict) - 1);
+
+	std::istream is_dict(&buffer);
+
+	auto validator = cif::parse_dictionary("test", is_dict);
+
+	cif::file f;
+	f.set_validator(&validator);
+
+	// --------------------------------------------------------------------
+
+	const char data[] = R"(
+data_test
+loop_
+_cat_1.id
+_cat_1.name
+_cat_1.desc
+1 aap  Aap
+2 noot Noot
+3 mies Mies
+
+loop_
+_cat_2.id
+_cat_2.name
+_cat_2.num
+_cat_2.desc
+1 aap  1 'Een dier'
+2 aap  2 'Een andere aap'
+3 noot 1 'walnoot bijvoorbeeld'
+
+loop_
+_cat_3.id
+_cat_3.name
+_cat_3.num
+1 aap 1
+2 aap 2
+    )";
+
+	using namespace cif::literals;
+
+	struct data_membuf : public std::streambuf
+	{
+		data_membuf(char *text, size_t length)
+		{
+			this->setg(text, text, text + length);
+		}
+	} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
+
+	std::istream is_data(&data_buffer);
+	f.load(is_data);
+
+	auto &cat1 = f.front()["cat_1"];
+	auto &cat2 = f.front()["cat_2"];
+	auto &cat3 = f.front()["cat_3"];
+
+	// some parent/child tests
+
+	// find all children in cat2 for the row with id == 1 in cat1
+	auto rs1 = cat1.get_children(cat1.find1("id"_key == 1), cat2);
+	BOOST_TEST(rs1.size() == 2);
+
+	auto rs2 = cat1.get_children(cat1.find1("id"_key == 2), cat2);
+	BOOST_TEST(rs2.size() == 1);
+
+	auto rs3 = cat1.get_children(cat1.find1("id"_key == 3), cat2);
+	BOOST_TEST(rs3.size() == 0);
+
+	// finding parents
+	auto rs4 = cat2.get_parents(cat2.find1("id"_key == 1), cat1);
+	BOOST_TEST(rs4.size() == 1);
+
+	auto rs5 = cat3.get_parents(cat3.find1("id"_key == 1), cat2);
+	BOOST_TEST(rs5.size() == 1);
+
+	// This link is not defined:
+	auto rs6 = cat3.get_parents(cat3.find1("id"_key == 1), cat1);
+	BOOST_TEST(rs6.size() == 0);
+}
+
 // --------------------------------------------------------------------

 // BOOST_AUTO_TEST_CASE(bondmap_1)
@@ -2357,8 +2657,6 @@ _test.text ??

 BOOST_AUTO_TEST_CASE(output_test_1)
 {
-	cif::VERBOSE = 5;
-
 	auto data1 = R"(
 data_Q
 loop_
@@ -2863,7 +3161,7 @@ save__cat_1.name

 	std::istream is_dict(&buffer);

-	auto validator = cif::parse_dictionary("test_dict.dic", is_dict);
+	auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict);

 	cif::file f;
 	f.set_validator(&validator);
@@ -2901,8 +3199,6 @@ _cat_1.name
 	ss << f;

 	cif::file f2(ss);
-
-	f2.set_validator(&validator);
 	BOOST_ASSERT(f2.is_valid());

 	auto &audit_conform = f2.front()["audit_conform"];
@@ -3105,3 +3401,85 @@ _date    today
 	BOOST_TEST(db == db2);
 }

+BOOST_AUTO_TEST_CASE(find1_opt_1) // find1<std::optional<T>>: value on a match, empty optional when no row matches
+{
+	using namespace cif::literals;
+	using namespace std::literals;
+
+	auto f = R"(data_TEST
+#
+loop_
+_test.id
+_test.name
+_test.value
+1 aap   1.0
+2 noot  1.1
+3 mies  1.2
+    )"_cf;
+
+	auto &db = f.front();
+	auto &test = db["test"];
+
+	auto v = test.find1<std::optional<float>>("id"_key == 1, "value"); // id 1 is present in the loop above
+	BOOST_REQUIRE(v.has_value()); // REQUIRE ends the test case on failure, so the dereference below never sees an empty optional
+	BOOST_TEST(*v == 1.0f);
+
+	v = test.find1<std::optional<float>>("id"_key == 4, "value"); // no row carries id 4
+	BOOST_CHECK(v.has_value() == false);
+}
+
+// --------------------------------------------------------------------
+
+BOOST_AUTO_TEST_CASE(compound_test_1) // register REA_v2.cif from the test directory, then create compound "REA_v2" from it
+{
+	cif::compound_factory::instance().push_dictionary(gTestDir / "REA_v2.cif");
+	auto compound = cif::compound_factory::instance().create("REA_v2");
+	BOOST_REQUIRE(compound != nullptr); // Boost.Test check: reported as a test failure and stops before the dereference below (BOOST_ASSERT vanishes under NDEBUG)
+	BOOST_CHECK(compound->id() == "REA_v2");
+}
+
+// --------------------------------------------------------------------
+
+BOOST_AUTO_TEST_CASE(pdb_parser_test_1) // feeds an in-memory PDB-format fragment (header records plus seven ATOM records) to cif::pdb::read
+{
+	char k1CBS[] = R"(HEADER    RETINOIC-ACID TRANSPORT                 28-SEP-94   1CBS
+TITLE     CRYSTAL STRUCTURE OF CELLULAR RETINOIC-ACID-BINDING
+TITLE    2 PROTEINS I AND II IN COMPLEX WITH ALL-TRANS-RETINOIC ACID
+TITLE    3 AND A SYNTHETIC RETINOID
+COMPND    MOL_ID: 1;
+COMPND   2 MOLECULE: CELLULAR RETINOIC ACID BINDING PROTEIN TYPE II;
+COMPND   3 CHAIN: A;
+COMPND   4 ENGINEERED: YES
+SOURCE    MOL_ID: 1;
+SOURCE   2 ORGANISM_SCIENTIFIC: HOMO SAPIENS;
+SOURCE   3 ORGANISM_COMMON: HUMAN;
+SOURCE   4 ORGANISM_TAXID: 9606;
+SOURCE   5 CELL_LINE: BL21;
+SOURCE   6 GENE: HUMAN CRABP-II;
+SOURCE   7 EXPRESSION_SYSTEM: ESCHERICHIA COLI BL21(DE3);
+SOURCE   8 EXPRESSION_SYSTEM_TAXID: 469008;
+SOURCE   9 EXPRESSION_SYSTEM_STRAIN: BL21 (DE3);
+SOURCE  10 EXPRESSION_SYSTEM_PLASMID: PET-3A
+KEYWDS    RETINOIC-ACID TRANSPORT
+EXPDTA    X-RAY DIFFRACTION
+AUTHOR    G.J.KLEYWEGT,T.BERGFORS,T.A.JONES
+ATOM      1  N   PRO A   1      16.979  13.301  44.555  1.00 30.05           N
+ATOM      2  CA  PRO A   1      18.150  13.525  43.680  1.00 28.82           C
+ATOM      3  C   PRO A   1      18.656  14.966  43.784  1.00 26.59           C
+ATOM      4  O   PRO A   1      17.890  15.889  44.078  1.00 26.84           O
+ATOM      5  CB  PRO A   1      17.678  13.270  42.255  1.00 29.24           C
+ATOM      6  CG  PRO A   1      16.248  13.734  42.347  1.00 29.29           C
+ATOM      7  CD  PRO A   1      15.762  13.216  43.724  1.00 30.71           C)";
+
+	struct membuf : public std::streambuf // streambuf whose get area is an existing char array
+	{
+		membuf(char *text, size_t length)
+		{
+			this->setg(text, text, text + length); // get area spans [text, text + length), read position at the start
+		}
+	} buffer(k1CBS, sizeof(k1CBS) - 1); // - 1 leaves out the terminating NUL of the string literal
+
+	std::istream is(&buffer);
+
+	auto f = cif::pdb::read(is); // NOTE(review): the result is never inspected; presumably the test only expects this call not to throw - confirm
+}
Author	SHA1	Message	Date
Maarten L. Hekkelman	b3dc38f2d8	update for WIN32	2023-11-02 14:12:17 +01:00
Maarten L. Hekkelman	6044d3dce3	Added cif::cell::get_volume()	2023-10-19 11:58:21 +02:00
Maarten L. Hekkelman	29446f2122	new cif::item constructors version bump	2023-10-19 09:51:10 +02:00
Maarten L. Hekkelman	abb8673549	Better support for older cmake versions	2023-10-17 15:24:21 +02:00
Maarten L. Hekkelman	ffc8f9dcdf	Better support for older cmake versions	2023-10-17 15:22:35 +02:00
Maarten L. Hekkelman	288b2bb720	update changelog	2023-10-17 12:23:59 +02:00
Maarten L. Hekkelman	fb3b7bda68	made data dir options more visible in cmake config better error reporting in file::load	2023-10-10 13:39:13 +02:00
Maarten L. Hekkelman	6d5efe1cbd	Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk	2023-09-26 14:40:44 +02:00
Maarten L. Hekkelman	1ceec22184	Better conversion from string to int	2023-09-26 14:40:38 +02:00
Maarten L. Hekkelman	951ff9b953	Better conversion from string to int	2023-09-26 14:39:26 +02:00
Maarten L. Hekkelman	641f06a7e7	sqrt is not constexpr on macOS	2023-09-22 09:37:15 +02:00
Maarten L. Hekkelman	915ba4ac21	describe download CCD	2023-09-18 10:49:08 +02:00
Maarten L. Hekkelman	824637d83f	Update README.md, add link to documentation	2023-09-15 08:56:34 +02:00
Maarten L. Hekkelman	0871406fe3	Eigen dependency removed for clients Typos fixed Version bump	2023-09-14 16:03:00 +02:00
Maarten L. Hekkelman	1ad7e47b2e	version bump, changelog	2023-09-14 12:53:07 +02:00
Maarten L. Hekkelman	f72a2c69d0	rename doxygen target	2023-09-14 11:14:38 +02:00
Maarten L. Hekkelman	84d9275cb8	update versionstring	2023-09-14 11:12:49 +02:00
Maarten L. Hekkelman	1b7c387c8b	reintroduced get_terminal_width as an exported function	2023-09-14 09:52:52 +02:00
Maarten L. Hekkelman	2f1adbd22c	clean up	2023-09-14 09:45:13 +02:00
Maarten L. Hekkelman	65031523a6	Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk	2023-09-14 09:15:07 +02:00
Maarten L. Hekkelman	02cc0fa0f6	Using CTest Create docs/xml directory	2023-09-14 09:13:16 +02:00
Maarten L. Hekkelman	1e1afa023f	Using CTest Create docs/xml directory	2023-09-14 09:12:28 +02:00
Maarten L. Hekkelman	41f343c2cd	Create the docs/xml directory in configure step	2023-09-13 17:01:43 +02:00
Maarten L. Hekkelman	a73c4deaca	Revert changes in CMakeLists.txt for docs	2023-09-13 16:45:15 +02:00
Maarten L. Hekkelman	01a21aebc4	Fix install rules	2023-09-13 16:16:16 +02:00
Maarten L. Hekkelman	49434043f2	typo	2023-09-13 16:04:30 +02:00
Maarten L. Hekkelman	2e23877912	cleaning up a readthedocs.yaml file	2023-09-13 15:23:35 +02:00
Maarten L. Hekkelman	b737dd7df4	Even more documentation	2023-09-13 14:59:57 +02:00
Maarten L. Hekkelman	137ffaf768	Merge remote-tracking branch 'github/new-docs' into develop	2023-09-13 12:15:13 +02:00
Maarten L. Hekkelman	747c6d30d2	Added better support for std::optional in conditions	2023-09-13 12:14:04 +02:00
Maarten L. Hekkelman	4585968b11	fix point_type	2023-09-13 10:23:31 +02:00
Maarten L. Hekkelman	84af564aee	More documentation Version bump	2023-09-13 10:22:28 +02:00
Maarten L. Hekkelman	86d957675e	documented last code	2023-09-12 10:25:18 +02:00
Maarten L. Hekkelman	bc33e608db	better docs	2023-09-12 10:04:34 +02:00
Maarten L. Hekkelman	0b5d28338e	replacing std::endl where appropriate more docs	2023-09-12 09:06:45 +02:00
Maarten L. Hekkelman	bdbf22e705	more docs	2023-09-11 15:12:37 +02:00
Maarten L. Hekkelman	160f6016ee	more docs	2023-09-11 14:31:13 +02:00
Maarten L. Hekkelman	0855965edf	Documenting more Fixed colouring output manipulators	2023-09-11 12:37:51 +02:00
Maarten L. Hekkelman	fe3cbdab78	refactoring PDB interface	2023-09-11 08:37:18 +02:00
Maarten L. Hekkelman	a8c25f910d	new colouring of output	2023-09-08 10:22:08 +02:00
Maarten L. Hekkelman	cb82ec9b01	new colouring of output, first attempt	2023-09-08 09:23:01 +02:00
Maarten L. Hekkelman	e84282cb9a	documenting symmetry and text	2023-09-06 17:01:53 +02:00
Maarten L. Hekkelman	8b2e02e1b0	documented row	2023-09-06 14:29:14 +02:00
Maarten L. Hekkelman	1addd2be89	documented point	2023-09-06 13:49:13 +02:00
Maarten L. Hekkelman	2aebfc29ac	documenting parser	2023-09-06 11:22:17 +02:00
Maarten L. Hekkelman	26a5410b38	documenting model	2023-09-06 10:59:48 +02:00
Maarten L. Hekkelman	f44e6d0948	backup of documentation	2023-09-05 16:43:24 +02:00
Maarten L. Hekkelman	d496ebf6dd	backup of documentation	2023-09-05 15:40:53 +02:00
Maarten L. Hekkelman	1719ed6979	backup of documentation	2023-09-05 15:07:32 +02:00
Maarten L. Hekkelman	821895bb1b	backup of documentation	2023-09-05 13:28:09 +02:00
Maarten L. Hekkelman	3f437277d1	accidentally deleted	2023-09-05 11:00:14 +02:00
Maarten L. Hekkelman	39fc56084a	documentation backup	2023-09-05 10:42:11 +02:00
Maarten L. Hekkelman	e2fca07fad	documenting more	2023-09-04 16:04:41 +02:00
Maarten L. Hekkelman	ec0d75ce95	documenting more	2023-09-04 13:41:21 +02:00
Maarten L. Hekkelman	877a64adaa	documenting more	2023-09-04 11:28:33 +02:00
Maarten L. Hekkelman	0fcf9ed5ad	documenting more	2023-09-04 09:44:36 +02:00
Maarten L. Hekkelman	c99de817fa	documentation	2023-09-01 21:52:46 +02:00
Maarten L. Hekkelman	600c86a185	attempt to get some documentation	2023-09-01 17:12:54 +02:00
Maarten L. Hekkelman	1ae755b0a5	Start adding documentation	2023-08-31 14:31:41 +02:00
Maarten L. Hekkelman	7186057dd3	version bump	2023-08-22 13:45:08 +02:00
Maarten L. Hekkelman	768fec9c58	Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop	2023-08-22 13:36:52 +02:00
Maarten L. Hekkelman	7197dd877b	remove pkgconfig support	2023-08-22 13:36:43 +02:00
Maarten L. Hekkelman	b7aa7eac9f	remove pkgconfig support	2023-08-22 13:33:34 +02:00
Maarten L. Hekkelman	818dc2f952	Remove Dart	2023-08-22 13:32:11 +02:00
Maarten L. Hekkelman	9dc5d11829	newer version string code, this should be final	2023-08-16 11:26:20 +02:00
Maarten L. Hekkelman	8565e1b408	Better version string	2023-08-15 13:27:10 +02:00
Maarten L. Hekkelman	bfc7133786	fix config better version string implementation	2023-08-15 09:49:04 +02:00
Maarten L. Hekkelman	15a49f1bb4	Fix uncompressing concatenated gzip files	2023-08-04 09:51:11 +02:00
Maarten L. Hekkelman	db1dff16fe	update changelog	2023-08-03 10:20:40 +02:00
Maarten L. Hekkelman	8d7d9d3a31	Fix for PDB files that do not terminate their last line with a new line character	2023-08-03 10:19:12 +02:00
Maarten L. Hekkelman	078bf8a559	stricter code	2023-07-31 10:48:47 +02:00
Maarten L. Hekkelman	1f314a5e9b	removing compiler warnings on MSVC	2023-07-19 15:49:26 +02:00
Maarten L. Hekkelman	0adb50ac01	Add dependency on Eigen3 in config	2023-07-19 14:21:43 +02:00
Maarten L. Hekkelman	d91707cd06	Link to Eigen3	2023-07-19 14:11:22 +02:00
Maarten L. Hekkelman	c0e7ee4eeb	small stuff	2023-07-18 15:25:44 +02:00
Pino Toscano	c143a7223e	build: fix installation of cron script on GNU/Hurd (#46 ) Use the same Linux paths, as the cron implementations available on the Hurd as usually the same as Linux.	2023-07-16 12:01:17 +02:00
Maarten L. Hekkelman	2c951ba146	prevent downloading components.cif if it already exists	2023-06-20 13:47:13 +02:00
Maarten L. Hekkelman	660aadcd9c	conditional include <compare>	2023-06-20 11:11:00 +02:00
Maarten L. Hekkelman	91d6adb980	Version bump	2023-06-20 09:48:27 +02:00
Maarten L. Hekkelman	b79ddd55c5	Update readme	2023-06-20 09:45:46 +02:00
Maarten L. Hekkelman	0ca645c634	right align number, if there was a dictionary loaded containing the required information	2023-06-20 08:48:40 +02:00
Maarten L. Hekkelman	676c0c8dc8	Added include compare for spaceship operator	2023-06-20 08:48:08 +02:00
Maarten L. Hekkelman	5c366ad9b1	- remove three_letter_code for CCP4 dictionaries - fix test for equality of compound ID's, they are case insensitive you know	2023-06-13 11:42:39 +02:00
Maarten L. Hekkelman	836aed6ea9	Fix includes to contain <cstdint>	2023-06-08 13:15:43 +02:00
Maarten L. Hekkelman	50df250415	Merge branch 'develop' into trunk	2023-06-08 10:12:03 +02:00
Maarten L. Hekkelman	2409fc5b7b	update changelog, version bump	2023-06-08 10:10:49 +02:00
Maarten L. Hekkelman	8a1184a24c	Fix cif_id_for_number	2023-06-07 19:11:20 +02:00
Maarten L. Hekkelman	d2fbc54765	New cache location	2023-06-07 14:07:27 +02:00
Maarten L. Hekkelman	1bcb26ba75	extend validator faster unique_id	2023-06-07 13:08:36 +02:00
Maarten L. Hekkelman	32f4749d84	faster cif parser	2023-06-07 11:19:35 +02:00
Maarten L. Hekkelman	da12be879a	progress_bar consuming too much time	2023-06-07 09:15:17 +02:00
Maarten L. Hekkelman	94a38ad4e8	Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop	2023-06-06 14:31:26 +02:00
Maarten L. Hekkelman	20ef79a172	for c++17, limited version of std::string_view	2023-06-06 14:30:11 +02:00
Maarten L. Hekkelman	92bf25476e	Speed improvements	2023-06-06 14:12:21 +02:00
Maarten L. Hekkelman	b55e074dd7	reserve some token buffer space	2023-06-06 09:33:31 +02:00
Maarten L. Hekkelman	7b654a837d	with reserved words automaton	2023-06-06 09:22:55 +02:00
Maarten L. Hekkelman	ae9d247d22	optimised the parser a bit	2023-06-05 13:43:31 +02:00
Maarten L. Hekkelman	16b7deafe8	Better is_unquoted_string test	2023-06-02 17:09:57 +02:00
Maarten L. Hekkelman	f2cfe28458	Update README	2023-05-31 15:56:50 +02:00
Maarten L. Hekkelman	2e8a52949e	Update example and README	2023-05-31 15:54:53 +02:00
Maarten L. Hekkelman	441e142767	Update readme	2023-05-31 15:42:54 +02:00
Maarten L. Hekkelman	bf9bdd2aae	Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk	2023-05-31 15:17:00 +02:00
Maarten L. Hekkelman	ce14593f0b	improved loading resources from absolute path better error reporting when loading dictionary	2023-05-31 15:16:10 +02:00
Maarten L. Hekkelman	1c02a451e1	improved has_atom_id added couple of comparison operators to sym_op class	2023-05-16 13:55:07 +02:00
Maarten L. Hekkelman	448855a2d3	catch error in create entity for branch	2023-05-09 11:46:37 +02:00
Maarten L. Hekkelman	8ac8e89f2b	Fix progress bar by removing conditional variable	2023-05-02 13:45:02 +02:00
Maarten L. Hekkelman	2281f59401	Remove struct_conn records as well in remove_branch	2023-05-02 13:44:36 +02:00
Maarten L. Hekkelman	4cb0673370	small change to matrix	2023-04-25 10:13:30 +02:00
Maarten L. Hekkelman	76c5706f7c	moving to eigen3 eigensolver, fixing include and dependencies	2023-04-22 14:14:48 +02:00
Maarten L. Hekkelman	2bf4284ff4	cleanup	2023-04-21 14:52:12 +02:00
Maarten L. Hekkelman	d9e2fc97f3	Added missing spinner test	2023-04-21 14:50:22 +02:00
Maarten L. Hekkelman	85dfdf4174	Better progress bar	2023-04-21 14:49:54 +02:00
Martin Salinas	1bede3efda	Removed unused argument warning (#36 ) As argument rhs is not being used in that equals (should the equals function always return false), I added that flag so the compiler skips that warning.	2023-04-21 09:18:47 +02:00
Maarten L. Hekkelman	505f0fdd31	oops	2023-04-20 16:33:24 +02:00
Maarten L. Hekkelman	eed7ec3a4a	Merge branch 'develop' of github.com:PDB-REDO/libcifpp into develop	2023-04-20 13:38:48 +02:00
Maarten L. Hekkelman	fdb057e0e2	for now, require eigen3 add inverse_symmetry_copy to crystal	2023-04-20 13:14:52 +02:00
Maarten L. Hekkelman	3fddd1a628	Using quaternions, when possible	2023-04-20 11:37:36 +02:00
Maarten L. Hekkelman	2440706b87	backup	2023-04-19 18:51:41 +02:00
Maarten L. Hekkelman	cf628fa95c	backup	2023-04-19 18:36:33 +02:00
Maarten L. Hekkelman	2b0b47d20d	Fix special case	2023-04-19 16:04:59 +02:00
Maarten L. Hekkelman	a8abf2804f	attempt to use quaternions	2023-04-19 16:01:52 +02:00
Maarten L. Hekkelman	22d7757949	Introduced cif::crystal	2023-04-19 10:17:38 +02:00
Maarten L. Hekkelman	0b0d170c96	a bit of documentation	2023-04-19 09:57:49 +02:00
Maarten L. Hekkelman	1e8e9adf62	Merge branch 'trunk' into develop	2023-04-19 09:22:49 +02:00
Maarten L. Hekkelman	0f03fc31e0	added required include	2023-04-19 09:22:32 +02:00
Maarten L. Hekkelman	518432e0fb	test data	2023-04-17 20:54:57 +02:00
Maarten L. Hekkelman	10ef3464ef	Fix symmetry issue	2023-04-17 20:52:10 +02:00
Maarten L. Hekkelman	226abbd577	Merge branch 'develop' of s4.hekkelman.net:git-repo/libcifpp into develop	2023-04-17 18:56:46 +02:00
Maarten L. Hekkelman	8d66f42ab1	more test cases	2023-04-17 18:56:02 +02:00
Maarten L. Hekkelman	0f14d06f9a	Added inverse symmetry operation	2023-04-14 19:38:39 +02:00
Maarten L. Hekkelman	c53be78496	symmetry fixes	2023-04-14 19:04:16 +02:00
Maarten L. Hekkelman	a38f31ce48	fix closest_symmetry_copy	2023-04-14 17:56:59 +02:00
Maarten L. Hekkelman	1258bd5047	eigen, fixed	2023-04-14 14:08:52 +02:00
Maarten L. Hekkelman	d25cbeb14c	matrix eigen value work	2023-04-14 11:47:18 +02:00
Maarten L. Hekkelman	9b60a07fb6	calculating eigen values	2023-04-13 19:55:32 +02:00
Maarten L. Hekkelman	c0dd41ce50	added inverse symmetry operation	2023-04-13 15:49:15 +02:00
Maarten L. Hekkelman	4cff92bbcc	symmetry operations now working correctly	2023-04-13 11:42:59 +02:00
Maarten L. Hekkelman	9aa8a223c7	symmetry work	2023-04-12 17:00:09 +02:00
Maarten L. Hekkelman	fb59adcfdd	Fix symmetry rotational numbers	2023-04-12 10:59:23 +02:00
Maarten L. Hekkelman	4acca8a3e3	Merge branch 'trunk' into develop	2023-04-07 09:31:11 +02:00
Maarten L. Hekkelman	c1030d2b08	Merge branch 'MartinSalinas98-trunk' into trunk	2023-04-07 09:26:00 +02:00
Maarten L. Hekkelman	16a185c6c0	More include changes	2023-04-07 09:16:38 +02:00
Maarten L. Hekkelman	174e818bd0	Merge branch 'trunk' of github.com:MartinSalinas98/libcifpp into MartinSalinas98-trunk	2023-04-07 08:43:44 +02:00
Maarten L. Hekkelman	7f829bf5df	Merge branch 'trunk' of github.com:PDB-REDO/libcifpp into trunk	2023-04-07 08:43:01 +02:00
Maarten L. Hekkelman	71908282bb	merged from trunk	2023-04-07 08:42:46 +02:00
MartinSalinas98	db3ae446af	Imported local files with relative path	2023-04-07 03:39:02 +02:00
Martin Salinas	bc7d291307	Merge branch 'PDB-REDO:trunk' into trunk	2023-04-07 03:33:57 +02:00
Maarten L. Hekkelman	cfd4702279	Fix memory leak	2023-04-05 20:46:18 +02:00
Maarten L. Hekkelman	54eefb546d	Fix memory leak	2023-04-05 20:44:47 +02:00
Maarten L. Hekkelman	6af0d96a4e	Fix memory leak in category	2023-04-05 20:28:47 +02:00
Maarten L. Hekkelman	eb50bee4a3	atom_type_traits changes	2023-04-04 19:19:30 +02:00
Maarten L. Hekkelman	b6143f3652	optimise load atom data	2023-03-30 20:49:03 +02:00
Maarten L. Hekkelman	348aa7afb6	fix test (use gTestDir)	2023-03-30 20:48:41 +02:00
Martin Salinas	66912b68cc	Commented unused variable ``` /home/msalinas/Documents/standalone-installations/cifParsers/libcifpp/src/pdb/cif2pdb.cpp: In function ‘std::tuple<int, int> cif::pdb::WriteCoordinatesForModel(std::ostream&, const cif::datablock&, const std::map<std::__cxx11::basic_string<char>, std::tuple<std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > > >&, std::set<std::__cxx11::basic_string<char> >&, int)’: /home/msalinas/Documents/standalone-installations/cifParsers/libcifpp/src/pdb/cif2pdb.cpp:3362:15: warning: unused variable ‘pdbx_nonpoly_scheme’ [-Wunused-variable] 3362 \| auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"]; \| ``` This warning appeared while compiling the library. The use of that variable below has been commented, so I think it's appropiate to do the same thing with the unused variable.	2023-03-28 13:54:45 +02:00
Maarten L. Hekkelman	84dd218758	Merge branch 'MartinSalinas98-patch-1' into trunk	2023-03-28 11:33:05 +02:00
Maarten L. Hekkelman	106ae38976	Update readme	2023-03-28 11:32:11 +02:00
Maarten L. Hekkelman	f1a52245ea	Merge branch 'patch-1' of github.com:MartinSalinas98/libcifpp into MartinSalinas98-patch-1	2023-03-28 11:27:48 +02:00
Maarten L. Hekkelman	cea38e5bb2	Merge branch 'trunk' into develop	2023-03-28 10:31:24 +02:00
Maarten L. Hekkelman	ed5aac358c	libcifpp really requires zlib, not only private.	2023-03-28 10:27:58 +02:00
Maarten L. Hekkelman	5eb128251e	Added category::find1<std::optional>	2023-03-27 10:36:47 +02:00
Maarten L. Hekkelman	cfa46ec954	Added model::has_atom_id	2023-03-23 14:32:21 +01:00
Martin Salinas	07cc60e264	Fixed installation commands Installation commands in the readme cause an error when running last command `cmake --install .` because of the lack of sudo privileges. The following commands don't require sudo to run successfully and install the library.	2023-03-23 11:42:10 +01:00
Maarten L. Hekkelman	90973dc547	version bump, update changelog	2023-03-22 12:39:16 +01:00
Maarten L. Hekkelman	12e3d71b00	fix construct_from_angle_axis	2023-03-21 19:49:21 +01:00
Maarten L. Hekkelman	9addc8f873	fix remove_atom add create_water	2023-03-21 19:49:11 +01:00
Maarten L. Hekkelman	343465cef0	Added test for create_non_poly with initializers	2023-03-08 16:00:40 +01:00
Maarten L. Hekkelman	bec5159415	residue numbering in pdb, again...	2023-02-14 08:28:40 +01:00
Maarten L. Hekkelman	f8da8360e6	write twin info in pdb format	2023-02-14 08:27:13 +01:00
Maarten L. Hekkelman	fb2ad7b75d	Fix in REMARK3 parser for more strict mmcif_pdbx dictionary	2023-02-10 16:24:53 +01:00