for write_data_files=off

Added an option (WRITE_DATA_FILES) to skip writing data files during installation
2026-06-07 15:54:22 +08:00 · 2024-01-31 10:55:08 +01:00 · 2024-01-31 10:37:02 +01:00
28 changed files with 790 additions and 2700 deletions
--- a/.github/workflows/build-documentation.yml
+++ b/.github/workflows/build-documentation.yml
@@ -1,65 +0,0 @@
-# This starter workflow is for a CMake project running on multiple platforms. There is a different starter workflow if you just want a single platform.
-# See: https://github.com/actions/starter-workflows/blob/main/ci/cmake-single-platform.yml
-name: publish docs
-
-on:
-  push:
-    branches: [ "trunk" ]
-
-permissions:
-  contents: read
-  pages: write
-  id-token: write
-
-concurrency:
-  group: "pages"
-  cancel-in-progress: false
-
-jobs:
-  docs:
-    runs-on: ubuntu-latest
-    steps:
-    - uses: actions/checkout@v1
-
-    - name: Set reusable strings
-      # Turn repeated input strings (such as the build output directory) into step outputs. These step outputs can be used throughout the workflow file.
-      id: strings
-      shell: bash
-      run: |
-        echo "build-output-dir=${{ github.workspace }}/build" >> "$GITHUB_OUTPUT"
-
-    - name: Install dependencies Ubuntu
-      run: sudo apt-get update && sudo apt-get install cmake doxygen
-
-    - uses: actions/setup-python@v4
-      with:
-        python-version: '3.9'
-        cache: 'pip' # caching pip dependencies
-    - run: pip install -r docs/requirements.txt
-
-    - name: Configure CMake
-      run: cmake -S . -B ${{ steps.strings.outputs.build-output-dir }} -DBUILD_DOCUMENTATION=ON -DBUILD_TESTING=OFF
-
-    - name: Run Sphinx
-      run: |
-        cmake --build ${{ steps.strings.outputs.build-output-dir }} --target Sphinx-libcifpp
-        ls -l ${{ steps.strings.outputs.build-output-dir }}
-        ls -l ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
-
-    - name: Upload artifact
-      uses: actions/upload-pages-artifact@v2
-      with:
-        path: ${{ steps.strings.outputs.build-output-dir }}/docs/sphinx
-
-  deploy:
-    environment:
-      name: github-pages
-      url: ${{ steps.deployment.outputs.page_url }}
-
-    runs-on: ubuntu-latest
-    needs: docs
-
-    steps:
-      - name: Deploy to GitHub Pages
-        id: deployment
-        uses: actions/deploy-pages@v2
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -45,15 +45,13 @@ jobs:
        cmake -B ${{ steps.strings.outputs.build-output-dir }}
        -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }}
        -DCMAKE_BUILD_TYPE=Release
+        -DCIFPP_DOWNLOAD_CCD=OFF
        -S ${{ github.workspace }}
        
    - name: Build
      run: cmake --build ${{ steps.strings.outputs.build-output-dir }} --config Release
-      
+
    - name: Test
      working-directory: ${{ steps.strings.outputs.build-output-dir }}
      run: ctest --build-config Release --output-on-failure

-    - name: Install
-      if: matrix.os != 'windows-latest'
-      run: sudo cmake --install ${{ steps.strings.outputs.build-output-dir }} --config Release
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,7 @@ build/
 .vscode/
 .vs/
 tools/update-libcifpp-data
-rsrc/components.cif*
+data/components.cif*
 CMakeSettings.json
 msvc/
 src/revision.hpp
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,24 +11,21 @@
 # this list of conditions and the following disclaimer in the documentation
 # and/or other materials provided with the distribution.

-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
+# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 cmake_minimum_required(VERSION 3.16)

 # set the project name
-project(
-	libcifpp
-	VERSION 6.1.0
-	LANGUAGES CXX)
+project(libcifpp VERSION 6.0.0 LANGUAGES CXX)

 list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

@@ -53,9 +50,7 @@ if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
 endif()

 if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
-	set(CMAKE_CXX_FLAGS
-		"${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers"
-	)
+	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra -Wno-unused-parameter -Wno-missing-field-initializers")
 elseif(MSVC)
 	set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4")
 endif()
@@ -71,26 +66,28 @@ if(NOT(BUILD_FOR_CCP4 AND WIN32))
 	option(BUILD_SHARED_LIBS "Build a shared library instead of a static one" OFF)
 endif()

-if(BUILD_FOR_CCP4)
-	unset(CIFPP_DOWNLOAD_CCD)
-	unset(CIFPP_INSTALL_UPDATE_SCRIPT)
-else()
-	# Lots of code depend on the availability of the components.cif file
-	option(CIFPP_DOWNLOAD_CCD
-		"Download the CCD file components.cif during installation" ON)
+# Lots of code depend on the availability of the components.cif file
+option(CIFPP_DOWNLOAD_CCD "Download the CCD file components.cif during installation" ON)

-	# An optional cron script can be installed to keep the data files up-to-date
-	if(UNIX AND NOT APPLE)
-		option(CIFPP_INSTALL_UPDATE_SCRIPT
-			"Install the script to update CCD and dictionary files" ON)
-	endif()
+# An optional cron script can be installed to keep the data files up-to-date
+if("${CMAKE_SYSTEM_NAME}" STREQUAL "Linux" AND NOT BUILD_FOR_CCP4)
+	option(CIFPP_INSTALL_UPDATE_SCRIPT "Install the script to update CCD and dictionary files" ON)
 endif()

-# When CCP4 is sourced in the environment, we can recreate the symmetry
-# operations table
-if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
-	option(CIFPP_RECREATE_SYMOP_DATA
-		"Recreate SymOp data table in case it is out of date" ON)
+# Optionally avoid installing data files (requires privileges at inconvenient moments)
+option(WRITE_DATA_FILES "Write data files during installation" ON)
+
+# When CCP4 is sourced in the environment (and provides lib/data/syminfo.lib), we can recreate the symmetry operations table
+if(EXISTS "$ENV{CCP4}")
+	if(EXISTS "$ENV{CCP4}/lib/data/syminfo.lib")
+		option(CIFPP_RECREATE_SYMOP_DATA "Recreate SymOp data table in case it is out of date" ON)
+	else()
+		set(CIFPP_RECREATE_SYMOP_DATA OFF)
+		message(WARNING "Symop data table recreation requested, but file syminfo.lib was not found in $ENV{CCP4}/lib/data")
+	endif()
+else()
+	set(CIFPP_RECREATE_SYMOP_DATA OFF)
+	message("Not trying to recreate symop_table_data.hpp since CCP4 is not defined")
 endif()

 # CCP4 build
@@ -147,30 +144,23 @@ endif()
 # Libraries

 # Start by finding out if std:regex is usable. Note that the current
-# implementation in GCC is not acceptable, it crashes on long lines. The
-# implementation in libc++ (clang) and MSVC seem to be OK.
-check_cxx_source_compiles(
-	"
+# implementation in GCC is not acceptable, it crashes on long lines.
+# The implementation in libc++ (clang) and MSVC seem to be OK.
+check_cxx_source_compiles("
 #include <iostream>
 #ifndef __GLIBCXX__
 #error
 #endif
-int main(int argc, char *argv[]) { return 0; }"
-	GXX_LIBSTDCPP)
+int main(int argc, char *argv[]) { return 0; }" GXX_LIBSTDCPP)

 if(GXX_LIBSTDCPP)
-	message(
-		STATUS "Testing for known regex bug, since you're using GNU libstdc++")
+	message(STATUS "Testing for known regex bug, since you're using GNU libstdc++")

 	try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
-		${CMAKE_CURRENT_BINARY_DIR}/test
-		${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)
+		${CMAKE_CURRENT_BINARY_DIR}/test ${PROJECT_SOURCE_DIR}/cmake/test-rx.cpp)

 	if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
-		message(
-			STATUS
-			"You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
-		)
+		message(STATUS "You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead")

 		find_package(Boost 1.80 QUIET COMPONENTS regex)

@@ -180,7 +170,8 @@ if(GXX_LIBSTDCPP)
 			FetchContent_Declare(
 				boost-rx
 				GIT_REPOSITORY https://github.com/boostorg/regex
-				GIT_TAG boost-1.83.0)
+				GIT_TAG boost-1.83.0
+			)

 			FetchContent_MakeAvailable(boost-rx)
 		endif()
@@ -194,8 +185,8 @@ set(THREADS_PREFER_PTHREAD_FLAG)
 find_package(Threads)

 if(MSVC)
-	# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
-	# set.
+	# Avoid linking the shared library of zlib
+	# Search ZLIB_ROOT first if it is set.
 	if(ZLIB_ROOT)
 		set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
 		list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_ROOT)
@@ -205,7 +196,8 @@ if(MSVC)
 	set(_ZLIB_x86 "(x86)")
 	set(_ZLIB_SEARCH_NORMAL
 		PATHS "[HKEY_LOCAL_MACHINE\\SOFTWARE\\GnuWin32\\Zlib;InstallPath]"
-		"$ENV{ProgramFiles}/zlib" "$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
+		"$ENV{ProgramFiles}/zlib"
+		"$ENV{ProgramFiles${_ZLIB_x86}}/zlib")
 	unset(_ZLIB_x86)
 	list(APPEND _ZLIB_SEARCHES _ZLIB_SEARCH_NORMAL)

@@ -214,37 +206,26 @@ if(MSVC)
 	endif()

 	foreach(search ${_ZLIB_SEARCHES})
-		find_library(
-			ZLIB_LIBRARY
-			NAMES zlibstatic NAMES_PER_DIR ${${search}}
-			PATH_SUFFIXES lib)
+		find_library(ZLIB_LIBRARY NAMES zlibstatic NAMES_PER_DIR ${${search}} PATH_SUFFIXES lib)
 	endforeach()
 endif()

 find_package(ZLIB REQUIRED)

-# Using Eigen3 is a bit of a thing. We don't want to build it completely since
-# we only need a couple of header files. Nothing special. But often, eigen3 is
-# already installed and then we prefer that.
-find_package(Eigen3 3.4 QUIET)
+find_package(Eigen3 QUIET)

-if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
-	get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen
-		INTERFACE_INCLUDE_DIRECTORIES)
+if(Eigen3_FOUND AND TARGET Eigen3::Eigen) # a find module may report Eigen3 as found without defining the imported target
+	get_target_property(EIGEN_INCLUDE_DIR Eigen3::Eigen INTERFACE_INCLUDE_DIRECTORIES)
 else()
-	# Create a private copy of eigen3 and populate it only, no need to build
-	FetchContent_Declare(
-		my-eigen3
+	ExternalProject_Add(
+		local_Eigen3
 		GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
-		GIT_TAG 3.4.0)
+		GIT_TAG 3.4.0
+		CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${CMAKE_CURRENT_BINARY_DIR}/external
+	)

-	FetchContent_GetProperties(my-eigen3)
-
-	if(NOT my-eigen3_POPULATED)
-		FetchContent_Populate(my-eigen3)
-	endif()
-
-	set(EIGEN_INCLUDE_DIR ${my-eigen3_SOURCE_DIR})
+	set(EIGEN3_D local_Eigen3)
+	set(EIGEN_INCLUDE_DIR ${CMAKE_CURRENT_BINARY_DIR}/external/include/eigen3)
 endif()

 include(FindFilesystem)
@@ -260,20 +241,17 @@ write_version_header(${PROJECT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
 # SymOp data table
 if(CIFPP_RECREATE_SYMOP_DATA)
 	# The tool to create the table
-	add_executable(symop-map-generator
-		"${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")
+	add_executable(symop-map-generator "${PROJECT_SOURCE_DIR}/src/symop-map-generator.cpp")

 	add_custom_command(
 		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
-		COMMAND
-		$<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib
-		$ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
+		COMMAND $<TARGET_FILE:symop-map-generator> $ENV{CLIBD}/syminfo.lib $ENV{CLIBD}/symop.lib ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
+	)

 	add_custom_target(
-		OUTPUT
-		${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
-		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib"
-		"$ENV{CLIBD}/symop.lib")
+		OUTPUT ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp
+		DEPENDS symop-map-generator "$ENV{CLIBD}/syminfo.lib" "$ENV{CLIBD}/symop.lib"
+	)
 endif()

 # Sources
@@ -289,18 +267,19 @@ set(project_sources
 	${PROJECT_SOURCE_DIR}/src/validate.cpp
 	${PROJECT_SOURCE_DIR}/src/text.cpp
 	${PROJECT_SOURCE_DIR}/src/utilities.cpp
+
 	${PROJECT_SOURCE_DIR}/src/atom_type.cpp
 	${PROJECT_SOURCE_DIR}/src/compound.cpp
 	${PROJECT_SOURCE_DIR}/src/point.cpp
 	${PROJECT_SOURCE_DIR}/src/symmetry.cpp
+
 	${PROJECT_SOURCE_DIR}/src/model.cpp
+
 	${PROJECT_SOURCE_DIR}/src/pdb/cif2pdb.cpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif.cpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb_record.hpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.hpp
 	${PROJECT_SOURCE_DIR}/src/pdb/pdb2cif_remark_3.cpp
-	${PROJECT_SOURCE_DIR}/src/pdb/reconstruct.cpp
-	${PROJECT_SOURCE_DIR}/src/pdb/validate-pdbx.cpp
 )

 set(project_headers
@@ -317,32 +296,33 @@ set(project_headers
 	${PROJECT_SOURCE_DIR}/include/cif++/condition.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/category.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/row.hpp
+
 	${PROJECT_SOURCE_DIR}/include/cif++/atom_type.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/compound.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/point.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/symmetry.hpp
+
 	${PROJECT_SOURCE_DIR}/include/cif++/model.hpp
+
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb.hpp
+
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/cif2pdb.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/io.hpp
 	${PROJECT_SOURCE_DIR}/include/cif++/pdb/pdb2cif.hpp
-	${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp)
+	${PROJECT_SOURCE_DIR}/include/cif++/pdb/tls.hpp
+)

-add_library(cifpp ${project_sources} ${project_headers}
-	${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
+add_library(cifpp ${project_sources} ${project_headers} ${PROJECT_SOURCE_DIR}/src/symop_table_data.hpp)
 add_library(cifpp::cifpp ALIAS cifpp)

 set(CMAKE_DEBUG_POSTFIX d)
 set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")

-generate_export_header(cifpp EXPORT_FILE_NAME
-	${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)
+generate_export_header(cifpp EXPORT_FILE_NAME ${PROJECT_SOURCE_DIR}/include/cif++/exports.hpp)

 if(BOOST_REGEX)
-	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
-		BOOST_REGEX_STANDALONE=1)
-	get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
-		INTERFACE_INCLUDE_DIRECTORIES)
+	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1 BOOST_REGEX_STANDALONE=1)
+	get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex INTERFACE_INCLUDE_DIRECTORIES)
 endif()

 if(MSVC)
@@ -351,14 +331,20 @@ endif()

 set_target_properties(cifpp PROPERTIES POSITION_INDEPENDENT_CODE ON)

-target_include_directories(
-	cifpp
-	PUBLIC "$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
+target_include_directories(cifpp
+	PUBLIC
+	"$<BUILD_INTERFACE:${PROJECT_SOURCE_DIR}/include>"
 	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
-	PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
+	PRIVATE
+	"${EIGEN_INCLUDE_DIR}"
+	"${BOOST_REGEX_INCLUDE_DIR}"
+)

-target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB
-	${CIFPP_REQUIRED_LIBRARIES})
+target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB ${CIFPP_REQUIRED_LIBRARIES})
+
+if(EIGEN3_D) # holds the ExternalProject target name; test the variable itself, not the (undefined) variable it names
+	add_dependencies(cifpp ${EIGEN3_D})
+endif()

 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
 	target_link_options(cifpp PRIVATE -undefined dynamic_lookup)
@@ -366,7 +352,7 @@ endif(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")

 if(CIFPP_DOWNLOAD_CCD)
 	# download the components.cif file from CCD
-	set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/rsrc/components.cif)
+	set(COMPONENTS_CIF ${PROJECT_SOURCE_DIR}/data/components.cif)

 	if(EXISTS ${COMPONENTS_CIF})
 		file(SIZE ${COMPONENTS_CIF} CCD_FILE_SIZE)
@@ -378,88 +364,70 @@ if(CIFPP_DOWNLOAD_CCD)
 	endif()

 	if(NOT EXISTS ${COMPONENTS_CIF})
-		# Since the file(DOWNLOAD) command in cmake does not use compression, we try
-		# to download the gzipped version and decompress it ourselves.
+		if(NOT EXISTS ${PROJECT_SOURCE_DIR}/data)
+			file(MAKE_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
+		endif()
+
+		# Since the file(DOWNLOAD) command in cmake does not use
+		# compression, we try to download the gzipped version and
+		# decompress it ourselves.
 		find_program(GUNZIP gunzip)

-		if(WIN32 OR GUNZIP STREQUAL "GUNZIP-NOTFOUND")
-			file(
-				DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif
-				${COMPONENTS_CIF}
-				SHOW_PROGRESS
-				STATUS CCD_FETCH_STATUS)
-		else()
+		if(GUNZIP)
 			if(NOT EXISTS "${COMPONENTS_CIF}.gz")
-				file(
-					DOWNLOAD
-					https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz
-					${COMPONENTS_CIF}.gz
-					SHOW_PROGRESS
-					STATUS CCD_FETCH_STATUS)
+				file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz ${COMPONENTS_CIF}.gz
+					SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
 			endif()

-			add_custom_command(
-				OUTPUT ${COMPONENTS_CIF}
-				COMMAND "${GUNZIP}" ${COMPONENTS_CIF}.gz
-				WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/rsrc/)
-
-			add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
+			add_custom_command(OUTPUT ${COMPONENTS_CIF}
+				COMMAND ${GUNZIP} ${COMPONENTS_CIF}.gz
+				WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/data/)
+		else()
+			file(DOWNLOAD https://files.wwpdb.org/pub/pdb/data/monomers/components.cif ${COMPONENTS_CIF}
+				SHOW_PROGRESS STATUS CCD_FETCH_STATUS)
 		endif()

 		# Do not continue if downloading went wrong
 		list(POP_FRONT CCD_FETCH_STATUS CCD_FETCH_STATUS_CODE)

 		if(CCD_FETCH_STATUS_CODE)
-			message(
-				FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
+			message(FATAL_ERROR "Error trying to download CCD file: ${CCD_FETCH_STATUS}")
 		endif()
 	endif()
+
+	add_custom_target(COMPONENTS ALL DEPENDS ${COMPONENTS_CIF})
 endif()

 # Installation directories
 if(BUILD_FOR_CCP4)
-	set(CIFPP_DATA_DIR
-		"$ENV{CCP4}/share/libcifpp"
-		CACHE PATH "Directory where dictionary and other static data is stored")
+	set(CIFPP_DATA_DIR "$ENV{CCP4}/share/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
 else()
-	set(CIFPP_DATA_DIR
-		"${CMAKE_INSTALL_FULL_DATADIR}/libcifpp"
-		CACHE PATH "Directory where dictionary and other static data is stored")
+	set(CIFPP_DATA_DIR "${CMAKE_INSTALL_FULL_DATADIR}/libcifpp" CACHE PATH "Directory where dictionary and other static data is stored")
 endif()

-if(CIFPP_DATA_DIR)
-	target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")
-endif()
+target_compile_definitions(cifpp PUBLIC DATA_DIR="${CIFPP_DATA_DIR}")

 if(UNIX AND NOT BUILD_FOR_CCP4)
 	if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
-		set(CIFPP_CACHE_DIR
-			"/var/cache/libcifpp"
-			CACHE PATH "The directory where downloaded data files are stored")
+		set(CIFPP_CACHE_DIR "/var/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
 	else()
-		set(CIFPP_CACHE_DIR
-			"${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp"
-			CACHE PATH "The directory where downloaded data files are stored")
+		set(CIFPP_CACHE_DIR "${CMAKE_INSTALL_FULL_LOCALSTATEDIR}/cache/libcifpp" CACHE PATH "The directory where downloaded data files are stored")
 	endif()

 	target_compile_definitions(cifpp PUBLIC CACHE_DIR="${CIFPP_CACHE_DIR}")

-	set(CIFPP_ETC_DIR
-		"${CMAKE_INSTALL_FULL_SYSCONFDIR}"
-		CACHE PATH "The directory where the update configuration file is stored")
+	set(CIFPP_ETC_DIR "${CMAKE_INSTALL_FULL_SYSCONFDIR}" CACHE PATH "The directory where the update configuration file is stored")
 else()
 	unset(CIFPP_CACHE_DIR)
 endif()

 # Install rules
-install(
-	TARGETS cifpp
+install(TARGETS cifpp
 	EXPORT cifpp-targets
 	ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
 	LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
 	RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
-	INCLUDES
-	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
+	INCLUDES DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})

 if(MSVC AND BUILD_SHARED_LIBS)
 	install(
@@ -474,89 +442,91 @@ file(GLOB OLD_CONFIG_FILES
 	${CMAKE_INSTALL_FULL_LIBDIR}/cmake/cifpp/cifppTargets*.cmake)

 if(OLD_CONFIG_FILES)
-	message(
-		STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
+	message(STATUS "Installation will remove old config files: ${OLD_CONFIG_FILES}")
 	install(CODE "file(REMOVE ${OLD_CONFIG_FILES})")
 endif()

-install(
-	EXPORT cifpp-targets
+install(EXPORT cifpp-targets
 	FILE "cifpp-targets.cmake"
 	NAMESPACE cifpp::
-	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp)
+	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
+)

 install(
 	DIRECTORY include/cif++
 	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-	COMPONENT Devel)
+	COMPONENT Devel
+)

 install(
 	FILES include/cif++.hpp
 	DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}
-	COMPONENT Devel)
+	COMPONENT Devel
+)

-if(CIFPP_DATA_DIR)
-	install(
-		FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
+if(WRITE_DATA_FILES)
+	install(FILES
+		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
 		${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
-		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
-		DESTINATION ${CIFPP_DATA_DIR})
-endif()
+		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
+		${COMPONENTS_CIF}
+		DESTINATION ${CIFPP_DATA_DIR}
+	)

-if(CIFPP_CACHE_DIR)
-	install(
-		FILES ${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
-		${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
-		${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic ${COMPONENTS_CIF}
-		DESTINATION ${CIFPP_CACHE_DIR})
+	if(CIFPP_CACHE_DIR)
+		install(FILES
+			${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
+			${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
+			${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
+			${COMPONENTS_CIF}
+			DESTINATION ${CIFPP_CACHE_DIR}
+		)
+	endif()
 endif()

 set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)

 configure_package_config_file(
-	${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
+	${CONFIG_TEMPLATE_FILE}
+	${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
 	INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
-	PATH_VARS CIFPP_DATA_DIR)
+	PATH_VARS CIFPP_DATA_DIR
+)

-install(
-	FILES "${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
+install(FILES
+	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake"
 	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
 	DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/cifpp
-	COMPONENT Devel)
+	COMPONENT Devel
+)

-set_target_properties(
-	cifpp
-	PROPERTIES VERSION ${PROJECT_VERSION}
+set_target_properties(cifpp PROPERTIES
+	VERSION ${PROJECT_VERSION}
 	SOVERSION "${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}"
 	INTERFACE_cifpp_MAJOR_VERSION ${PROJECT_VERSION_MAJOR})

-set_property(
-	TARGET cifpp
-	APPEND
-	PROPERTY COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION)
+set_property(TARGET cifpp APPEND PROPERTY
+	COMPATIBLE_INTERFACE_STRING cifpp_MAJOR_VERSION
+)

 write_basic_package_version_file(
 	"${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config-version.cmake"
 	VERSION ${PROJECT_VERSION}
-	COMPATIBILITY AnyNewerVersion)
-
-# In case we're included as sub_directory:
-if(NOT PROJECT_IS_TOP_LEVEL)
-	set(CIFPP_SHARE_DIR ${PROJECT_SOURCE_DIR}/rsrc PARENT_SCOPE)
-endif()
+	COMPATIBILITY AnyNewerVersion
+)

 if(BUILD_TESTING)
+
 	# We're using the older version 2 of Catch2
 	FetchContent_Declare(
 		Catch2
 		GIT_REPOSITORY https://github.com/catchorg/Catch2.git
-		GIT_TAG v2.13.9)
+		GIT_TAG v2.13.9
+	)

 	FetchContent_MakeAvailable(Catch2)

-	list(
-		APPEND
-		CIFPP_tests
+	list(APPEND CIFPP_tests
 		unit-v2
 		unit-3d
 		format
@@ -564,82 +534,63 @@ if(BUILD_TESTING)
 		rename-compound
 		sugar
 		spinner
-		validate-pdbx)
+	)

 	foreach(CIFPP_TEST IN LISTS CIFPP_tests)
 		set(CIFPP_TEST "${CIFPP_TEST}-test")
 		set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/test/${CIFPP_TEST}.cpp")

-		add_executable(
-			${CIFPP_TEST} ${CIFPP_TEST_SOURCE}
-			"${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")
+		add_executable(${CIFPP_TEST} ${CIFPP_TEST_SOURCE} "${CMAKE_CURRENT_SOURCE_DIR}/test/test-main.cpp")

-		target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp
-			Catch2::Catch2)
-		target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
+		target_link_libraries(${CIFPP_TEST} PRIVATE Threads::Threads cifpp::cifpp Catch2::Catch2)
+		target_include_directories(${CIFPP_TEST} PRIVATE ${EIGEN_INCLUDE_DIR})

 		if(MSVC)
 			# Specify unwind semantics so that MSVC knowns how to handle exceptions
 			target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
 		endif()

-		add_custom_target(
-			"run-${CIFPP_TEST}"
-			DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
+		add_custom_target("run-${CIFPP_TEST}" DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})

 		add_custom_command(
 			OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
-			COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
-			${CMAKE_CURRENT_SOURCE_DIR}/test)
+			COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)

-		add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
-			${CMAKE_CURRENT_SOURCE_DIR}/test)
+		add_test(NAME ${CIFPP_TEST}
+			COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR}/test)
 	endforeach()
 endif()

 # Optionally install the update scripts for CCD and dictionary files
 if(CIFPP_INSTALL_UPDATE_SCRIPT)
-	if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL
-		"GNU")
+	if(${CMAKE_SYSTEM_NAME} STREQUAL "Linux" OR ${CMAKE_SYSTEM_NAME} STREQUAL "GNU")
 		if("${CMAKE_INSTALL_PREFIX}" STREQUAL "/usr/local")
-			set(CIFPP_CRON_DIR
-				"/etc/cron.weekly"
-				CACHE PATH "The cron directory, for the update script")
+			set(CIFPP_CRON_DIR "/etc/cron.weekly" CACHE PATH "The cron directory, for the update script")
 		else()
-			set(CIFPP_CRON_DIR
-				"${CIFPP_ETC_DIR}/cron.weekly"
-				CACHE PATH "The cron directory, for the update script")
+			set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/cron.weekly" CACHE PATH "The cron directory, for the update script")
 		endif()
-	elseif(${CMAKE_SYSTEM_NAME} STREQUAL "FreeBSD")
-		set(CIFPP_CRON_DIR
-			"${CIFPP_ETC_DIR}/periodic/weekly"
-			CACHE PATH "The cron directory, for the update script")
+	elseif(UNIX) # assume all others are like FreeBSD...
+		set(CIFPP_CRON_DIR "${CIFPP_ETC_DIR}/periodic/weekly" CACHE PATH "The cron directory, for the update script")
 	else()
 		message(FATAL_ERROR "Don't know where to install the update script")
 	endif()

-	configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in
-		update-libcifpp-data @ONLY)
+	configure_file(${PROJECT_SOURCE_DIR}/tools/update-libcifpp-data.in update-libcifpp-data @ONLY)
 	install(
 		FILES ${CMAKE_CURRENT_BINARY_DIR}/update-libcifpp-data
 		DESTINATION ${CIFPP_CRON_DIR}
-		PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE
-		WORLD_READ)
+		PERMISSIONS OWNER_EXECUTE OWNER_READ GROUP_EXECUTE GROUP_READ WORLD_EXECUTE WORLD_READ
+	)

 	install(DIRECTORY DESTINATION ${CIFPP_CACHE_DIR})

 	# a config file, to make it complete
 	if(NOT EXISTS "${CIFPP_ETC_DIR}/libcifpp.conf")
-		file(
-			WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
-			[[# Uncomment the next line to enable automatic updates
+		file(WRITE ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf [[# Uncomment the next line to enable automatic updates
 # update=true
 ]])
-		install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf
-			DESTINATION "${CIFPP_ETC_DIR}")
-		install(
-			CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")"
-		)
+		install(FILES ${CMAKE_CURRENT_BINARY_DIR}/libcifpp.conf DESTINATION "${CIFPP_ETC_DIR}")
+		install(CODE "message(\"A configuration file has been written to ${CIFPP_ETC_DIR}/libcifpp.conf, please edit this file to enable automatic updates\")")

 		install(DIRECTORY DESTINATION "${CIFPP_ETC_DIR}/libcifpp/cache-update.d")
 	endif()
@@ -656,7 +607,7 @@ set(CPACK_SOURCE_TGZ ON)
 set(CPACK_SOURCE_TBZ2 OFF)
 set(CPACK_SOURCE_TXZ OFF)
 set(CPACK_SOURCE_TZ OFF)
-set(CPACK_SOURCE_IGNORE_FILES "/rsrc/components.cif;/build;/.vscode;/.git")
+set(CPACK_SOURCE_IGNORE_FILES "/data/components.cif;/build;/.vscode;/.git")
 set(CPACK_PACKAGE_FILE_NAME "${PROJECT_NAME}-${PROJECT_VERSION}")
 set(CPACK_SOURCE_PACKAGE_FILE_NAME ${CPACK_PACKAGE_FILE_NAME})
 include(CPack)
--- a/README.md
+++ b/README.md
@@ -1,34 +1,10 @@
-[![github CI](https://github.com/pdb-redo/libcifpp/actions/workflows/cmake-multi-platform.yml/badge.svg)](https://github.com/pdb-redo/libcifpp/actions)
-[![GitHub License](https://img.shields.io/github/license/pdb-redo/libcifpp)](https://github.com/pdb-redo/libcifpp/LICENSE)
-
 # libcifpp

-As the name implies, this library was originally written to work with mmCIF files
-using C++ as programming language. The design of this library leanes heavily on
-the structure of CIF files. These files can be thought of as a text dump of a
-relational databank with, often but not always, a very strict schema describing
-the data. These schema's are called dictionaries.
-
-Using information from the content of a mmCIF file and an optional schema,
-libcifpp allows you to access the data in the file as a collection of datablock
-each containing a collection of categories with rows of data. The categories can
-be searched for data using queries written in regular C++ syntax. When a dictionary
-was specified, inserted data is checked for validity. Likewise removal of data
-may result in cascaded removal of linked data in other categories using
-parent/child relationship information.
-
-Since there were still many programs using the legacy PDB format at the time
-development started, a layer was added that converts data to and from PDB format
-into mmCIF format. This means you can manipulate PDB files as if they were
-normal mmCIF files.
-
-Apart from this basic functionality, libcifpp also offers code to help with
-symmetry calculations, 3d manipulations and obtaining information from the CCD
-[Chemical Component Dictionary](https://www.wwpdb.org/data/ccd).
+This library contains code to work with mmCIF and legacy PDB files.

 ## Documentation

-The documentation can be found at [github.io](https://pdb-redo.github.io/libcifpp/)
+The documentation can be found at https://www.hekkelman.com/libcifpp-doc/

 ## Synopsis

@@ -88,138 +64,54 @@ int main(int argc, char *argv[])
 You might be able to use libcifpp from a package manager used by your
 OS distribution. But most likely this package will be out-of-date.
 Therefore it is recommended to build *libcifpp* from code. It is not
-hard to do. But it is recommended to read the following instructions
-carefully.
+hard to do.

 ### Requirements

 The code for this library was written in C++17. You therefore need a
-recent compiler to build it. For the development gcc >= 9.4 and clang >= 9.0
+recent compiler to build it. For the development gcc 9.4 and clang 9.0
 have been used as well as MSVC version 2019.

-The other requirement you really need to have installed on your computer
-is a version of [CMake](https://cmake.org). For now the minimum version
-is 3.16 but that may soon change into a higher version. You should also
-install the gui version of CMake to set build options easily, on Debian
-I prefer to use the curses version installed with `cmake-curses-gui`.
-
-It is very useful to have [mrc](https://github.com/mhekkel/mrc) available.
-However, this is only an option if you use Windows or an operating system
-using the ELF executable format (i.e. Linux or FreeBSD). MRC is a resource
-compiler that allows including data files into the executable making them
-easier to install.
-
-Other libraries you might want to install beforehand are:
+Other requirements are:

+- [cmake](https://cmake.org), a build tool.
+- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
+  allows including data files into the executable making them easier to
+  install. Strictly speaking this is optional, but at the expense of
+  functionality.
 - [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
  library to do amongst others matrix calculations. This usually can be
  installed using your package manager, in Debian/Ubuntu it is called
  `libeigen3-dev`
 - [zlib](https://github.com/madler/zlib), the development version of this
  library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [boost](https://www.boost.org), in Debian/Ubuntu this is `libboost-dev`.
-  
-  The Boost libraries are only needed in case you are using GCC due to a long
-  standing bug in GNU's implementation of std::regex. It simply crashes
-  on the regular expressions used in the mmcif_pdbx dictionary and so
-  we use the boost regex implementation instead.
+- [boost](https://www.boost.org).
+
+The Boost libraries are only needed in case you want to build the test
+code or if you are using GCC. That last condition is due to a long
+standing bug in GNU's implementation of std::regex. It simply crashes
+on the regular expressions used in the mmcif_pdbx dictionary and so
+we use the boost regex implementation instead.

 ### Building

-First you need to download the code:
+Building the code is as simple as typing:

 ```console
- git clone https://github.com/PDB-REDO/libcifpp.git
+ git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules
 cd libcifpp
+ cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release
+ cmake --build build
+ cmake --install build
 ```

-You should start by considering where to install libcifpp. If you have
-sufficient permissions on your computer you perhaps should use the
-default but libcifpp can be configured to be installed anywhere
-including e.g. *$HOME/.local*.
-
-Next step is to configure, for this use the CMake gui application. If you
-installed the curses version of cmake you can type `ccmake`. On Windows
-you can use `cmake-gui.exe`.
-
-To install in the default location:
-
-```console
-ccmake -S . -B build
-```
-
-To install elsewhere, e.g. *$HOME/.local*:
-
-```console
-ccmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local
-```
-
-In the cmake window, start the configure command (use button or press 'c').
-After the first configure step you will see a list of settable options.
-Alter these to match your preferences. Most options are self explaining
-and contain a description. Some may need a bit more explanation:
-
- CIFPP_DATA_DIR, this directory will be used to store initial versions
-  of the mmcif_pdbx dictionary as well as the optional CCD file.
-
- CIFPP_DOWNLOAD_CCD
-
-  The CCD file is huge and perhaps you think you don't
-  need it. In that case you can leave this OFF. But that will limit the
-  use cases.
-
- CIFPP_INSTALL_UPDATE_SCRIPT
-  
-  The files in CIFPP_DATA_DIR are quickly becoming out of date. On
-  FreeBSD and Linux you can install a script that updates these files
-  on a weekly basis.
-
- CIFPP_CRON_DIR
-  
-  The directory where the update script is to be installed.
-
- CIFPP_ETC_DIR
-  
-  The update script will only work if the file called *libcifpp.conf*
-  in this *etc* directory will contain an uncommented line with
-
-```console
-update=true
-```
-
- CIFPP_CACHE_DIR
-
-  When you installed and enabled the update script, new files are
-  written to this directory.
-
- CIFPP_RECREATE_SYMOP_DATA
-  
-  If you had CCP4 sourced into your environment, this option allows
-  you to recreate the symop data file.
-
- BUILD_FOR_CCP4
-  
-  Build a special version of libcifpp to be installed in the CCP4
-  environment.
-
-After setting these options you can run the configure step again and
-then use generate to create the makefiles.
-
-Building and installing is then as simple as:
-
-```console
-cmake --build build
-cmake --install build
-```
-
-If this fails due to lack of permissions, you can try:
-
-```console
-sudo cmake --install build
-```
+This checks out the source code from GitHub, creates a new directory
+where CMake stores its files, runs the configure step, builds the code
+and then installs the library and auxiliary files.

 Tests are created by default, and to test the code you can run:

 ```console
-ctest --test-dir build
+ cmake --build build
+ ctest --test-dir build
 ```
--- a/8
+++ b/8
@@ -1,11 +1,3 @@
-Version 6.1.0
- Add formula weight to entity in pdb2cif
- Change order of categories inside a datablock to match order in file
- Change default order to write out categories in a file based on
-  parent/child relationship
- Added validate_pdbx and recover_pdbx
- Fixed a serious bug in category_index when moving categories
-
 Version 6.0.0
 - Drop the use of CCP4's monomer library for compound information

--- a/cmake/cifpp-config.cmake.in
+++ b/cmake/cifpp-config.cmake.in
@@ -2,7 +2,7 @@

 include("${CMAKE_CURRENT_LIST_DIR}/cifpp-targets.cmake")

-set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")
+# set_and_check(CIFPP_SHARE_DIR "@PACKAGE_CIFPP_DATA_DIR@")

 include(CMakeFindDependencyMacro)

--- a/data/ccd-subset.cif
+++ b/data/ccd-subset.cif
--- a/docs/resources.rst
+++ b/docs/resources.rst
@@ -18,7 +18,7 @@ Loading Resources

 No matter where the resource is located, you should always use the single libcifpp API call :cpp:func:`cif::load_resource` to load them. This function returns a *std::istream* wrapped inside a *std::unique_ptr*. 

-The order in which resources are searched for is:
+The order in which resources are searched for is:

 * Use the resource that was defined by calling :cpp:func:`cif::add_file_resource`
  for this name.
--- a/include/cif++/compound.hpp
+++ b/include/cif++/compound.hpp
@@ -166,22 +166,17 @@ class compound
 		return m_id == "HOH" or m_id == "H2O" or m_id == "WAT";
 	}

-	char one_letter_code() const { return m_one_letter_code; }; ///< Return the one letter code to use in a canonical sequence. If unknown the value '\0' is returned
-	std::string parent_id() const { return m_parent_id; };      ///< Return the parent id code in case a parent is specified (e.g. MET for MSE)
-
  private:
 	friend class compound_factory_impl;
-	friend class local_compound_factory_impl;

 	compound(cif::datablock &db);
-	compound(cif::datablock &db, int);
+	compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group);

 	std::string m_id;
 	std::string m_name;
 	std::string m_type;
+	std::string m_group;
 	std::string m_formula;
-	char m_one_letter_code = 0;
-	std::string m_parent_id;
 	float m_formula_weight = 0;
 	int m_formal_charge = 0;
 	std::vector<compound_atom> m_atoms;
@@ -219,20 +214,6 @@ class compound_factory
 	/// Override any previously loaded dictionary with @a inDictFile
 	void push_dictionary(const std::filesystem::path &inDictFile);

-	/** @brief Override any previously loaded dictionary with the data in @a file
-	 *
-	 * @note experimental feature
-	 *
-	 * Load the file @a file as a source for compound information. This may
-	 * be e.g. a regular mmCIF file with extra files containing compound
-	 * information.
-	 *
-	 * Be carefull to remove the block again, best use @ref cif::compound_source
-	 * as a stack based object.
-	 */
-
-	void push_dictionary(const file &file);
-
 	/// Remove the last pushed dictionary
 	void pop_dictionary();

@@ -270,35 +251,4 @@ class compound_factory
 	std::shared_ptr<compound_factory_impl> m_impl;
 };

-// --------------------------------------------------------------------
-
-/**
- * @brief Stack based source for compound info.
- *
- * Use this class to temporarily add a compound source to the
- * compound_factory.
- *
- * @code{.cpp}
- * cif::file f("1cbs-with-custom-rea.cif");
- * cif::compound_source cs(f);
- *
- * auto &cf = cif::compound_factory::instance();
- * auto rea_compound = cf.create("REA");
- * @endcode
- */
-
-class compound_source
-{
-  public:
-	compound_source(const cif::file &file)
-	{
-		cif::compound_factory::instance().push_dictionary(file);
-	}
-
-	~compound_source()
-	{
-		cif::compound_factory::instance().pop_dictionary();
-	}
-};
-
 } // namespace cif
--- a/include/cif++/model.hpp
+++ b/include/cif++/model.hpp
@@ -1115,4 +1115,11 @@ class structure
 	std::vector<residue> m_non_polymers;
 };

+// --------------------------------------------------------------------
+
+/// \brief Reconstruct all missing categories for an assumed PDBx file.
+/// Some people believe that simply dumping some atom records is enough.
+/// \param db The cif::datablock that hopefully contains some valid data
+void reconstruct_pdbx(datablock &db);
+
 } // namespace cif::mm
--- a/include/cif++/pdb.hpp
+++ b/include/cif++/pdb.hpp
@@ -1,17 +1,17 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- *
+ * 
 * Copyright (c) 2023 NKI/AVL, Netherlands Cancer Institute
- *
+ * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- *
+ * 
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- *
+ * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -30,13 +30,13 @@

 /**
 * @file pdb.hpp
- *
+ * 
 * This file presents the API to read and write files in the
 * legacy and ancient PDB format.
- *
+ * 
 * The code works on the basis of best effort since it is
 * impossible to have correct round trip fidelity.
- *
+ * 
 */

 namespace cif::pdb
@@ -81,7 +81,7 @@ inline void write(std::ostream &os, const file &f)
 /** @brief Write out the data in @a db to file @a file
 * in legacy PDB format or mmCIF format, depending on the
 * filename extension.
- *
+ * 
 * If extension of @a file is *.gz* the resulting file will
 * be written in gzip compressed format.
 */
@@ -90,7 +90,7 @@ void write(const std::filesystem::path &file, const datablock &db);
 /** @brief Write out the data in @a f to file @a file
 * in legacy PDB format or mmCIF format, depending on the
 * filename extension.
- *
+ * 
 * If extension of @a file is *.gz* the resulting file will
 * be written in gzip compressed format.
 */
@@ -99,34 +99,6 @@ inline void write(const std::filesystem::path &p, const file &f)
 	write(p, f.front());
 }

-// --------------------------------------------------------------------
-
-/** \brief Reconstruct all missing categories for an assumed PDBx file.
- *
- * Some people believe that simply dumping some atom records is enough.
- *
- * \param file The cif::file that hopefully contains some valid data
- * \param dictionary The mmcif dictionary to use
- */
-
-void reconstruct_pdbx(file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
-
-/** \brief This is an extension to cif::validator, use the logic in common
- * PDBx files to see if the file is internally consistent.
- *
- * This function for now checks if the following categories are consistent:
- *
- * atom_site -> pdbx_poly_seq_scheme -> entity_poly_seq -> entity_poly -> entity
- *
- * Use the common \ref cif::VERBOSE flag to turn on diagnostic messages.
- *
- * \param file The input file
- * \param dictionary The mmcif dictionary to use
- * \result Returns true if the file was valid and consistent
- */
-
-bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mmcif_pdbx");
-
 // --------------------------------------------------------------------
 // Other I/O related routines

@@ -134,7 +106,7 @@ bool is_valid_pdbx_file(const file &pdbx_file, std::string_view dictionary = "mm
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
- *
+ * 
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 */
@@ -144,7 +116,7 @@ std::string get_HEADER_line(const datablock &data, std::string::size_type trunca
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
- *
+ * 
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 */
@@ -154,7 +126,7 @@ std::string get_COMPND_line(const datablock &data, std::string::size_type trunca
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
- *
+ * 
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 */
@@ -164,11 +136,12 @@ std::string get_SOURCE_line(const datablock &data, std::string::size_type trunca
 *
 * The line returned should be compatible with the legacy PDB
 * format and is e.g. used in the DSSP program.
- *
+ * 
 * @param data The datablock to use as source for the requested data
 * @param truncate_at The maximum length of the line returned
 */

 std::string get_AUTHOR_line(const datablock &data, std::string::size_type truncate_at = 127);

-} // namespace cif::pdb
+} // namespace cif::pdb
+
--- a/include/cif++/row.hpp
+++ b/include/cif++/row.hpp
@@ -290,13 +290,6 @@ class row_handle
 		return operator[](get_column_ix(column)).template as<T>();
 	}

-	/// \brief Get the value of column @a column cast to type @a T
-	template <typename T>
-	T get(std::string_view column) const
-	{
-		return operator[](get_column_ix(column)).template as<T>();
-	}
-
 	/// \brief assign each of the columns named in @a values to their respective value
 	void assign(const std::vector<item> &values)
 	{
--- a/src/category.cpp
+++ b/src/category.cpp
@@ -47,6 +47,7 @@ class row_comparator
 {
  public:
 	row_comparator(category &cat)
+		: m_category(cat)
 	{
 		auto cv = cat.get_cat_validator();

@@ -68,13 +69,13 @@ class row_comparator
 		}
 	}

-	int operator()(const category &cat, const row *a, const row *b) const
+	int operator()(const row *a, const row *b) const
 	{
 		assert(a);
 		assert(b);

-		row_handle rha(cat, *a);
-		row_handle rhb(cat, *b);
+		row_handle rha(m_category, *a);
+		row_handle rhb(m_category, *b);

 		int d = 0;
 		for (const auto &[k, f] : m_comparator)
@@ -91,11 +92,11 @@ class row_comparator
 		return d;
 	}

-	int operator()(const category &cat, const row_initializer &a, const row *b) const
+	int operator()(const row_initializer &a, const row *b) const
 	{
 		assert(b);

-		row_handle rhb(cat, *b);
+		row_handle rhb(m_category, *b);

 		int d = 0;
 		auto ai = a.begin();
@@ -123,6 +124,7 @@ class row_comparator
 	using key_comparator = std::tuple<uint16_t, compareFunc>;

 	std::vector<key_comparator> m_comparator;
+	category &m_category;
 };

 // --------------------------------------------------------------------
@@ -133,18 +135,18 @@ class row_comparator
 class category_index
 {
  public:
-	category_index(category &cat);
+	category_index(category *cat);

 	~category_index()
 	{
 		delete m_root;
 	}

-	row *find(const category &cat, row *k) const;
-	row *find_by_value(const category &cat, row_initializer k) const;
+	row *find(row *k) const;
+	row *find_by_value(row_initializer k) const;

-	void insert(category &cat, row *r);
-	void erase(category &cat, row *r);
+	void insert(row *r);
+	void erase(row *r);

 	// reorder the row's and returns new head and tail
 	std::tuple<row *, row *> reorder()
@@ -190,8 +192,8 @@ class category_index
 		bool m_red;
 	};

-	entry *insert(category &cat, entry *h, row *v);
-	entry *erase(category &cat, entry *h, row *k);
+	entry *insert(entry *h, row *v);
+	entry *erase(entry *h, row *k);

 	//	void validate(entry* h, bool isParentRed, uint32_t blackDepth, uint32_t& minBlack, uint32_t& maxBlack) const;

@@ -322,24 +324,26 @@ class category_index
 		return result;
 	}

+	category &m_category;
 	row_comparator m_row_comparator;
 	entry *m_root;
 };

-category_index::category_index(category &cat)
-	: m_row_comparator(cat)
+category_index::category_index(category *cat)
+	: m_category(*cat)
+	, m_row_comparator(m_category)
 	, m_root(nullptr)
 {
-	for (auto r : cat)
-		insert(cat, r.get_row());
+	for (auto r : m_category)
+		insert(r.get_row());
 }

-row *category_index::find(const category &cat, row *k) const
+row *category_index::find(row *k) const
 {
 	const entry *r = m_root;
 	while (r != nullptr)
 	{
-		int d = m_row_comparator(cat, k, r->m_row);
+		int d = m_row_comparator(k, r->m_row);
 		if (d < 0)
 			r = r->m_left;
 		else if (d > 0)
@@ -351,14 +355,14 @@ row *category_index::find(const category &cat, row *k) const
 	return r ? r->m_row : nullptr;
 }

-row *category_index::find_by_value(const category &cat, row_initializer k) const
+row *category_index::find_by_value(row_initializer k) const
 {
 	// sort the values in k first

 	row_initializer k2;
-	for (auto &f : cat.key_field_indices())
+	for (auto &f : m_category.key_field_indices())
 	{
-		auto fld = cat.get_column_name(f);
+		auto fld = m_category.get_column_name(f);

 		auto ki = find_if(k.begin(), k.end(), [&fld](auto &i) { return i.name() == fld; });
 		if (ki == k.end())
@@ -370,7 +374,7 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const
 	const entry *r = m_root;
 	while (r != nullptr)
 	{
-		int d = m_row_comparator(cat, k2, r->m_row);
+		int d = m_row_comparator(k2, r->m_row);
 		if (d < 0)
 			r = r->m_left;
 		else if (d > 0)
@@ -382,34 +386,34 @@ row *category_index::find_by_value(const category &cat, row_initializer k) const
 	return r ? r->m_row : nullptr;
 }

-void category_index::insert(category &cat, row *k)
+void category_index::insert(row *k)
 {
-	m_root = insert(cat, m_root, k);
+	m_root = insert(m_root, k);
 	m_root->m_red = false;
 }

-category_index::entry *category_index::insert(category &cat, entry *h, row *v)
+category_index::entry *category_index::insert(entry *h, row *v)
 {
 	if (h == nullptr)
 		return new entry(v);

-	int d = m_row_comparator(cat, v, h->m_row);
+	int d = m_row_comparator(v, h->m_row);
 	if (d < 0)
-		h->m_left = insert(cat, h->m_left, v);
+		h->m_left = insert(h->m_left, v);
 	else if (d > 0)
-		h->m_right = insert(cat, h->m_right, v);
+		h->m_right = insert(h->m_right, v);
 	else
 	{
-		row_handle rh(cat, *v);
+		row_handle rh(m_category, *v);

 		std::ostringstream os;
-		for (auto col : cat.key_fields())
+		for (auto col : m_category.key_fields())
 		{
 			if (rh[col])
 				os << col << ": " << std::quoted(rh[col].text()) << "; ";
 		}

-		throw duplicate_key_error("Duplicate Key violation, cat: " + cat.name() + " values: " + os.str());
+		throw duplicate_key_error("Duplicate Key violation, cat: " + m_category.name() + " values: " + os.str());
 	}

 	if (is_red(h->m_right) and not is_red(h->m_left))
@@ -424,25 +428,25 @@ category_index::entry *category_index::insert(category &cat, entry *h, row *v)
 	return h;
 }

-void category_index::erase(category &cat, row *k)
+void category_index::erase(row *k)
 {
-	assert(find(cat, k) == k);
+	assert(find(k) == k);

-	m_root = erase(cat, m_root, k);
+	m_root = erase(m_root, k);
 	if (m_root != nullptr)
 		m_root->m_red = false;
 }

-category_index::entry *category_index::erase(category &cat, entry *h, row *k)
+category_index::entry *category_index::erase(entry *h, row *k)
 {
-	if (m_row_comparator(cat, k, h->m_row) < 0)
+	if (m_row_comparator(k, h->m_row) < 0)
 	{
 		if (h->m_left != nullptr)
 		{
 			if (not is_red(h->m_left) and not is_red(h->m_left->m_left))
 				h = move_red_left(h);

-			h->m_left = erase(cat, h->m_left, k);
+			h->m_left = erase(h->m_left, k);
 		}
 	}
 	else
@@ -450,7 +454,7 @@ category_index::entry *category_index::erase(category &cat, entry *h, row *k)
 		if (is_red(h->m_left))
 			h = rotateRight(h);

-		if (m_row_comparator(cat, k, h->m_row) == 0 and h->m_right == nullptr)
+		if (m_row_comparator(k, h->m_row) == 0 and h->m_right == nullptr)
 		{
 			delete h;
 			return nullptr;
@@ -461,13 +465,13 @@ category_index::entry *category_index::erase(category &cat, entry *h, row *k)
 			if (not is_red(h->m_right) and not is_red(h->m_right->m_left))
 				h = move_red_right(h);

-			if (m_row_comparator(cat, k, h->m_row) == 0)
+			if (m_row_comparator(k, h->m_row) == 0)
 			{
 				h->m_row = find_min(h->m_right)->m_row;
 				h->m_right = erase_min(h->m_right);
 			}
 			else
-				h->m_right = erase(cat, h->m_right, k);
+				h->m_right = erase(h->m_right, k);
 		}
 	}

@@ -516,7 +520,7 @@ category::category(const category &rhs)
 		insert_impl(end(), clone_row(*r));

 	if (m_cat_validator != nullptr and m_index == nullptr)
-		m_index = new category_index(*this);
+		m_index = new category_index(this);
 }

 category::category(category &&rhs)
@@ -560,7 +564,7 @@ category &category::operator=(const category &rhs)
 		m_cat_validator = rhs.m_cat_validator;

 		if (m_cat_validator != nullptr and m_index == nullptr)
-			m_index = new category_index(*this);
+			m_index = new category_index(this);
 	}

 	return *this;
@@ -665,7 +669,7 @@ void category::set_validator(const validator *v, datablock &db)
 			}

 			if (missing.empty())
-				m_index = new category_index(*this);
+				m_index = new category_index(this);
 			else
 			{
 				std::ostringstream msg;
@@ -778,7 +782,7 @@ bool category::is_valid() const
 		for (auto r : *this)
 		{
 			auto p = r.get_row();
-			if (m_index->find(*this, p) != p)
+			if (m_index->find(p) != p)
 				m_validator->report_error("Key not found in index for category " + m_name, true);
 		}
 	}
@@ -900,7 +904,7 @@ row_handle category::operator[](const key_type &key)
 		if (m_index == nullptr)
 			throw std::logic_error("Category " + m_name + " does not have an index");

-		auto row = m_index->find_by_value(*this, key);
+		auto row = m_index->find_by_value(key);
 		if (row != nullptr)
 			result = { *this, *row };
 	}
@@ -1074,7 +1078,7 @@ category::iterator category::erase(iterator pos)
 		throw std::runtime_error("erase");

 	if (m_index != nullptr)
-		m_index->erase(*this, r);
+		m_index->erase(r);

 	if (r == m_head)
 	{
@@ -1246,14 +1250,12 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
 	std::string id_tag = "id";
 	if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
 	{
-		id_tag = m_cat_validator->m_keys.front();
-
 		if (m_index == nullptr and m_cat_validator != nullptr)
-			m_index = new category_index(*this);
+			m_index = new category_index(this);
 		
 		for (;;)
 		{
-			if (m_index->find_by_value(*this, {{ id_tag, result }}) == nullptr)
+			if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
 				break;
 			result = generator(static_cast<int>(m_last_unique_num++));
 		}
@@ -1405,7 +1407,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
 {
 	// make sure we have an index, if possible
 	if (m_index == nullptr and m_cat_validator != nullptr)
-		m_index = new category_index(*this);
+		m_index = new category_index(this);

 	auto &col = m_columns[column];

@@ -1431,9 +1433,9 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
 	if (updateLinked and // an update of an Item's value
 		m_index != nullptr and key_field_indices().count(column))
 	{
-		reinsert = m_index->find(*this, row);
+		reinsert = m_index->find(row);
 		if (reinsert)
-			m_index->erase(*this, row);
+			m_index->erase(row);
 	}

 	// first remove old value with cix
@@ -1444,7 +1446,7 @@ void category::update_value(row *row, uint16_t column, std::string_view value, b
 		row->append(column, { value });

 	if (reinsert)
-		m_index->insert(*this, row);
+		m_index->insert(row);

 	// see if we need to update any child categories that depend on this value
 	auto iv = col.m_validator;
@@ -1600,7 +1602,7 @@ row_handle category::create_copy(row_handle r)
 category::iterator category::insert_impl(const_iterator pos, row *n)
 {
 	if (m_index == nullptr and m_cat_validator != nullptr)
-		m_index = new category_index(*this);
+		m_index = new category_index(this);

 	assert(n != nullptr);
 	assert(n->m_next == nullptr);
@@ -1640,7 +1642,7 @@ category::iterator category::insert_impl(const_iterator pos, row *n)
 		}

 		if (m_index != nullptr)
-			m_index->insert(*this, n);
+			m_index->insert(n);

 		// insert at end, most often this is the case
 		if (pos.m_current == nullptr)
--- a/src/compound.cpp
+++ b/src/compound.cpp
@@ -136,17 +136,14 @@ compound::compound(cif::datablock &db)
 	if (chemComp.size() != 1)
 		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");

-	std::string one_letter_code;
-
-	cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
-		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
-	
-	if (one_letter_code.length() == 1)
-		m_one_letter_code = one_letter_code.front();
+	cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge) =
+		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge");

 	// The name should not contain newline characters since that triggers validation errors later on
 	cif::replace_all(m_name, "\n", "");

+	m_group = "non-polymer";
+
 	auto &chemCompAtom = db["chem_comp_atom"];
 	for (auto row : chemCompAtom)
 	{
@@ -156,9 +153,6 @@ compound::compound(cif::datablock &db)
 			row.get("atom_id", "type_symbol", "charge", "pdbx_aromatic_flag", "pdbx_leaving_atom_flag", "pdbx_stereo_config",
 				"model_Cartn_x", "model_Cartn_y", "model_Cartn_z");
 		atom.type_symbol = atom_type_traits(type_symbol).type();
-		if (stereo_config.empty())
-			atom.stereo_config = stereo_config_type::N;
-		else
 		atom.stereo_config = parse_stereo_config_from_string(stereo_config);
 		m_atoms.push_back(std::move(atom));
 	}
@@ -169,28 +163,17 @@ compound::compound(cif::datablock &db)
 		compound_bond bond;
 		std::string valueOrder;
 		cif::tie(bond.atom_id[0], bond.atom_id[1], valueOrder, bond.aromatic, bond.stereo_config) = row.get("atom_id_1", "atom_id_2", "value_order", "pdbx_aromatic_flag", "pdbx_stereo_config");
-		if (valueOrder.empty())
-			bond.type = bond_type::sing;
-		else
 		bond.type = parse_bond_type_from_string(valueOrder);
 		m_bonds.push_back(std::move(bond));
 	}
 }

-compound::compound(cif::datablock &db, int)
+compound::compound(cif::datablock &db, const std::string &id, const std::string &name, const std::string &type, const std::string &group)
+	: m_id(id)
+	, m_name(name)
+	, m_type(type)
+	, m_group(group)
 {
-	auto &chemComp = db["chem_comp"];
-
-	if (chemComp.size() != 1)
-		throw std::runtime_error("Invalid compound file, chem_comp should contain a single row");
-
-	cif::tie(m_id, m_name) =
-		chemComp.front().get("id", "name");
-	
-	cif::trim(m_name);
-
-	m_type = "NON-POLYMER";
-
 	auto &chemCompAtom = db["chem_comp_atom"];
 	for (auto row : chemCompAtom)
 	{
@@ -201,6 +184,7 @@ compound::compound(cif::datablock &db, int)
 		atom.type_symbol = atom_type_traits(type_symbol).type();

 		m_formal_charge += atom.charge;
+		m_formula_weight += atom_type_traits(atom.type_symbol).weight();

 		m_atoms.push_back(std::move(atom));
 	}
@@ -225,39 +209,11 @@ compound::compound(cif::datablock &db, int)
 		else
 		{
 			if (cif::VERBOSE > 0)
-				std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << db.name() << '\n';
+				std::cerr << "Unimplemented chem_comp_bond.type " << btype << " in " << id << '\n';
 			bond.type = bond_type::sing;
 		}
 		m_bonds.push_back(std::move(bond));
 	}
-
-	// reconstruct a formula and weight
-
-	m_formula_weight = 0;
-
-	std::map<atom_type, int> f;
-	for (auto &atom : m_atoms)
-		f[atom.type_symbol] += 1;
-	
-	if (f.count(atom_type::C))
-	{
-		atom_type_traits att(atom_type::C);
-		m_formula += att.symbol() + std::to_string(f[atom_type::C]) + ' ';
-		m_formula_weight += att.weight() * f[atom_type::C];
-	}
-
-	for (const auto &[type, count] : f)
-	{
-		if (type == atom_type::C)
-			continue;
-
-		atom_type_traits att(type);
-		m_formula += att.symbol() + std::to_string(count) + ' ';
-		m_formula_weight += att.weight() * count;
-	}
-	
-	if (not m_formula.empty())
-		m_formula.pop_back();
 }

 compound_atom compound::get_atom_by_atom_id(const std::string &atom_id) const
@@ -304,12 +260,13 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom
 		auto a = get_atom_by_atom_id(atomId_1);
 		auto b = get_atom_by_atom_id(atomId_2);

-		result = distance(point{ a.x, a.y, a.z }, point{ b.x, b.y, b.z });
+		result = distance(point{a.x, a.y, a.z}, point{b.x, b.y, b.z});
 	}

 	return result;
 }

+
 // --------------------------------------------------------------------
 // known amino acids and bases

@@ -359,7 +316,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
 	compound_factory_impl();
 	compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next);

-	virtual ~compound_factory_impl()
+	~compound_factory_impl()
 	{
 		for (auto c : m_compounds)
 			delete c;
@@ -416,15 +373,13 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
 			os << "CCD components.cif resource\n";
 		else
 			os << "CCD components file: " << std::quoted(m_file.string()) << '\n';
-
+		
 		if (m_next)
 			m_next->describe(os);
 	}

-  protected:
-	compound_factory_impl(std::shared_ptr<compound_factory_impl> next);
-
-	virtual compound *create(const std::string &id);
+  private:
+	compound *create(const std::string &id);

 	std::shared_timed_mutex mMutex;

@@ -440,15 +395,10 @@ compound_factory_impl::compound_factory_impl()
 {
 }

-compound_factory_impl::compound_factory_impl(std::shared_ptr<compound_factory_impl> next)
-	: m_next(next)
-{
-}
-
 compound_factory_impl::compound_factory_impl(const fs::path &file, std::shared_ptr<compound_factory_impl> next)
-	: compound_factory_impl(next)
+	: m_file(file)
+	, m_next(next)
 {
-	m_file = file;
 }

 compound *compound_factory_impl::create(const std::string &id)
@@ -526,45 +476,6 @@ compound *compound_factory_impl::create(const std::string &id)

 // --------------------------------------------------------------------

-class local_compound_factory_impl : public compound_factory_impl
-{
-  public:
-	local_compound_factory_impl(const cif::file &file, std::shared_ptr<compound_factory_impl> next)
-		: compound_factory_impl(next)
-		, m_local_file(file)
-	{
-	}
-
-	compound *create(const std::string &id) override;
-
-  private:
-	const cif::file &m_local_file;
-};
-
-compound *local_compound_factory_impl::create(const std::string &id)
-{
-	compound *result = nullptr;
-
-	for (auto &db : m_local_file)
-	{
-		if (db.name() == "comp_" + id)
-		{
-			cif::datablock db_copy(db);
-
-			result = new compound(db_copy, 1);
-
-			std::shared_lock lock(mMutex);
-			m_compounds.push_back(result);
-
-			break;
-		}
-	}
-
-	return result;
-}
-
-// --------------------------------------------------------------------
-
 std::unique_ptr<compound_factory> compound_factory::s_instance;
 thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
 bool compound_factory::s_use_thread_local_instance;
@@ -642,18 +553,6 @@ void compound_factory::push_dictionary(const fs::path &inDictFile)
 	}
 }

-void compound_factory::push_dictionary(const cif::file &inDictFile)
-{
-	try
-	{
-		m_impl.reset(new local_compound_factory_impl(inDictFile, m_impl));
-	}
-	catch (const std::exception &)
-	{
-		std::throw_with_nested(std::runtime_error("Error loading dictionary from local mmCIF file"));
-	}
-}
-
 void compound_factory::pop_dictionary()
 {
 	if (m_impl)
@@ -685,26 +584,25 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
 	{
 		using namespace cif::colour;

-		std::clog << "\n"
-				  << cif::coloured("Configuration error:", white, red) << "\n\n"
+		std::clog << "\n" << cif::coloured("Configuration error:", white, red) << "\n\n"
 				  << "The attempt to retrieve compound information for " << std::quoted(compound_id) << " failed.\n\n"
 				  << "This information is searched for in a CCD file called components.cif or\n"
 				  << "components.cif.gz which should be located in one of the following directories:\n\n";
-
+		
 		cif::list_data_directories(std::clog);

 		std::clog << "\n(Note that you can add a directory to the search paths by setting the \n"
 				  << "LIBCIFPP_DATA_DIR environmental variable)\n\n";

-#if defined(CACHE_DIR)
+#if defined(CACHE_DIR)		
 		std::clog << "On Linux an optional cron script might have been installed that automatically updates\n"
 				  << "components.cif and mmCIF dictionary files. This script only works when the file\n"
 				  << "libcifpp.conf contains an uncommented line with the text:\n\n"
 				  << "update=true\n\n"
 				  << "If you do not have a working cron script, you can manually update the files\n"
 				  << "in /var/cache/libcifpp using the following commands:\n\n"
-				  << "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n"
-				  << "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n"
+				  << "curl -o " << CACHE_DIR << "/components.cif https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz\n" 
+				  << "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz\n" 
 				  << "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
 #endif

@@ -715,9 +613,9 @@ void compound_factory::report_missing_compound(const std::string &compound_id)
 		}
 		else
 			std::clog << "No compound factory objects are created since none of the data sources is found.\n";
-
+		
 		cif::list_file_resources(std::clog);
-
+		
 		std::clog.flush();
 	}
 }
--- a/src/datablock.cpp
+++ b/src/datablock.cpp
@@ -91,7 +91,7 @@ bool datablock::validate_links() const

 	for (auto &cat : *this)
 		result = cat.validate_links() and result;
-
+	
 	return result;
 }

@@ -158,12 +158,11 @@ std::tuple<datablock::iterator, bool> datablock::emplace(std::string_view name)

 	if (is_new)
 	{
-		auto &c = emplace_back(name);
+		auto &c = emplace_front(name);
 		c.set_validator(m_validator, *this);
 	}

-	assert(end() != begin());
-	return std::make_tuple(std::prev(end()), is_new);
+	return std::make_tuple(begin(), is_new);
 }

 std::vector<std::string> datablock::get_tag_order() const
@@ -172,16 +171,14 @@ std::vector<std::string> datablock::get_tag_order() const

 	// for entry and audit_conform on top

-	auto ci = find_if(begin(), end(), [](const category &cat)
-		{ return cat.name() == "entry"; });
+	auto ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "entry"; });
 	if (ci != end())
 	{
 		auto cto = ci->get_tag_order();
 		result.insert(result.end(), cto.begin(), cto.end());
 	}

-	ci = find_if(begin(), end(), [](const category &cat)
-		{ return cat.name() == "audit_conform"; });
+	ci = find_if(begin(), end(), [](const category &cat) { return cat.name() == "audit_conform"; });
 	if (ci != end())
 	{
 		auto cto = ci->get_tag_order();
@@ -199,131 +196,42 @@ std::vector<std::string> datablock::get_tag_order() const
 	return result;
 }

-namespace
-{
-	using elem_t = std::tuple<std::string, int, bool>;
-	using cat_order_t = std::vector<elem_t>;
-	using iter_t = cat_order_t::iterator;
-
-	inline int get_count(iter_t i)
-	{
-		return std::get<1>(*i);
-	}
-
-	inline bool is_on_stack(iter_t i)
-	{
-		return std::get<2>(*i);
-	}
-
-	void calculate_cat_order(cat_order_t &cat_order, iter_t i, const validator &validator)
-	{
-		if (i == cat_order.end() or get_count(i) >= 0)
-			return;
-
-		auto &&[cat, count, on_stack] = *i;
-
-		on_stack = true;
-
-		int parent_count = 0;
-
-		for (auto link : validator.get_links_for_child(cat))
-		{
-			auto ei = std::find_if(cat_order.begin(), cat_order.end(), [parent = link->m_parent_category](elem_t &a)
-				{ return std::get<0>(a) == parent; });
-
-			if (ei == cat_order.end())
-				continue;
-
-			if (not is_on_stack(ei))
-				calculate_cat_order(cat_order, ei, validator);
-
-			parent_count += get_count(ei);
-		}
-
-		count = parent_count + 1;
-	}
-} // namespace
-
 void datablock::write(std::ostream &os) const
 {
 	os << "data_" << m_name << '\n'
 	   << "# \n";

-	if (m_validator and size() > 0)
+	// mmcif support, sort of. First write the 'entry' Category
+	// and if it exists, _AND_ we have a Validator, write out the
+	// audit_conform record.
+
+	for (auto &cat : *this)
 	{
-		// If the dictionary declares an audit_conform category, put it in,
-		// but only if it does not exist already!
-		if (get("audit_conform") == nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
-		{
-			category auditConform("audit_conform");
-			auditConform.emplace({ { "dict_name", m_validator->name() },
-				{ "dict_version", m_validator->version() } });
-			auditConform.write(os);
-		}
+		if (cat.name() != "entry")
+			continue;

-		// base order on parent child relationships, parents first
+		cat.write(os);

-		cat_order_t cat_order;
-
-		for (auto &cat : *this)
-			cat_order.emplace_back(cat.name(), -1, false);
-
-		for (auto i = cat_order.begin(); i != cat_order.end(); ++i)
-			calculate_cat_order(cat_order, i, *m_validator);
-
-		std::sort(cat_order.begin(), cat_order.end(), [](const elem_t &a, const elem_t &b)
-			{
-			const auto &[cat_a, count_a, on_stack_a] = a;
-			const auto &[cat_b, count_b, on_stack_b] = b;
-
-			int d = 0;
-
-			if (cat_a == "audit_conform")
-				d = -1;
-			else if (cat_b == "audit_conform")
-				d = 1;
-			else if (cat_a == "entry")
-				d = -1;
-			else if (cat_b == "entry")
-				d = 1;
-			else
-			{
-				d = std::get<1>(a) - std::get<1>(b);
-				if (d == 0)
-					d = cat_b.compare(cat_a);
-			}
-
-			return d < 0; });
-
-		for (auto &&[cat, count, on_stack] : cat_order)
-			get(cat)->write(os);
+		break;
 	}
-	else
+
+	// If the dictionary declares an audit_conform category, put it in,
+	// but only if it does not exist already!
+	if (get("audit_conform"))
+		get("audit_conform")->write(os);
+	else if (m_validator != nullptr and m_validator->get_validator_for_category("audit_conform") != nullptr)
 	{
-		// mmcif support, sort of. First write the 'entry' Category
-		// and if it exists, _AND_ we have a Validator, write out the
-		// audit_conform record.
-
-		for (auto &cat : *this)
-		{
-			if (cat.name() != "entry")
-				continue;
+		category auditConform("audit_conform");
+		auditConform.emplace({
+			{"dict_name", m_validator->name()},
+			{"dict_version", m_validator->version()}});
+		auditConform.write(os);
+	}

+	for (auto &cat : *this)
+	{
+		if (cat.name() != "entry" and cat.name() != "audit_conform")
 			cat.write(os);
-
-			break;
-		}
-
-		// If the dictionary declares an audit_conform category, put it in,
-		// but only if it does not exist already!
-		if (get("audit_conform"))
-			get("audit_conform")->write(os);
-
-		for (auto &cat : *this)
-		{
-			if (cat.name() != "entry" and cat.name() != "audit_conform")
-				cat.write(os);
-		}
 	}
 }

@@ -429,7 +337,7 @@ bool datablock::operator==(const datablock &rhs) const
 			++catA_i;
 		else
 		{
-			if (not(*dbA.get(*catA_i) == *dbB.get(*catB_i)))
+			if (not (*dbA.get(*catA_i) == *dbB.get(*catB_i)))
 				return false;
 			++catA_i;
 			++catB_i;
@@ -439,4 +347,4 @@ bool datablock::operator==(const datablock &rhs) const
 	return true;
 }

-} // namespace cif
+} // namespace cif
--- a/src/file.cpp
+++ b/src/file.cpp
@@ -173,12 +173,11 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)

 	if (is_new)
 	{
-		auto &db = emplace_back(name);
+		auto &db = emplace_front(name);
 		db.set_validator(m_validator);
 	}

-	assert(begin() != end());
-	return std::make_tuple(std::prev(end()), is_new);
+	return std::make_tuple(begin(), is_new);
 }

 void file::load(const std::filesystem::path &p)
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -2006,10 +2006,7 @@ void structure::change_residue(residue &res, const std::string &newCompound,
 			continue;

 		if (a2.empty() or a2 == ".")
-		{
-			i->set_property("label_comp_id", newCompound);
 			remove_atom(*i);
-		}
 		else if (a1 != a2)
 		{
 			auto ra = r.front();
@@ -2836,4 +2833,15 @@ void structure::validate_atoms() const
 	assert(atoms.empty());
 }

+// --------------------------------------------------------------------
+
+// Placeholder: throws if db has no atom_site category, otherwise throws "not implemented yet" (no assert, so debug builds throw too).
+void reconstruct_pdbx(datablock &db)
+{
+	if (db.get("atom_site") == nullptr)
+		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
+
+	throw std::runtime_error("not implemented yet");
+}
+
 } // namespace pdbx
--- a/src/pdb/pdb2cif.cpp
+++ b/src/pdb/pdb2cif.cpp
--- a/src/pdb/reconstruct.cpp
+++ b/src/pdb/reconstruct.cpp
@@ -1,561 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cif++.hpp"
-
-// --------------------------------------------------------------------
-
-namespace cif::pdb
-{
-
-
-void checkAtomRecords(datablock &db)
-{
-	using namespace literals;
-
-	auto &cf = compound_factory::instance();
-
-	auto &atom_site = db["atom_site"];
-	auto &atom_type = db["atom_type"];
-	auto &chem_comp = db["chem_comp"];
-
-	for (auto row : atom_site)
-	{
-		const auto &[symbol, label_asym_id, auth_asym_id, label_comp_id, auth_comp_id, label_seq_id, auth_seq_id, label_atom_id, auth_atom_id] =
-			row.get<std::string, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>,
-				std::optional<int>, std::optional<std::string>, std::optional<std::string>, std::optional<std::string>>(
-				"type_symbol", "label_asym_id", "auth_asym_id", "label_comp_id", "auth_comp_id", "label_seq_id", "auth_seq_id", "label_atom_id", "auth_atom_id");
-
-		if (symbol.empty())
-			throw std::runtime_error("Missing type symbol in atom_site record");
-
-		if (atom_type.count("symbol"_key == symbol) == 0)
-			atom_type.emplace({ { "symbol", symbol } });
-
-		if (not(label_asym_id.has_value() or auth_asym_id.has_value()))
-			throw std::runtime_error("atom_site records does not have a label_asym_id nor an auth_asym_id, cannot continue");
-
-		if (not(label_comp_id.has_value() or auth_comp_id.has_value()))
-			throw std::runtime_error("atom_site records does not have a label_comp_id nor an auth_comp_id, cannot continue");
-
-		if (not(label_atom_id.has_value() or auth_atom_id.has_value()))
-			throw std::runtime_error("atom_site records does not have a label_atom_id nor an auth_atom_id, cannot continue");
-
-		std::string asym_id = label_asym_id.value_or(*auth_asym_id);
-		std::string comp_id = label_comp_id.value_or(*auth_comp_id);
-
-		bool is_peptide = cf.is_known_peptide(comp_id);
-		auto compound = cf.create(comp_id);
-
-		if (not compound)
-			throw std::runtime_error("Missing compound information for " + comp_id);
-
-		std::string mon_nstd_flag(".");
-		if (is_peptide)
-		{
-			if (compound_factory::kAAMap.find(comp_id) != compound_factory::kAAMap.end())
-				mon_nstd_flag = "y";
-			else
-				mon_nstd_flag = "n";
-		}
-
-		auto chem_comp_entry = chem_comp.find_first("id"_key == comp_id);
-
-		if (not chem_comp_entry)
-		{
-			chem_comp.emplace({ //
-				{ "id", comp_id },
-				{ "type", compound->type() },
-				{ "mon_nstd_flag", mon_nstd_flag },
-				{ "name", compound->name() },
-				{ "formula", compound->formula() },
-				{ "formula_weight", compound->formula_weight() } });
-		}
-		else
-		{
-			std::vector<item> items;
-
-			if (not chem_comp_entry["type"])
-				items.emplace_back(item{ "type", compound->type() });
-			if (not chem_comp_entry["mon_nstd_flag"])
-				items.emplace_back(item{ "mon_nstd_flag", mon_nstd_flag });
-			if (not chem_comp_entry["name"])
-				items.emplace_back(item{ "name", compound->name() });
-			if (not chem_comp_entry["formula"])
-				items.emplace_back(item{ "formula", compound->formula() });
-			if (not chem_comp_entry["formula_weight"])
-				items.emplace_back(item{ "formula_weight", compound->formula_weight() });
-
-			if (not items.empty())
-				chem_comp_entry.assign(std::move(items));
-		}
-
-		if (is_peptide and not(label_seq_id.has_value() or auth_seq_id.has_value()))
-			throw std::runtime_error("atom_site record has peptide comp_id but no sequence number, cannot continue");
-
-		std::string seq_id;
-		if (label_seq_id.has_value())
-			seq_id = std::to_string(*label_seq_id);
-		else if (auth_seq_id.has_value())
-			seq_id = *auth_seq_id;
-
-		row.assign({ //
-			{ "auth_asym_id", auth_asym_id.value_or(*label_asym_id) },
-			{ "auth_seq_id", auth_seq_id.value_or(std::to_string(*label_seq_id)) },
-			{ "auth_comp_id", auth_comp_id.value_or(*label_comp_id) },
-			{ "auth_atom_id", auth_atom_id.value_or(*label_atom_id) } });
-	}
-}
-
-void createStructAsym(datablock &db)
-{
-	auto &atom_site = db["atom_site"];
-	auto &struct_asym = db["struct_asym"];
-
-	for (auto label_asym_id : atom_site.rows<std::string>("label_asym_id"))
-	{
-		if (label_asym_id.empty())
-			throw std::runtime_error("File contains atom_site records without a label_asym_id");
-		if (struct_asym.count(key("id") == label_asym_id) == 0)
-		{
-			struct_asym.emplace({ //
-				{ "id", label_asym_id } });
-		}
-	}
-}
-
-void createEntity(datablock &db)
-{
-	using namespace literals;
-
-	auto &cf = compound_factory::instance();
-
-	auto &atom_site = db["atom_site"];
-	atom_site.add_column("label_entity_id");
-
-	auto &struct_asym = db["struct_asym"];
-	struct_asym.add_column("entity_id");
-
-	std::map<std::string,std::vector<std::tuple<std::string,int>>> asyms;
-
-	for (auto asym_id : db["struct_asym"].rows<std::string>("id"))
-	{
-		int last_seq_id = -1;
-
-		for (const auto &[comp_id, seq_id] : atom_site.find<std::string,int>("label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
-		{
-			if (seq_id == last_seq_id)
-				continue;
-			
-			last_seq_id = seq_id;
-
-			asyms[asym_id].emplace_back(comp_id, last_seq_id);
-		}
-	}
-
-	auto less = [](const std::vector<std::tuple<std::string,int>> &a, const std::vector<std::tuple<std::string,int>> &b)
-	{
-		int d = static_cast<int>(a.size()) - static_cast<int>(b.size());
-		return d == 0 ? a > b : d > 0;
-	};
-
-	std::set<std::vector<std::tuple<std::string,int>>,decltype(less)> entities(less);
-
-	for (const auto &[asym_id, content] : asyms)
-		entities.emplace(content);
-	
-	auto water_weight = cf.create("HOH")->formula_weight();
-
-	int poly_count = 0;
-
-	auto &entity = db["entity"];
-	for (auto &content : entities)
-	{
-		auto entity_id = entity.get_unique_id("");
-
-		std::string type, desc;
-		float weight = 0;
-		int count = 0;
-
-		auto first_comp_id = std::get<0>(content.front());
-
-		if (first_comp_id == "HOH")
-		{
-			type = "water";
-			desc = "water";
-			weight = water_weight;
-		}
-		else if (content.size() == 1)
-		{
-			auto c = cf.create(first_comp_id);
-
-			type = "non-polymer";
-			desc = c->name();
-			weight = c->formula_weight();
-		}
-		else
-		{
-			type = "polymer";
-			desc = "polymer-" + std::to_string(++poly_count);
-
-			weight = water_weight;
-			for (const auto &[comp_id, seq_id] : content)
-				weight += cf.create(comp_id)->formula_weight() - water_weight;
-		}
-
-		for (const auto &[asym_id, ac] : asyms)
-		{
-			if (ac != content)
-				continue;
-			
-			atom_site.update_value("label_asym_id"_key == asym_id, "label_entity_id", entity_id);
-			struct_asym.update_value("id"_key == asym_id, "entity_id", entity_id);
-
-			if (type != "water")
-				++count;
-			else
-				count = atom_site.count("label_asym_id"_key == asym_id and "label_atom_id"_key == "O");
-		}
-
-		entity.emplace({ // 
-			{ "id", entity_id },
-			{ "type", type },
-			{ "pdbx_description", desc },
-			{ "formula_weight", weight },
-			{ "pdbx_number_of_molecules", count }
-		});
-	}
-}
-
-void createEntityPoly(datablock &db)
-{
-	using namespace literals;
-
-	auto &cf = compound_factory::instance();
-
-	auto &atom_site = db["atom_site"];
-	auto &entity_poly = db["entity_poly"];
-
-	for (auto entity_id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
-	{
-		std::string type;
-		int last_seq_id = -1;
-		std::string seq, seq_can;
-		bool non_std_monomer = false;
-		bool non_std_linkage = false;
-		std::string pdb_strand_id;
-
-		for (const auto &[comp_id, seq_id, auth_asym_id] : atom_site.find<std::string,int,std::string>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id", "auth_asym_id"))
-		{
-			if (seq_id == last_seq_id)
-				continue;
-			
-			last_seq_id = seq_id;
-
-			auto c = cf.create(comp_id);
-
-			std::string letter;
-			char letter_can;
-
-			// TODO: Perhaps we should improve this... 
-			if (type != "other")
-			{
-				std::string c_type;
-				if (cf.is_known_base(comp_id))
-				{
-					c_type = "polydeoxyribonucleotide";
-					letter = letter_can = compound_factory::kBaseMap.at(comp_id);
-				}
-				else if (cf.is_known_peptide(comp_id))
-				{
-					c_type = "polypeptide(L)";
-					letter = letter_can = compound_factory::kAAMap.at(comp_id);
-				}
-				else if (iequals(c->type(), "D-PEPTIDE LINKING"))
-				{
-					c_type = "polypeptide(D)";
-
-					letter_can = c->one_letter_code();
-					if (letter_can == 0)
-						letter_can = 'X';
-					
-					letter = '(' + comp_id + ')';
-
-					non_std_linkage = true;
-					non_std_monomer = true;
-				}
-				else if (iequals(c->type(), "L-PEPTIDE LINKING") or iequals(c->type(), "PEPTIDE LINKING"))
-				{
-					c_type = "polypeptide(L)";
-
-					letter_can = c->one_letter_code();
-					if (letter_can == 0)
-						letter_can = 'X';
-
-					letter = '(' + comp_id + ')';
-
-					non_std_monomer = true;
-				}
-
-				if (type.empty())
-					type = c_type;
-				else if (type != c_type)
-					type = "other";
-			}
-
-			seq += letter;
-			seq_can += letter_can;
-
-			pdb_strand_id = auth_asym_id;
-		}
-
-		for (auto i = seq.begin() + 80; i < seq.end(); i += 80)
-			i = seq.insert(i, '\n') + 1;
-		
-		for (auto i = seq_can.begin() + 76; i < seq_can.end(); i += 76)
-		{
-			auto j = i;
-			while (j < i + 4 and j < seq_can.end())
-			{
-				if (*j == '(')
-					break;
-				++j;
-			}
-
-			if (j < seq_can.end())
-				i = seq_can.insert(j, '\n') + 1;
-			else
-				i = j;
-		}
-
-		entity_poly.emplace({ // 
-			{ "entity_id", entity_id },
-			{ "type", type },
-			{ "nstd_linkage", non_std_linkage },
-			{ "nstd_monomer", non_std_monomer },
-			{ "pdbx_seq_one_letter_code", seq },
-			{ "pdbx_seq_one_letter_code_can", seq_can },
-			{ "pdbx_strand_id", pdb_strand_id }
-		});
-	}
-}
-
-void createEntityPolySeq(datablock &db)
-{
-	if (db.get("entity_poly") == nullptr)
-		createEntityPoly(db);
-
-	using namespace literals;
-
-	auto &atom_site = db["atom_site"];
-	auto &entity_poly = db["entity_poly"];
-	auto &entity_poly_seq = db["entity_poly_seq"];
-	auto &struct_asym = db["struct_asym"];
-
-	for (auto entity_id : entity_poly.rows<std::string>("entity_id"))
-	{
-		int last_seq_id = -1;
-		std::string last_comp_id;
-		std::string asym_id = struct_asym.find_first<std::string>("entity_id"_key == entity_id, "id");
-
-		for (const auto &[comp_id, seq_id] : atom_site.find<std::string,int>("label_entity_id"_key == entity_id and "label_asym_id"_key == asym_id, "label_comp_id", "label_seq_id"))
-		{
-			bool hetero = false;
-
-			if (seq_id == last_seq_id)
-			{
-				if (last_comp_id != comp_id)
-					hetero = true;
-				else
-					continue;
-			}
-
-			if (hetero)
-			{
-				entity_poly_seq.back().assign({
-					{ "hetero", true }
-				});
-			}
-
-			entity_poly_seq.emplace({ // 
-				{ "entity_id", entity_id },
-				{ "num", seq_id },
-				{ "mon_id", comp_id },
-				{ "hetero", hetero }
-			});
-			
-			last_seq_id = seq_id;
-			last_comp_id = comp_id;
-		}
-
-		// you cannot assume this is correct...
-		entity_poly_seq.sort([](row_handle a, row_handle b)
-		{
-			return a.get<int>("num") < b.get<int>("num");
-		});
-	}
-}
-
-void createPdbxPolySeqScheme(datablock &db)
-{
-	if (db.get("entity_poly_seq") == nullptr)
-		createEntityPolySeq(db);
-
-	using namespace literals;
-
-	auto &atom_site = db["atom_site"];
-	auto &entity_poly = db["entity_poly"];
-	auto &entity_poly_seq = db["entity_poly_seq"];
-	auto &struct_asym = db["struct_asym"];
-	auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
-
-	for (const auto &[entity_id, pdb_strand_id] : entity_poly.rows<std::string, std::string>("entity_id", "pdbx_strand_id"))
-	{
-		for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
-		{
-			for (const auto &[comp_id, num, hetero] : entity_poly_seq.find<std::string,int,bool>("entity_id"_key == entity_id, "mon_id", "num", "hetero"))
-			{
-				const auto &[auth_seq_num, auth_mon_id, ins_code] =
-					atom_site.find_first<std::string,std::string,std::optional<std::string>>(
-						"label_asym_id"_key == asym_id and "label_seq_id"_key == num,
-						"auth_seq_id", "auth_comp_id", "pdbx_PDB_ins_code"
-					);
-				
-				pdbx_poly_seq_scheme.emplace({ //
-					{ "asym_id", asym_id },
-					{ "entity_id", entity_id  },
-					{ "seq_id", num },
-					{ "mon_id", comp_id },
-					{ "ndb_seq_num", num },
-					{ "pdb_seq_num", auth_seq_num },
-					{ "auth_seq_num", auth_seq_num },
-					{ "pdb_mon_id", auth_mon_id },
-					{ "auth_mon_id", auth_mon_id },
-					{ "pdb_strand_id", pdb_strand_id },
-					{ "pdb_ins_code", ins_code },
-					{ "hetero", hetero }
-				});
-			}
-		}
-	}
-}
-
-void reconstruct_pdbx(file &file, std::string_view dictionary)
-{
-	if (file.empty())
-		throw std::runtime_error("Cannot reconstruct PDBx, file seems to be empty");
-
-	// assuming the first datablock contains the entry ...
-	auto &db = file.front();
-
-	// ... and any additional datablock will contain compound information
-	cif::compound_source cs(file);
-
-	if (db.get("atom_site") == nullptr)
-		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
-
-	auto &validator = validator_factory::instance()[dictionary];
-
-	std::string entry_id;
-
-	// Phenix files do not have an entry record
-	if (db.get("entry") == nullptr)
-	{
-		entry_id = db.name();
-		category entry("entry");
-		entry.emplace({ { "id", entry_id } });
-		db.emplace_back(std::move(entry));
-	}
-	else
-	{
-		auto &entry = db["entry"];
-		if (entry.size() != 1)
-			throw std::runtime_error("Unexpected size of entry category");
-
-		entry_id = entry.front().get<std::string>("id");
-	}
-
-	for (auto &cat : db)
-	{
-		auto cv = validator.get_validator_for_category(cat.name());
-		if (not cv)
-			continue;
-
-		for (auto link : validator.get_links_for_child(cat.name()))
-		{
-			if (link->m_parent_category != "entry")
-				continue;
-
-			// So, this cat should have a link to the entry
-
-			auto pk = find(link->m_parent_keys.begin(), link->m_parent_keys.end(), "id");
-			if (pk == link->m_parent_keys.end())
-				continue;
-
-			auto ix = pk - link->m_parent_keys.begin();
-			auto key = link->m_child_keys[ix];
-
-			for (auto row : cat)
-			{
-				row.assign({ { key, entry_id } });
-			}
-		}
-
-		// See if all categories that need a key do have a value
-		if (cv->m_keys.size() == 1)
-		{
-			auto key = cv->m_keys.front();
-			for (auto row : cat)
-			{
-				auto ord = row.get<std::string>(key.c_str());
-				if (ord.empty())
-					row.assign({ //
-						{ key, cat.get_unique_id([](int nr)
-								   { return std::to_string(nr); }) } });
-			}
-		}
-	}
-
-	file.load_dictionary(dictionary);
-
-	// Now create any missing categories
-
-	// First, see if atom records make sense at all
-	// Will take care of atom_type and chem_comp as well.
-	checkAtomRecords(db);
-
-	// Next make sure we have struct_asym records
-	if (db.get("struct_asym") == nullptr)
-		createStructAsym(db);
-	
-	if (db.get("entity") == nullptr)
-		createEntity(db);
-
-	if (db.get("pdbx_poly_seq_scheme") == nullptr)
-		createPdbxPolySeqScheme(db);
-}
-
-} // namespace cif::pdb
--- a/src/pdb/validate-pdbx.cpp
+++ b/src/pdb/validate-pdbx.cpp
@@ -1,284 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "cif++.hpp"
-
-namespace cif::pdb
-{
-
-condition get_parents_condition(const validator &validator, row_handle rh, const category &parentCat)
-{
-	condition result;
-
-	auto &childCat = rh.get_category();
-	auto childName = childCat.name();
-	auto parentName = parentCat.name();
-
-	auto links = validator.get_links_for_child(childName);
-	links.erase(remove_if(links.begin(), links.end(), [n = parentName](auto &l)
-					{ return l->m_parent_category != n; }),
-		links.end());
-
-	if (not links.empty())
-	{
-		for (auto &link : links)
-		{
-			condition cond;
-
-			for (size_t ix = 0; ix < link->m_child_keys.size(); ++ix)
-			{
-				auto childValue = rh[link->m_child_keys[ix]];
-
-				if (childValue.empty())
-					continue;
-
-				cond = std::move(cond) and key(link->m_parent_keys[ix]) == childValue.text();
-			}
-
-			result = std::move(result) or std::move(cond);
-		}
-	}
-	else if (cif::VERBOSE > 0)
-		std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';
-
-	return result;
-}
-
-bool is_valid_pdbx_file(const file &file, std::string_view dictionary)
-{
-	using namespace cif::literals;
-
-	auto &cf = cif::compound_factory::instance();
-	auto &validator = cif::validator_factory::instance().operator[](dictionary);
-
-	bool result = true;
-
-	try
-	{
-		if (file.empty())
-			throw validation_error("Empty file");
-
-		auto &db = file.front();
-
-		if (db.empty())
-			throw validation_error("Empty datablock");
-
-		auto &atom_site = db["atom_site"];
-		if (atom_site.empty())
-			throw validation_error("Empty or missing atom_site category");
-
-		auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
-
-		std::string last_asym_id;
-		int last_seq_id = -1;
-		for (auto r : atom_site)
-		{
-			auto seq_id = r.get<std::optional<int>>("label_seq_id");
-			if (not seq_id.has_value()) // not a residue in a polymer
-				continue;
-
-			if (*seq_id == last_seq_id)
-				continue;
-
-			last_seq_id = *seq_id;
-
-			auto comp_id = r.get<std::string>("label_comp_id");
-			if (not cf.is_known_peptide(comp_id))
-				continue;
-
-			auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
-			if (p.size() != 1)
-				throw validation_error("For each residue in atom_site that is a residue in a polymer there should be exactly one pdbx_poly_seq_scheme record");
-		}
-
-		auto &entity = db["entity"];
-		if (entity.empty())
-			throw validation_error("Entity category is missing or empty");
-
-		auto &entity_poly = db["entity_poly"];
-		if (entity_poly.empty())
-			throw validation_error("Entity_poly category is missing or empty");
-
-		auto &entity_poly_seq = db["entity_poly_seq"];
-		if (entity_poly_seq.empty())
-			throw validation_error("Entity_poly_seq category is missing or empty");
-
-		auto &struct_asym = db["struct_asym"];
-		if (struct_asym.empty())
-			throw validation_error("struct_asym category is missing or empty");
-
-		for (auto entity_id : entity.find<std::string>("type"_key == "polymer", "id"))
-		{
-			if (entity_poly.count("entity_id"_key == entity_id) != 1)
-				throw validation_error("There should be exactly one entity_poly record per polymer entity");
-
-			const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
-
-			std::map<int,std::set<std::string>> mon_per_seq_id;
-
-			for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
-			{
-				mon_per_seq_id[num].emplace(mon_id);
-
-				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
-				{
-					if (pdbx_poly_seq_scheme.count(
-							"asym_id"_key == asym_id and
-							"mon_id"_key == mon_id and
-							"seq_id"_key == num and
-							"hetero"_key == hetero) != 1)
-					{
-						throw validation_error("For each entity_poly_seq record there should be exactly one pdbx_poly_seq record");
-					}
-				}
-			}
-
-			for (const auto &[seq_id, mon_id, hetero] : pdbx_poly_seq_scheme.find<int, std::string, bool>("entity_id"_key == entity_id, "seq_id", "mon_id", "hetero"))
-			{
-				if (entity_poly_seq.count(
-						"mon_id"_key == mon_id and
-						"num"_key == seq_id and
-						"hetero"_key == hetero) != 1)
-				{
-					throw validation_error("For each pdbx_poly_seq/struct_asym record there should be exactly one entity_poly_seq record");
-				}
-
-				if ((mon_per_seq_id[seq_id].size() > 1) != hetero)
-					throw validation_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
-			}
-
-			for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
-			{
-				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
-				{
-					condition cond;
-					
-					for (auto mon_id : mon_ids)
-						cond = std::move(cond) or "label_comp_id"_key == mon_id;
-
-					cond = "label_entity_id"_key == entity_id and
-						"label_asym_id"_key == asym_id and
-						"label_seq_id"_key == seq_id and not std::move(cond);
-					
-					if (atom_site.exists(std::move(cond)))
-						throw validation_error("An atom_site record exists that has no parent in the poly seq scheme categories");
-				}
-			}
-
-			auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
-				"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
-			
-			std::string::const_iterator si, sci, se, sce;
-
-			auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
-			{
-				for (const auto &[seq_id, comp_ids] : mon_per_seq_id)
-				{
-					if (si == se)
-						return false;
-
-					bool match = false;
-
-					for (auto comp_id : comp_ids)
-					{
-						std::string letter;
-						if (cf.is_known_base(comp_id))
-							letter = compound_factory::kBaseMap.at(comp_id);
-						else if (cf.is_known_peptide(comp_id))
-							letter = compound_factory::kAAMap.at(comp_id);
-						else
-						{
-							if (can)
-							{
-								auto c = cf.create(comp_id);
-								if (c and c->one_letter_code())
-									letter = c->one_letter_code();
-								else
-									letter = "X";
-							}
-							else
-								letter = '(' + comp_id + ')';
-						}
-						
-						if (iequals(std::string{si, si + letter.length()}, letter))
-						{
-							match = true;
-							si += letter.length();
-							break;
-						}
-						else
-							return false;
-					}
-
-					if (not match)
-						break;
-				}
-
-				return si == se;
-			};
-
-			if (not seq.has_value())
-			{
-				if (cif::VERBOSE > 0)
-					std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
-			}
-			else
-			{
-				seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
-
-				if (not seq_match(false, seq->begin(), seq->end()))
-					throw validation_error("Sequences do not match for entity " + entity_id);
-			}
-
-			if (not seq_can.has_value())
-			{
-				if (cif::VERBOSE > 0)
-					std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
-			}
-			else
-			{
-				seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
-				
-				if (not seq_match(true, seq_can->begin(), seq_can->end()))
-					throw validation_error("Canonical sequences do not match for entity " + entity_id);
-			}
-			
-		}
-
-		result = true;
-	}
-	catch (const std::exception &ex)
-	{
-		result = false;
-		if (cif::VERBOSE > 0)
-			std::clog << ex.what() << '\n';
-	}
-
-	return result;
-}
-
-} // namespace cif::pdb
-  
--- a/test/.1juh.cif.gz.swp
+++ b/test/.1juh.cif.gz.swp
--- a/test/pdb1cbs.ent.gz
+++ b/test/pdb1cbs.ent.gz
--- a/test/rename-compound-test.cpp
+++ b/test/rename-compound-test.cpp
@@ -37,8 +37,8 @@ TEST_CASE("rename")
 {
 	cif::VERBOSE = 3;

-	if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "ccd-subset.cif"))
-		cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
+	if (std::filesystem::exists(gTestDir / ".." / "data" / "ccd-subset.cif"))
+		cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");

 	if (std::filesystem::exists(gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic"))
 		cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");
--- a/test/test-main.cpp
+++ b/test/test-main.cpp
@@ -13,11 +13,10 @@ int main(int argc, char *argv[])

 	// Build a new parser on top of Catch2's
 	using namespace Catch::clara;
-	auto cli = session.cli()                               // Get Catch2's command line parser
-	           | Opt(gTestDir, "data-dir")                 // bind variable to a new option, with a hint string
-	                 ["-D"]["--data-dir"]                  // the option names it will respond to
-	           ("The directory containing the data files") // description string for the help output
-	           | Opt(cif::VERBOSE, "verbose")["-v"]["--cif-verbose"]("Flag for cif::VERBOSE");
+	auto cli = session.cli()                                // Get Catch2's command line parser
+	           | Opt(gTestDir, "data-dir")                // bind variable to a new option, with a hint string
+	                 ["-D"]["--data-dir"]                   // the option names it will respond to
+	           ("The directory containing the data files"); // description string for the help output

 	// Now pass the new composite back to Catch2 so it uses that
 	session.cli(cli);
@@ -31,9 +30,10 @@ int main(int argc, char *argv[])
 	cif::add_file_resource("mmcif_pdbx.dic", gTestDir / ".." / "rsrc" / "mmcif_pdbx.dic");

 	// initialize CCD location
-	cif::add_file_resource("components.cif", gTestDir / ".." / "rsrc" / "ccd-subset.cif");
+	cif::add_file_resource("components.cif", gTestDir / ".." / "data" / "ccd-subset.cif");

 	cif::compound_factory::instance().push_dictionary(gTestDir / "HEM.cif");

+
 	return session.run();
 }
--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
@@ -3468,22 +3468,3 @@ TEST_CASE("compound_not_found_test_1")
 	auto cmp = cif::compound_factory::instance().create("&&&");
 	REQUIRE(cmp == nullptr);
 }
-
-// --------------------------------------------------------------------
-// PDB2CIF tests
-
-TEST_CASE("pdb2cif_formula_weight")
-{
-	cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
-
-	cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");
-	
-	auto fw = a.front()["entity"].find1<float>(cif::key("id") == 1, "formula_weight");
-	CHECK(std::abs(fw - 15581.802f) < 0.1f);
-
-	fw = a.front()["entity"].find1<float>(cif::key("id") == 2, "formula_weight");
-	CHECK(fw == 300.435f);
-
-	fw = a.front()["entity"].find1<float>(cif::key("id") == 3, "formula_weight");
-	CHECK(fw == 18.015f);
-}
--- a/test/validate-pdbx-test.cpp
+++ b/test/validate-pdbx-test.cpp
@@ -1,286 +0,0 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- *
- * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include "test-main.hpp"
-
-#include <catch2/catch.hpp>
-
-#include <cif++.hpp>
-
-#include <stdexcept>
-
-// --------------------------------------------------------------------
-
-cif::file operator""_cf(const char *text, size_t length)
-{
-	struct membuf : public std::streambuf
-	{
-		membuf(char *text, size_t length)
-		{
-			this->setg(text, text, text + length);
-		}
-	} buffer(const_cast<char *>(text), length);
-
-	std::istream is(&buffer);
-	return cif::file(is);
-}
-
-// --------------------------------------------------------------------
-
-TEST_CASE("test-1")
-{
-	auto f = R"(data_1CBS
-# 
-_entry.id   1CBS 
-# 
-_entity.id                     1
-_entity.type                   polymer
-# 
-_entity_poly.entity_id                      1 
-_entity_poly.type                           'polypeptide(L)' 
-_entity_poly.nstd_linkage                   no 
-_entity_poly.nstd_monomer                   no 
-_entity_poly.pdbx_seq_one_letter_code       
-;PNFSG
-;
-_entity_poly.pdbx_seq_one_letter_code_can   
-;PNFSG
-;
-_entity_poly.pdbx_strand_id                 A 
-_entity_poly.pdbx_target_identifier         ? 
-# 
-loop_
-_entity_poly_seq.entity_id 
-_entity_poly_seq.num 
-_entity_poly_seq.mon_id 
-_entity_poly_seq.hetero 
-1 1   PRO n 
-1 2   ASN n 
-1 3   PHE n 
-1 4   SER n 
-1 5   GLY n 
-#
-loop_
-_struct_asym.id 
-_struct_asym.pdbx_blank_PDB_chainid_flag 
-_struct_asym.pdbx_modified 
-_struct_asym.entity_id 
-_struct_asym.details 
-A N N 1 ? 
-# 
-loop_
-_atom_type.symbol 
-C 
-N 
-O 
-S 
-# 
-loop_
-_atom_site.group_PDB 
-_atom_site.id 
-_atom_site.type_symbol 
-_atom_site.label_atom_id 
-_atom_site.label_alt_id 
-_atom_site.label_comp_id 
-_atom_site.label_asym_id 
-_atom_site.label_entity_id 
-_atom_site.label_seq_id 
-_atom_site.pdbx_PDB_ins_code 
-_atom_site.Cartn_x 
-_atom_site.Cartn_y 
-_atom_site.Cartn_z 
-_atom_site.occupancy 
-_atom_site.B_iso_or_equiv 
-_atom_site.pdbx_formal_charge 
-_atom_site.auth_seq_id 
-_atom_site.auth_comp_id 
-_atom_site.auth_asym_id 
-_atom_site.auth_atom_id 
-_atom_site.pdbx_PDB_model_num 
-ATOM   2    C CA  . PRO A 1 1   ? 18.150 13.525 43.680 1.00 28.82 ? 1   PRO A CA  1 
-ATOM   9    C CA  . ASN A 1 2   ? 20.576 16.457 43.578 1.00 20.79 ? 2   ASN A CA  1 
-ATOM   17   C CA  . PHE A 1 3   ? 21.144 17.838 40.087 1.00 12.62 ? 3   PHE A CA  1 
-ATOM   28   C CA  . SER A 1 4   ? 23.170 20.780 41.464 1.00 11.30 ? 4   SER A CA  1 
-ATOM   34   C CA  . GLY A 1 5   ? 26.628 21.486 40.103 1.00 10.86 ? 5   GLY A CA  1 
-# 
-loop_
-_pdbx_poly_seq_scheme.asym_id 
-_pdbx_poly_seq_scheme.entity_id 
-_pdbx_poly_seq_scheme.seq_id 
-_pdbx_poly_seq_scheme.mon_id 
-_pdbx_poly_seq_scheme.ndb_seq_num 
-_pdbx_poly_seq_scheme.pdb_seq_num 
-_pdbx_poly_seq_scheme.auth_seq_num 
-_pdbx_poly_seq_scheme.pdb_mon_id 
-_pdbx_poly_seq_scheme.auth_mon_id 
-_pdbx_poly_seq_scheme.pdb_strand_id 
-_pdbx_poly_seq_scheme.pdb_ins_code 
-_pdbx_poly_seq_scheme.hetero 
-A 1 1   PRO 1   1   1   PRO PRO A . n 
-A 1 2   ASN 2   2   2   ASN ASN A . n 
-A 1 3   PHE 3   3   3   PHE PHE A . n 
-A 1 4   SER 4   4   4   SER SER A . n 
-A 1 5   GLY 5   5   5   GLY GLY A . n 
-# 
-)"_cf;
-
-	SECTION("Plain file")
-	{
-		REQUIRE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Delete one atom_site")
-	{
-		auto &db = f.front();
-		auto n = db["atom_site"].erase(cif::key("id") == 2);
-
-		REQUIRE(n == 1);
-
-		REQUIRE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Delete a pdbx_poly_seq_scheme record")
-	{
-		auto &db = f.front();
-		auto n = db["pdbx_poly_seq_scheme"].erase(cif::key("seq_id") == 2);
-
-		REQUIRE(n == 1);
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Delete an entity_poly_seq record")
-	{
-		auto &db = f.front();
-		auto n = db["entity_poly_seq"].erase(cif::key("num") == 2);
-
-		REQUIRE(n == 1);
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Delete an entity_poly record")
-	{
-		auto &db = f.front();
-		auto n = db["entity_poly"].erase(cif::key("entity_id") == 1);
-
-		REQUIRE(n == 1);
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Mutate an atom_site record")
-	{
-		auto &db = f.front();
-		auto r = db["atom_site"].find1(cif::key("id") == 9);
-		r.assign({
-			{ "label_comp_id", "ALA" },
-			{ "auth_comp_id", "ALA" }
-		});
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Hetero consistency")
-	{
-		auto &db = f.front();
-		db["entity_poly_seq"].emplace({ //
-			{ "entity_id", 1 },
-			{ "num", 1 },
-			{ "mon_id", "ALA" },
-			{ "hetero", "n" }
-		});
-
-		db["pdbx_poly_seq_scheme"].emplace({ //
-			{ "asym_id", "A" },
-			{ "entity_id", "1" },
-			{ "seq_id", "1" },
-			{ "mon_id", "ALA" },
-			{ "ndb_seq_num", "1" },
-			{ "pdb_seq_num", "1" },
-			{ "auth_seq_num", "1" },
-			{ "pdb_mon_id", "ALA" },
-			{ "auth_mon_id", "ALA" },
-			{ "pdb_strand_id", "A" },
-			{ "pdb_ins_code", "." },
-			{ "hetero", "n" }
-		});
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Missing hetero for record in atom_site")
-	{
-		auto &db = f.front();
-		
-		auto r1 = db["atom_site"].front();
-		cif::row_initializer cr(r1);
-		cr.set_value("id", "3");
-		cr.set_value("label_comp_id", "ALA");
-
-		db["atom_site"].emplace(std::move(cr));
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Missing letter in entity_poly.pdbx_seq_one_letter_code")
-	{
-		auto &db = f.front();
-		auto &entity_poly = db["entity_poly"];
-
-		entity_poly.front().assign({
-			{ "pdbx_seq_one_letter_code", "PNSG" }
-		});
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Too many letters in entity_poly.pdbx_seq_one_letter_code")
-	{
-		auto &db = f.front();
-		auto &entity_poly = db["entity_poly"];
-
-		entity_poly.front().assign({
-			{ "pdbx_seq_one_letter_code", "PNFSGX" }
-		});
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-	SECTION("Mismatch in entity_poly.pdbx_seq_one_letter_code")
-	{
-		auto &db = f.front();
-		auto &entity_poly = db["entity_poly"];
-
-		entity_poly.front().assign({
-			{ "pdbx_seq_one_letter_code", "PNASG" }
-		});
-
-		REQUIRE_FALSE(cif::pdb::is_valid_pdbx_file(f));
-	}
-
-
-}
--- a/tools/update-libcifpp-data.in
+++ b/tools/update-libcifpp-data.in
@@ -10,7 +10,7 @@ euid=${EUID:-$(id -u)}

 if [ "${euid}" -ne 0 ]; then
 	echo "Please run as root"
-	exit 1
+	exit
 fi

 if [ -f "@CIFPP_ETC_DIR@/libcifpp.conf" ]; then
@@ -19,13 +19,12 @@ fi

 # check to see if we're supposed to run at all
 if [ "$update" != "true" ]; then
-	exit 0
+	exit
 fi

 # if cache directory doesn't exist, exit.
 if ! [ -d "@CIFPP_CACHE_DIR@" ]; then
-	echo "Cache directory '@CIFPP_CACHE_DIR@' does not exist"
-	exit 1
+	exit
 fi

 # Create a temp file in the right directory and
@@ -61,16 +60,12 @@ update_dictionary() {

 # Update the dictionaries

-update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
+update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
 update_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
 update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"

-# notify subscribers, using find instead of run-parts to make it work on FreeBSD as well
+# notify subscribers, will fail on FreeBSD

-if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ]; then
-	find "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" \
-		-exec test -x {} \; -and -not -exec test -d {} \; \
-		-exec {} "@CIFPP_CACHE_DIR@" \;
+if [ -d "@CIFPP_ETC_DIR@/libcifpp/cache-update.d" ] && [ -x /bin/run-parts ]; then
+	run-parts --arg "@CIFPP_CACHE_DIR@" -- "@CIFPP_ETC_DIR@/libcifpp/cache-update.d"
 fi
-
-exit 0
Author	SHA1	Message	Date
Maarten L. Hekkelman	92a9a067c6	for write_data_files=off	2024-01-31 10:55:08 +01:00
Maarten L. Hekkelman	9acc343145	added option to not write data files	2024-01-31 10:37:02 +01:00