begin with curl

2026-06-05 14:34:21 +08:00 · 2025-08-13 10:10:45 +02:00
35 changed files with 4145 additions and 4570 deletions
--- a/.clang-format
+++ b/.clang-format
@@ -1,22 +0,0 @@
-BasedOnStyle: LLVM
-UseTab: AlignWithSpaces
-IndentWidth: 4
-TabWidth: 4
-BreakBeforeBraces: Allman
-ColumnLimit: 0
-NamespaceIndentation: Inner
-FixNamespaceComments: true
-AccessModifierOffset: -2
-AllowShortCaseLabelsOnASingleLine: true
-IndentCaseLabels: true
-BreakConstructorInitializers: BeforeComma
-BraceWrapping:
-  BeforeLambdaBody: false
-AlignAfterOpenBracket: DontAlign
-Cpp11BracedListStyle: false
-IncludeBlocks: Regroup
-LambdaBodyIndentation: Signature
-AllowShortLambdasOnASingleLine: Inline
-EmptyLineBeforeAccessModifier: LogicalBlock
-IndentPPDirectives: AfterHash
-PPIndentWidth: 1
--- a/.github/workflows/cmake-multi-platform.yml
+++ b/.github/workflows/cmake-multi-platform.yml
@@ -33,18 +33,13 @@ jobs:

    - name: Install dependencies Ubuntu
      if: matrix.os == 'ubuntu-latest'
-      run: sudo apt-get update && sudo apt-get install mrc catch2
+      run: sudo apt-get update && sudo apt-get install mrc

    - name: Install dependencies Window
      if: matrix.os == 'windows-latest'
      run: ./tools/depends.cmd
      shell: cmd

-    - name: Install Catch2 macOS
-      if: matrix.os == 'macos-latest'
-      run: >
-        brew install catch2
-
    - name: Configure CMake
      run: >
        cmake -B ${{ steps.strings.outputs.build-output-dir }}
--- a/.gitignore
+++ b/.gitignore
@@ -13,6 +13,3 @@ docs/api
 docs/conf.py
 build_ci/
 data/components.cif
-perf.data*
-.cache/
-
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -32,17 +32,26 @@ endif()
 # set the project name
 project(
 	libcifpp
-	VERSION 9.0.2
-	LANGUAGES CXX C)
+	VERSION 9.0.0
+	LANGUAGES CXX)

 list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

 include(FindAtomic)
+include(CheckFunctionExists)
+include(CheckIncludeFiles)
+include(CheckLibraryExists)
 include(CMakePackageConfigHelpers)
+include(CheckCXXSourceCompiles)
 include(GenerateExportHeader)
 include(CTest)
-include(ExternalProject)
 include(FetchContent)
+include(ExternalProject)
+
+# CMake 3.30 removed the FindBoost module (policy CMP0167); opt in to the NEW behaviour now.
+if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
+	cmake_policy(SET CMP0167 NEW)
+endif()

 # When building with ninja-multiconfig, build both debug and release by default
 if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
@@ -64,6 +73,9 @@ set(BUILD_DOCUMENTATION OFF CACHE BOOL "Build the documentation")
 # Optionally build a version to be installed inside CCP4
 set(BUILD_FOR_CCP4 OFF CACHE BOOL "Build a version to be installed in CCP4")

+# Optionally use libcurl to fetch compound files
+set(USE_CURL_FOR_CCD ON CACHE BOOL "Use curl to fetch missing CCD files")
+
 # Building shared libraries?
 if(NOT(BUILD_FOR_CCP4 AND WIN32))
 	set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build a shared library instead of a static one")
@@ -140,8 +152,51 @@ endif()

 # Libraries

+# Start by finding out if std::regex is usable. Note that the current
+# implementation in GCC is not acceptable, it crashes on long lines. The
+# implementation in libc++ (clang) and MSVC seem to be OK.
+check_cxx_source_compiles(
+	"
+#include <iostream>
+#ifndef __GLIBCXX__
+#error
+#endif
+int main(int argc, char *argv[]) { return 0; }"
+	GXX_LIBSTDCPP)
+
+if(GXX_LIBSTDCPP)
+	message(
+		STATUS "cifpp: Testing for known regex bug, since you're using GNU libstdc++")
+
+	try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
+		${CMAKE_CURRENT_BINARY_DIR}/test
+		${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-rx.cpp)
+
+	if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
+		message(
+			STATUS
+			"cifpp: You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
+		)
+
+		find_package(Boost 1.80 QUIET COMPONENTS regex)
+
+		if(NOT Boost_FOUND)
+			set(BOOST_REGEX_STANDALONE ON)
+
+			FetchContent_Declare(
+				boost-rx
+				GIT_REPOSITORY https://github.com/boostorg/regex
+				GIT_TAG boost-1.83.0)
+
+			FetchContent_MakeAvailable(boost-rx)
+		endif()
+
+		set(BOOST_REGEX ON)
+	endif()
+endif()
+
 if(MSVC)
-	# Avoid linking the shared library of zlib. Search ZLIB_ROOT first if it is
+	# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
 	# set.
 	if(ZLIB_ROOT)
 		set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
@@ -168,37 +223,17 @@ if(MSVC)
 	endforeach()
 endif()

-# First check if <format> is available
-find_file(FMT NAME format)
-
-if(FMT EQUAL "FMT-NOTFOUND")
-	if(NOT (fmt_FOUND OR TARGET fmt))
-		find_package(fmt REQUIRED)
-		message(FATAL_ERROR "fmt not found, compiler too old, you're out of luck")
-	endif()
+if(USE_CURL_FOR_CCD)
+	find_package(CURL REQUIRED)
 endif()

-find_package(Threads)
 find_package(ZLIB QUIET)
+find_package(Threads)

 if(NOT ZLIB_FOUND)
 	message(FATAL_ERROR "cifpp: The zlib development files were not found you this system, please install them and try again (hint: on debian/ubuntu use apt-get install zlib1g-dev)")
 endif()

-include(FindPkgConfig)
-
-if(PKG_CONFIG_FOUND)
-	pkg_check_modules(PCRE2 IMPORTED_TARGET libpcre2-8)
-
-	if(PCRE2_FOUND)
-		message(STATUS "Using pcre2 found using pkg-config")
-	endif()
-endif()
-
-if(NOT PCRE2_FOUND)
-	add_subdirectory(pcre2-simple)
-endif()
-
 # Using Eigen3 is a bit of a thing. We don't want to build it completely since
 # we only need a couple of header files. Nothing special. But often, eigen3 is
 # already installed and then we prefer that.
@@ -210,16 +245,16 @@ if(Eigen3_FOUND AND TARGET Eigen3::Eigen)
 else()
 	# Use ExternalProject since FetchContent always tries to install the result...
 	ExternalProject_Add(my-eigen3
-		URL https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip
-		DOWNLOAD_EXTRACT_TIMESTAMP TRUE
-		CONFIGURE_COMMAND ""
-		BUILD_COMMAND ""
+		GIT_REPOSITORY https://gitlab.com/libeigen/eigen.git
+		GIT_TAG 3.4.0
 		INSTALL_COMMAND "")

 	ExternalProject_Get_Property(my-eigen3 SOURCE_DIR)
 	set(EIGEN_INCLUDE_DIR ${SOURCE_DIR})
 endif()

+message(STATUS "cifpp: Eigen include dir is ${EIGEN_INCLUDE_DIR}")
+
 # Create a revision file, containing the current git version info
 include(VersionString)
 write_version_header(${CMAKE_CURRENT_SOURCE_DIR}/src/ LIB_NAME "LibCIFPP")
@@ -322,9 +357,19 @@ target_sources(cifpp
 # The code now really requires C++20
 target_compile_features(cifpp PUBLIC cxx_std_20)

+set(CMAKE_DEBUG_POSTFIX d)
+set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
+
 generate_export_header(cifpp EXPORT_FILE_NAME
 	${CMAKE_CURRENT_SOURCE_DIR}/include/cif++/exports.hpp)

+if(BOOST_REGEX)
+	target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
+		BOOST_REGEX_STANDALONE=1)
+	get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
+		INTERFACE_INCLUDE_DIRECTORIES)
+endif()
+
 if(MSVC)
 	target_compile_definitions(cifpp PUBLIC NOMINMAX=1)
 endif()
@@ -335,20 +380,14 @@ target_include_directories(
 	cifpp
 	PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
 	"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
-	PRIVATE "${EIGEN_INCLUDE_DIR}")
+	PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")

 target_link_libraries(cifpp
-	PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
+	PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>
+	$<IF:$<BOOL:${USE_CURL_FOR_CCD}>,CURL::libcurl,>)

-if(PCRE2_FOUND)
-	target_include_directories(cifpp PRIVATE ${PCRE2_INCLUDE_DIRS})
-	target_link_libraries(cifpp PRIVATE ${PCRE2_LINK_LIBRARIES})
-else()
-	target_link_libraries(cifpp PRIVATE $<BUILD_INTERFACE:pcre2s>)
-endif()
-
-if(fmt_FOUND)
-	target_link_libraries(cifpp PUBLIC fmt)
+if (USE_CURL_FOR_CCD)
+	target_compile_definitions(cifpp PUBLIC HAVE_CURL)
 endif()

 if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
@@ -487,7 +526,6 @@ if(CIFPP_DATA_DIR AND CIFPP_DOWNLOAD_CCD)
 endif()

 set(CONFIG_TEMPLATE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cifpp-config.cmake.in)
-set(REQUIRE_FMT ${fmt_FOUND})

 configure_package_config_file(
 	${CONFIG_TEMPLATE_FILE} ${CMAKE_CURRENT_BINARY_DIR}/cifpp/cifpp-config.cmake
--- a/README.md
+++ b/README.md
@@ -117,8 +117,12 @@ Other libraries you might want to install beforehand are:
  `libeigen3-dev`
 - [zlib](https://github.com/madler/zlib), the development version of this
  library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [pcre2](https://www.pcre.org/), the Perl Compatible Regular Expression
-  library. On Debian/Ubuntu this is the package `libpcre2-dev`.
+- [boost](https://www.boost.org). On Debian/Ubuntu this is the package `libboost-dev`.
+  
+  The Boost libraries are only needed when you are using GCC, due to a
+  long-standing bug in GNU's implementation of std::regex. It simply crashes
+  on the regular expressions used in the mmcif_pdbx dictionary, and so
+  we use the Boost regex implementation instead.

 ### Building

--- a/8
+++ b/8
@@ -1,11 +1,3 @@
-Version 9.0.2
- Fix code that reconstructs sequences, could throw a map::at
- Many optimisations in validation and reconstruction code.
-
-Version 9.0.1
- Use pcre2 from pkg-config if available, if not
-  build a version from the original code.
-
 Version 9.0.0
 - Rename fields of cif::mm::polymer to match the naming
  in mmcif_pdbx.dic. Also, related, fix building mm::structure
--- a/cmake/FindPCRE2.cmake
+++ b/cmake/FindPCRE2.cmake
@@ -1,12 +0,0 @@
-# The problem is, find_package(PCRE2) does not work
-# and using pkg-config results in linking to a shared library
-# causing all kinds of trouble later on
-
-find_path(PCRE2_INCLUDEDIR NAMES pcre2.h HINTS "C:/Program Files (x86)/PCRE2/include" REQUIRED)
-find_library(PCRE2_LIBRARY NAMES pcre2-8-static libpcre2-8.a HINTS "C:/Program Files (x86)/PCRE2/lib" REQUIRED)
-
-add_library(pcre2-8 IMPORTED STATIC)
-target_include_directories(pcre2-8 INTERFACE ${PCRE2_INCLUDEDIR})
-target_compile_definitions(pcre2-8 INTERFACE PCRE2_STATIC)
-set_target_properties(pcre2-8 PROPERTIES IMPORTED_LOCATION ${PCRE2_LIBRARY})
-set_target_properties(pcre2-8 PROPERTIES IMPORTED_IMPLIB ${PCRE2_LIBRARY})
--- a/cmake/cifpp-config.cmake.in
+++ b/cmake/cifpp-config.cmake.in
@@ -8,8 +8,6 @@ include(CMakeFindDependencyMacro)

 find_dependency(Threads)
 find_dependency(ZLIB REQUIRED)
-if(@REQUIRE_FMT@)
-    find_dependency(fmt REQUIRED)
-endif()
+find_dependency(CURL REQUIRED)

 check_required_components(cifpp)
--- a/cmake/test-rx.cpp
+++ b/cmake/test-rx.cpp
@@ -0,0 +1,18 @@
+// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
+
+#include <iostream>
+#include <regex>
+
+int main()
+{
+	std::string s(100'000, '*');
+	std::smatch m;
+	std::regex r("^(.*?)$");
+
+	std::regex_search(s, m, r);
+
+	std::cout << s.substr(0, 10) << '\n';
+	std::cout << m.str(1).substr(0, 10) << '\n';
+
+	return 0;
+}
--- a/include/cif++/category.hpp
+++ b/include/cif++/category.hpp
@@ -157,7 +157,7 @@ class category
 		emplace(std::forward<row_initializer>(rows));
 	}

-	category(const category &rhs); ///< Copy constructor
+	category(const category &rhs);   ///< Copy constructor

 	category(category &&rhs) noexcept ///< Move constructor
 	{
@@ -332,16 +332,8 @@ class category
 	// --------------------------------------------------------------------
 	// A category can have a key, as defined by the validator/dictionary

-	/// @brief The type of an element of the key_type
-	struct key_element_type
-	{
-		std::string name;         ///< Name of the item
-		std::string value;        ///< Value to be found
-		bool may_be_null = false; ///< If true, value should be same or empty
-	};
-
 	/// @brief The key type
-	using key_type = std::vector<key_element_type>;
+	using key_type = row_initializer;

 	/// @brief Return a row_handle for the row specified by \a key
 	/// @param key The value for the key, items specified in the dictionary should have a value
@@ -1257,7 +1249,7 @@ class category
 		{
 		}

-		// TODO: NEED TO FIX THIS!
+// TODO: NEED TO FIX THIS!
 		category *linked;
 		const link_validator *v;
 	};
--- a/include/cif++/compound.hpp
+++ b/include/cif++/compound.hpp
@@ -180,7 +180,7 @@ class compound
 	friend class local_compound_factory_impl;

 	compound(cif::datablock &db);
-
+	
 	std::string m_id;
 	std::string m_name;
 	std::string m_type;
@@ -196,6 +196,23 @@ class compound
 // --------------------------------------------------------------------
 // Factory class for compound and Link objects

+/// @brief Options available to configure a compound factory
+struct compound_factory_options
+{
+	/// If you have a multithreaded application and want to have different
+	/// compounds in each thread (e.g. a web service processing user requests
+	/// with different sets of compounds) you can set this flag to true.
+	bool use_thread_local_instance_only = false;
+
+#if HAVE_CURL
+	// Various locations for chem_comp data files:
+	// - ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp
+	// - https://files.rcsb.org/pub/pdb/refdata/chem_comp/
+
+	std::string remote_chem_comp_url = "ftp://files.ebi.ac.uk/pub/databases/pdb/refdata/chem_comp";
+#endif
+};
+
 /// Use the compound_factory singleton instance to create compound objects

 class compound_factory
@@ -208,8 +225,12 @@ class compound_factory
 	/// with different sets of compounds) you can set the \a useThreadLocalInstanceOnly
 	/// flag to true.

+	[[deprecated("Use version with compound_factory_options instead")]]
 	static void init(bool useThreadLocalInstanceOnly);

+	/// \brief Initialise a singleton instance.
+	static void init(compound_factory_options options = {});
+
 	/// Return the singleton instance. If initialized with local threads, this is the
 	/// instance for the current thread.
 	static compound_factory &instance();
@@ -239,6 +260,8 @@ class compound_factory
 	void push_dictionary(const file &file);

 	/// Remove the last pushed dictionary
+
+	// TODO: check if the popped dict is the correct one
 	void pop_dictionary();

 	/// Return whether @a res_name is a valid and known peptide
@@ -290,13 +313,6 @@ class compound_factory

 	void report_missing_compound(std::string_view compound_id);

-	bool get_report_missing() const { return m_report_missing; }
-
-	void set_report_missing(bool report)
-	{
-		m_report_missing = report;
-	}
-
  private:
 	compound_factory();

@@ -305,10 +321,9 @@ class compound_factory

 	static std::unique_ptr<compound_factory> s_instance;
 	static thread_local std::unique_ptr<compound_factory> tl_instance;
-	static bool s_use_thread_local_instance;
+	static compound_factory_options s_options;

 	std::shared_ptr<compound_factory_impl> m_impl;
-	bool m_report_missing = true;
 };

 // --------------------------------------------------------------------
@@ -328,6 +343,9 @@ class compound_factory
 * @endcode
 */

+
+// TODO: check if pushed and popped dicts are the same!
+
 class compound_source
 {
  public:
--- a/include/cif++/condition.hpp
+++ b/include/cif++/condition.hpp
@@ -27,7 +27,6 @@
 #pragma once

 #include "cif++/row.hpp"
-#include "cif++/format.hpp"

 #include <cassert>
 #include <concepts>
@@ -50,49 +49,49 @@
 * @code {.cpp}
 * cif::condition c = cif::key("id") == 1;
 * @endcode
- *
+ * 
 * That will find rows where the ID item contains the number 1. If
 * using cif::key is a bit too much typing, you can also write:
- *
+ * 
 * @code{.cpp}
 * using namespace cif::literals;
- *
+ * 
 * cif::condition c2 = "id"_key == 1;
 * @endcode
- *
+ * 
 * Now if you want both ID = 1 and ID = 2 in the result:
- *
+ * 
 * @code{.cpp}
 * auto c3 = "id"_key == 1 or "id"_key == 2;
 * @endcode
- *
+ * 
 * There are some special values you can use. To find rows with item that
 * do not have a value:
- *
+ * 
 * @code{.cpp}
 * auto c4 = "type"_key == cif::null;
- * @endcode
- *
+ * @endcode 
+ * 
 * Of if it should not be NULL:
- *
+ * 
 * @code{.cpp}
 * auto c5 = "type"_key != cif::null;
- * @endcode
- *
+ * @endcode 
+ * 
 * There's even a way to find all records:
- *
+ * 
 * @code{.cpp}
 * auto c6 = cif::all;
 * @endcode
- *
+ * 
 * And when you want to search for any item containing the value 'foo':
- *
+ * 
 * @code{.cpp}
 * auto c7 = cif::any == "foo";
- * @endcode
- *
+ * @endcode 
+ * 
 * All these conditions can be chained together again:
- *
+ * 
 * @code{.cpp}
 * auto c8 = std::move(c3) and std::move(c5);
 * @endcode
@@ -107,7 +106,7 @@ namespace cif

 /**
 * @brief Get the items that can be used as key in conditions for a category
- *
+ * 
 * @param cat The category whose items to return
 * @return iset The set of key item names
 */
@@ -116,7 +115,7 @@ iset get_category_fields(const category &cat);

 /**
 * @brief Get the items that can be used as key in conditions for a category
- *
+ * 
 * @param cat The category whose items to return
 * @return iset The set of key field names
 */
@@ -124,7 +123,7 @@ iset get_category_items(const category &cat);

 /**
 * @brief Get the item index for item @a col in category @a cat
- *
+ * 
 * @param cat The category
 * @param col The name of the item
 * @return uint16_t The index
@@ -133,7 +132,7 @@ uint16_t get_item_ix(const category &cat, std::string_view col);

 /**
 * @brief Return whether the item @a col in category @a cat has a primitive type of *uchar*
- *
+ * 
 * @param cat The category
 * @param col The item name
 * @return true If the primitive type is of type *uchar*
@@ -176,13 +175,14 @@ namespace detail
 class condition
 {
  public:
+
 	/** @cond */
 	using condition_impl = detail::condition_impl;
 	/** @endcond */

 	/**
 	 * @brief Construct a new, empty condition object
-	 *
+	 * 
 	 */
 	condition()
 		: m_impl(nullptr)
@@ -191,7 +191,7 @@ class condition

 	/**
 	 * @brief Construct a new condition object with implementation @a impl
-	 *
+	 * 
 	 * @param impl The implementation to use
 	 */
 	explicit condition(condition_impl *impl)
@@ -230,15 +230,15 @@ class condition
 	/**
 	 * @brief Prepare the condition to be used on category @a c. This will
 	 * take care of setting the correct indices for items e.g.
-	 *
+	 * 
 	 * @param c The category this query should act upon
 	 */
 	void prepare(const category &c);

 	/**
-	 * @brief This operator returns true if the row referenced by @a r is
+	 * @brief This operator returns true if the row referenced by @a r is 
 	 * a match for this condition.
-	 *
+	 * 
 	 * @param r The reference to a row.
 	 * @return true If there is a match
 	 * @return false If there is no match
@@ -263,7 +263,7 @@ class condition
 	/**
 	 * @brief If the prepare step found out there is only one hit
 	 * this single hit can be returned by this method.
-	 *
+	 * 
 	 * @return std::optional<row_handle> The result will contain
 	 * a row reference if there is a single hit, it will be empty otherwise
 	 */
@@ -292,7 +292,7 @@ class condition

 	/**
 	 * @brief Operator to use to write out a condition to @a os, for debugging purposes
-	 *
+	 * 
 	 * @param os The std::ostream to write to
 	 * @param cond The condition to write
 	 * @return std::ostream& The same as @a os
@@ -752,9 +752,28 @@ namespace detail
 				delete sub;
 		}

-		condition_impl *prepare(const category &c) override;
+		condition_impl *prepare(const category &c) override
+		{
+			for (auto &sub : m_sub)
+				sub = sub->prepare(c);
+			return this;
+		}

-		bool test(row_handle r) const override;
+		bool test(row_handle r) const override
+		{
+			bool result = true;
+
+			for (auto sub : m_sub)
+			{
+				if (sub->test(r))
+					continue;
+
+				result = false;
+				break;
+			}
+
+			return result;
+		}

 		void str(std::ostream &os) const override
 		{
@@ -801,7 +820,6 @@ namespace detail
 		static condition_impl *combine_equal(std::vector<and_condition_impl *> &subs, or_condition_impl *oc);

 		std::vector<condition_impl *> m_sub;
-		std::optional<row_handle> m_single; // Potential result of index lookup
 	};

 	struct or_condition_impl : public condition_impl
@@ -959,9 +977,9 @@ inline condition operator or(condition &&a, condition &&b)
 			if (ci->m_item_name == ce->m_item_name)
 				return condition(new detail::key_equals_or_empty_condition_impl(ci));
 		}
-
+		
 		if (typeid(*b.m_impl) == typeid(detail::key_equals_condition_impl) and
-			typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
+				 typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
 		{
 			auto ci = static_cast<detail::key_equals_condition_impl *>(b.m_impl);
 			auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -979,9 +997,9 @@ inline condition operator or(condition &&a, condition &&b)
 			if (ci->m_item_name == ce->m_item_name)
 				return condition(new detail::key_equals_number_or_empty_condition_impl(ci));
 		}
-
+		
 		if (typeid(*b.m_impl) == typeid(detail::key_equals_number_condition_impl) and
-			typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
+				 typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
 		{
 			auto ci = static_cast<detail::key_equals_number_condition_impl *>(b.m_impl);
 			auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -1001,7 +1019,7 @@ inline condition operator or(condition &&a, condition &&b)

 /**
 * @brief A helper class to make it possible to search for empty items (NULL)
- *
+ * 
 * @code{.cpp}
 * "id"_key == cif::empty_type();
 * @endcode
@@ -1013,7 +1031,7 @@ struct empty_type

 /**
 * @brief A helper to make it possible to have conditions like
- *
+ * 
 * @code{.cpp}
 * "id"_key == cif::null;
 * @endcode
@@ -1023,14 +1041,14 @@ inline constexpr empty_type null = empty_type();

 /**
 * @brief Class to use in creating conditions, creates a reference to a item or item
- *
+ * 
 */
 struct key
 {
 	/**
 	 * @brief Construct a new key object using @a item_name as name
-	 *
-	 * @param item_name
+	 * 
+	 * @param item_name 
 	 */
 	explicit key(const std::string &item_name)
 		: m_item_name(item_name)
@@ -1039,8 +1057,8 @@ struct key

 	/**
 	 * @brief Construct a new key object using @a item_name as name
-	 *
-	 * @param item_name
+	 * 
+	 * @param item_name 
 	 */
 	explicit key(const char *item_name)
 		: m_item_name(item_name)
@@ -1049,8 +1067,8 @@ struct key

 	/**
 	 * @brief Construct a new key object using @a item_name as name
-	 *
-	 * @param item_name
+	 * 
+	 * @param item_name 
 	 */
 	explicit key(std::string_view item_name)
 		: m_item_name(item_name)
@@ -1072,8 +1090,7 @@ concept Numeric = ((std::is_floating_point_v<T> or std::is_integral_v<T>) and no
 template <Numeric T>
 condition operator==(const key &key, const T &v)
 {
-	// TODO: change key_equals_etc... to use std::variant<double,int64_t> or something
-	return condition(new detail::key_equals_number_condition_impl(key.m_item_name, static_cast<double>(v)));
+	return condition(new detail::key_equals_number_condition_impl(key.m_item_name, v));
 }

 /**
@@ -1120,10 +1137,13 @@ inline condition operator!=(const key &key, std::string_view value)
 template <Numeric T>
 condition operator>(const key &key, const T &v)
 {
+	std::ostringstream s;
+	s << " > " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v) > 0; },
-		cif::format(" > {}", v)));
+		s.str()));
 }

 /**
@@ -1132,10 +1152,13 @@ condition operator>(const key &key, const T &v)
 template <Numeric T>
 condition operator>=(const key &key, const T &v)
 {
+	std::ostringstream s;
+	s << " >= " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v) >= 0; },
-		cif::format(" >= {}", v)));
+		s.str()));
 }

 /**
@@ -1144,10 +1167,13 @@ condition operator>=(const key &key, const T &v)
 template <Numeric T>
 condition operator<(const key &key, const T &v)
 {
+	std::ostringstream s;
+	s << " < " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v) < 0; },
-		cif::format(" < {}", v)));
+		s.str()));
 }

 /**
@@ -1156,10 +1182,13 @@ condition operator<(const key &key, const T &v)
 template <Numeric T>
 condition operator<=(const key &key, const T &v)
 {
+	std::ostringstream s;
+	s << " <= " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v) <= 0; },
-		cif::format(" <= {}", v)));
+		s.str()));
 }

 /**
@@ -1167,10 +1196,13 @@ condition operator<=(const key &key, const T &v)
 */
 inline condition operator>(const key &key, std::string_view v)
 {
+	std::ostringstream s;
+	s << " > " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v, icase) > 0; },
-		cif::format(" > {}", v)));
+		s.str()));
 }

 /**
@@ -1178,10 +1210,13 @@ inline condition operator>(const key &key, std::string_view v)
 */
 inline condition operator>=(const key &key, std::string_view v)
 {
+	std::ostringstream s;
+	s << " >= " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v, icase) >= 0; },
-		cif::format(" >= {}", v)));
+		s.str()));
 }

 /**
@@ -1189,10 +1224,13 @@ inline condition operator>=(const key &key, std::string_view v)
 */
 inline condition operator<(const key &key, std::string_view v)
 {
+	std::ostringstream s;
+	s << " < " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v, icase) < 0; },
-		cif::format(" < {}", v)));
+		s.str()));
 }

 /**
@@ -1200,10 +1238,13 @@ inline condition operator<(const key &key, std::string_view v)
 */
 inline condition operator<=(const key &key, std::string_view v)
 {
+	std::ostringstream s;
+	s << " <= " << v;
+
 	return condition(new detail::key_compare_condition_impl(
 		key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
 		{ return r[item_name].compare(v, icase) <= 0; },
-		cif::format(" <= {}", v)));
+		s.str()));
 }

 /**
@@ -1304,7 +1345,7 @@ namespace literals
 {
 	/**
 	 * @brief Return a cif::key for the item name @a text
-	 *
+	 * 
 	 * @param text The name of the item
 	 * @param length The length of @a text
 	 * @return key The cif::key created
--- a/include/cif++/format.hpp
+++ b/include/cif++/format.hpp
@@ -26,28 +26,138 @@

 #pragma once

-#if __has_include(<format>)
-#include <format>
-#define USE_STD_FORMAT 1
-#else
-#include <fmt/format.h>
-#endif
-
 #include <string>

 /**  \file format.hpp
 * 
- * Now using cif::format instead of a home grown rip off
+ * File containing a basic reimplementation of boost::format
+ * but then a bit more simplistic. Still this allowed me to move my code
+ * from using boost::format to something without external dependency easily.
 */

 namespace cif
 {

-#if USE_STD_FORMAT
-using std::format;
-#else
-using fmt::format;
-#endif
+namespace detail
+{
+	template <typename T>
+	struct to_varg
+	{
+		using type = T;
+
+		to_varg(const T &v)
+			: m_value(v)
+		{
+		}
+
+		type operator*() { return m_value; }
+
+		T m_value;
+	};
+
+	template <>
+	struct to_varg<const char *>
+	{
+		using type = const char *;
+
+		to_varg(const char *v)
+			: m_value(v)
+		{
+		}
+
+		type operator*() { return m_value.c_str(); }
+
+		std::string m_value;
+	};
+
+	template <>
+	struct to_varg<std::string>
+	{
+		using type = const char *;
+
+		to_varg(const std::string &v)
+			: m_value(v)
+		{
+		}
+
+		type operator*() { return m_value.c_str(); }
+
+		std::string m_value;
+	};
+
+} // namespace detail
+
+/** @cond */
+
+template <typename... Args>
+class format_plus_arg
+{
+  public:
+	using args_vector_type = std::tuple<detail::to_varg<Args>...>;
+	using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;
+
+	format_plus_arg(const format_plus_arg &) = delete;
+	format_plus_arg &operator=(const format_plus_arg &) = delete;
+
+
+	format_plus_arg(std::string_view fmt, Args... args)
+		: m_fmt(fmt)
+		, m_args(std::forward<Args>(args)...)
+	{
+		auto ix = std::make_index_sequence<sizeof...(Args)>();
+		copy_vargs(ix);
+	}
+
+	std::string str() // Render the arguments into a fixed 1 KiB buffer; longer results are truncated
+	{
+		char buffer[1024];
+		int r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), m_fmt.c_str()), m_vargs)); // snprintf returns the untruncated length, or a negative value on error
+		return { buffer, r < 0 ? std::size_t{ 0 } : (static_cast<std::size_t>(r) < sizeof(buffer) ? static_cast<std::size_t>(r) : sizeof(buffer) - 1) }; // clamp so we never read past what was actually written
+	}
+
+	friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f) // Write the formatted text to os; output longer than the 1 KiB buffer is truncated
+	{
+		char buffer[1024];
+		int r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), f.m_fmt.c_str()), f.m_vargs)); // snprintf returns the untruncated length, or a negative value on error
+		os.write(buffer, r < 0 ? 0 : (r < static_cast<int>(sizeof(buffer)) ? r : static_cast<int>(sizeof(buffer)) - 1)); // clamp so we never write bytes snprintf did not produce
+		return os;
+	}
+
+  private:
+
+	template <std::size_t... I>
+	void copy_vargs(std::index_sequence<I...>)
+	{
+		((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
+	}
+
+	std::string m_fmt;
+	args_vector_type m_args;
+	vargs_vector_type m_vargs;
+};
+
+/** @endcond */
+
+/**
+ * @brief A simplistic reimplementation of boost::format, in fact it is
+ * actually a way to call the C function snprintf to format the arguments
+ * in @a args into the format string @a fmt
+ * 
+ * The string in @a fmt should thus be a C style format string.
+ * 
+ * TODO: Move to C++23 style of printing.
+ * 
+ * @tparam Args The types of the arguments
+ * @param fmt The format string
+ * @param args The arguments
+ * @return An object that can be written out to a std::ostream using operator<<
+ */
+
+template <typename... Args>
+constexpr auto format(std::string_view fmt, Args... args)
+{
+	return format_plus_arg(fmt, std::forward<Args>(args)...);
+}

 // --------------------------------------------------------------------
 /// A streambuf that fills out lines with spaces up until a specified width
--- a/include/cif++/gzio.hpp
+++ b/include/cif++/gzio.hpp
@@ -1,33 +1,7 @@
-/*-
- * SPDX-License-Identifier: BSD-2-Clause
- * 
- * Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
- * 
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- * 
- * 1. Redistributions of source code must retain the above copyright notice, this
- *    list of conditions and the following disclaimer
- * 2. Redistributions in binary form must reproduce the above copyright notice,
- *    this list of conditions and the following disclaimer in the documentation
- *    and/or other materials provided with the distribution.
- * 
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
- * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
- * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
- * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
- * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-/*
-	Original code comes from libgxrio at https://github.com/mhekkel/gxrio
-	This is a stripped down version.
-*/
+//          Copyright Maarten L. Hekkelman, 2022
+// Distributed under the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE_1_0.txt or copy at
+//          http://www.boost.org/LICENSE_1_0.txt)

 #pragma once

--- a/include/cif++/item.hpp
+++ b/include/cif++/item.hpp
@@ -250,8 +250,6 @@ class item
 			return value();
 	}

-	auto operator<=>(const item &rhs) const = default;
-
  private:
 	std::string_view m_name;
 	std::string m_value;
--- a/include/cif++/utilities.hpp
+++ b/include/cif++/utilities.hpp
@@ -53,7 +53,6 @@
 #pragma warning(disable : 4068) // unknown pragma
 #pragma warning(disable : 4100) // unreferenced formal parameter
 #pragma warning(disable : 4101) // unreferenced local variable
-#pragma warning(disable : 4702) // unreachable code (too bad, this one. Happens in for loops)
 #define _SILENCE_CXX17_CODECVT_HEADER_DEPRECATION_WARNING 1
 #endif

--- a/pcre2-simple/CMakeLists.txt
+++ b/pcre2-simple/CMakeLists.txt
@@ -1,316 +0,0 @@
-# SPDX-License-Identifier: BSD-2-Clause
-# 
-# Copyright (c) 2025 Maarten L. Hekkelman
-# 
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are met:
-# 
-# 1. Redistributions of source code must retain the above copyright notice, this
-#    list of conditions and the following disclaimer
-# 2. Redistributions in binary form must reproduce the above copyright notice,
-#    this list of conditions and the following disclaimer in the documentation
-#    and/or other materials provided with the distribution.
-# 
-# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
-# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
-# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
-# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
-# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-# A simplified wrapper CMakeLists.txt file for PCRE2
-#
-# This will generate an OBJECT library so it can be linked into another library
-
-cmake_minimum_required(VERSION 3.25)
-
-include(FetchContent)
-
-project(pcre2s VERSION 1.0.0 LANGUAGES C CXX)
-
-# The original code:
-
-file(DOWNLOAD https://github.com/PCRE2Project/pcre2/releases/download/pcre2-10.46/pcre2-10.46.tar.gz
-    ${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
-    EXPECTED_HASH SHA256=8d28d7f2c3b970c3a4bf3776bcbb5adfc923183ce74bc8df1ebaad8c1985bd07)
-file(ARCHIVE_EXTRACT INPUT ${CMAKE_CURRENT_BINARY_DIR}/pcre2-code.tgz
-    DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
-set(PCRE2_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/pcre2-10.46)
-set(PCRE2_MAJOR 10)
-set(PCRE2_MINOR 46)
-set(PCRE2_VERSION "${PCRE2_MAJOR}.${PCRE2_MINOR}")
-set(PCRE2_DATE "2024-06-09")
-
-# Some needed configuration options
-
-# option(PCRE2_BUILD_PCRE2_8 "Build 8 bit PCRE2 library" ON)
-# option(PCRE2_BUILD_PCRE2_16 "Build 16 bit PCRE2 library" OFF)
-# option(PCRE2_BUILD_PCRE2_32 "Build 32 bit PCRE2 library" OFF)
-
-option(PCRE2_STATIC_PIC "Build the static library with the option position independent code enabled." OFF)
-
-set(PCRE2_NEWLINE "LF" CACHE STRING "What to recognize as a newline (one of CR, LF, CRLF, ANY, ANYCRLF, NUL)." FORCE)
-set_property(CACHE PCRE2_NEWLINE PROPERTY STRINGS "CR" "LF" "CRLF" "ANY" "ANYCRLF" "NUL")
-
-set(PCRE2_LINK_SIZE "2" CACHE STRING "Internal link size (2, 3 or 4 allowed). See LINK_SIZE in config.h.in for details.")
-set_property(CACHE PCRE2_LINK_SIZE PROPERTY STRINGS "2" "3" "4")
-
-set(PCRE2_PARENS_NEST_LIMIT "250" CACHE STRING "Default nested parentheses limit. See PARENS_NEST_LIMIT in config.h.in for details.")
-set(PCRE2_HEAP_LIMIT "20000000" CACHE STRING "Default limit on heap memory (kibibytes). See HEAP_LIMIT in config.h.in for details.")
-set(PCRE2_MAX_VARLOOKBEHIND "255" CACHE STRING "Default limit on variable lookbehinds.")
-set(PCRE2_MATCH_LIMIT "10000000" CACHE STRING "Default limit on internal looping. See MATCH_LIMIT in config.h.in for details.")
-set(PCRE2_MATCH_LIMIT_DEPTH "MATCH_LIMIT" CACHE STRING "Default limit on internal depth of search. See MATCH_LIMIT_DEPTH in config.h.in for details.")
-set(PCRE2GREP_BUFSIZE "20480" CACHE STRING "Buffer starting size parameter for pcre2grep. See PCRE2GREP_BUFSIZE in config.h.in for details.")
-set(PCRE2GREP_MAX_BUFSIZE "1048576" CACHE STRING "Buffer maximum size parameter for pcre2grep. See PCRE2GREP_MAX_BUFSIZE in config.h.in for details.")
-set(PCRE2_SUPPORT_JIT OFF CACHE BOOL "Enable support for Just-in-time compiling.")
-
-if(${CMAKE_SYSTEM_NAME} MATCHES Linux|NetBSD)
-    set(PCRE2_SUPPORT_JIT_SEALLOC OFF CACHE BOOL "Enable SELinux compatible execmem allocator in JIT (experimental).")
-else()
-    set(PCRE2_SUPPORT_JIT_SEALLOC IGNORE)
-endif()
-
-set(PCRE2GREP_SUPPORT_JIT ON CACHE BOOL "Enable use of Just-in-time compiling in pcre2grep.")
-set(PCRE2GREP_SUPPORT_CALLOUT ON CACHE BOOL "Enable callout string support in pcre2grep.")
-set(PCRE2GREP_SUPPORT_CALLOUT_FORK ON CACHE BOOL "Enable callout string fork support in pcre2grep.")
-set(PCRE2_SUPPORT_UNICODE ON CACHE BOOL "Enable support for Unicode and UTF-8/UTF-16/UTF-32 encoding.")
-set(PCRE2_SUPPORT_BSR_ANYCRLF OFF CACHE BOOL "ON=Backslash-R matches only LF CR and CRLF, OFF=Backslash-R matches all Unicode Linebreaks")
-set(PCRE2_NEVER_BACKSLASH_C OFF CACHE BOOL "If ON, backslash-C (upper case C) is locked out.")
-set(PCRE2_SUPPORT_VALGRIND OFF CACHE BOOL "Enable Valgrind support.")
-
-if(MINGW)
-    option(NON_STANDARD_LIB_PREFIX "ON=Shared libraries built in mingw will be named pcre2.dll, etc., instead of libpcre2.dll, etc." OFF)
-    option(NON_STANDARD_LIB_SUFFIX "ON=Shared libraries built in mingw will be named libpcre2-0.dll, etc., instead of libpcre2.dll, etc." OFF)
-endif()
-
-# 
-
-set(NEWLINE_DEFAULT "")
-
-if(PCRE2_NEWLINE STREQUAL "CR")
-    set(NEWLINE_DEFAULT "1")
-elseif(PCRE2_NEWLINE STREQUAL "LF")
-    set(NEWLINE_DEFAULT "2")
-elseif(PCRE2_NEWLINE STREQUAL "CRLF")
-    set(NEWLINE_DEFAULT "3")
-elseif(PCRE2_NEWLINE STREQUAL "ANY")
-    set(NEWLINE_DEFAULT "4")
-elseif(PCRE2_NEWLINE STREQUAL "ANYCRLF")
-    set(NEWLINE_DEFAULT "5")
-elseif(PCRE2_NEWLINE STREQUAL "NUL")
-    set(NEWLINE_DEFAULT "6")
-else()
-    message(FATAL_ERROR "The PCRE2_NEWLINE variable must be set to one of the following values: \"LF\", \"CR\", \"CRLF\", \"ANY\", \"ANYCRLF\".")
-endif()
-
-# Some tests
-
-include(CheckCSourceCompiles)
-include(CheckFunctionExists)
-include(CheckSymbolExists)
-include(CheckIncludeFile)
-
-check_include_file(assert.h HAVE_ASSERT_H)
-check_include_file(dirent.h HAVE_DIRENT_H)
-check_include_file(sys/stat.h HAVE_SYS_STAT_H)
-check_include_file(sys/types.h HAVE_SYS_TYPES_H)
-check_include_file(unistd.h HAVE_UNISTD_H)
-check_include_file(windows.h HAVE_WINDOWS_H)
-
-check_symbol_exists(bcopy "strings.h" HAVE_BCOPY)
-check_symbol_exists(memfd_create "sys/mman.h" HAVE_MEMFD_CREATE)
-check_symbol_exists(memmove "string.h" HAVE_MEMMOVE)
-check_symbol_exists(secure_getenv "stdlib.h" HAVE_SECURE_GETENV)
-check_symbol_exists(strerror "string.h" HAVE_STRERROR)
-
-check_c_source_compiles(
-  "int main(void) { char buf[128] __attribute__((uninitialized)); (void)buf; return 0; }"
-  HAVE_ATTRIBUTE_UNINITIALIZED
-)
-
-check_c_source_compiles(
-  [=[
-  extern __attribute__ ((visibility ("default"))) int f(void);
-  int main(void) { return f(); }
-  int f(void) { return 42; }
-  ]=]
-  HAVE_VISIBILITY
-)
-
-if(HAVE_VISIBILITY)
-  set(PCRE2_EXPORT [=[__attribute__ ((visibility ("default")))]=])
-else()
-  set(PCRE2_EXPORT)
-endif()
-
-check_c_source_compiles("int main(void) { __assume(1); return 0; }" HAVE_BUILTIN_ASSUME)
-
-check_c_source_compiles(
-  [=[
-  #include <stddef.h>
-  int main(void) { int a,b; size_t m; __builtin_mul_overflow(a,b,&m); return 0; }
-  ]=]
-  HAVE_BUILTIN_MUL_OVERFLOW
-)
-
-check_c_source_compiles(
-  "int main(int c, char *v[]) { if (c) __builtin_unreachable(); return (int)(*v[0]); }"
-  HAVE_BUILTIN_UNREACHABLE
-)
-
-# # Check whether Intel CET is enabled, and if so, adjust compiler flags. This
-# # code was written by PH, trying to imitate the logic from the autotools
-# # configuration.
-
-# check_c_source_compiles(
-#   [=[
-#   #ifndef __CET__
-#   #error CET is not enabled
-#   #endif
-#   int main() { return 0; }
-#   ]=]
-#   INTEL_CET_ENABLED
-# )
-
-# if(INTEL_CET_ENABLED)
-#   set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mshstk")
-# endif()
-
-# Set up some dependencies first
-
-configure_file(
-    ${PCRE2_SOURCE_DIR}/src/pcre2_chartables.c.dist
-    ${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
-    COPYONLY
-)
-
-configure_file(
-    ${PCRE2_SOURCE_DIR}/config-cmake.h.in
-    ${CMAKE_CURRENT_BINARY_DIR}/interface/config.h
-    @ONLY
-)
-
-configure_file(
-    ${PCRE2_SOURCE_DIR}/src/pcre2.h.in
-    ${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h
-    @ONLY
-)
-
-# Define our library
-
-list(APPEND PCRE2_HEADERS
-    ${CMAKE_CURRENT_BINARY_DIR}/interface/pcre2.h)
-
-list(APPEND PCRE2_SOURCES
-    ${PCRE2_SOURCE_DIR}/src/pcre2_auto_possess.c
-    ${CMAKE_CURRENT_BINARY_DIR}/pcre2_chartables.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_chkdint.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_compile.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_compile_class.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_config.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_context.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_convert.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_dfa_match.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_error.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_extuni.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_find_bracket.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_jit_compile.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_maketables.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_match.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_match_data.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_newline.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_ord2utf.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_pattern_info.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_script_run.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_serialize.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_string_utils.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_study.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_substitute.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_substring.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_tables.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_ucd.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_valid_utf.c
-    ${PCRE2_SOURCE_DIR}/src/pcre2_xclass.c
-)
-
-add_library(pcre2s OBJECT)
-
-target_sources(pcre2s
-    PRIVATE ${PCRE2_SOURCES}
-    PUBLIC
-    FILE_SET pcre2_headers TYPE HEADERS
-    BASE_DIRS ${PCRE2_SOURCE_DIR}/include ${CMAKE_CURRENT_BINARY_DIR}/interface
-    FILES ${PCRE2_HEADERS}
-)
-
-target_compile_definitions(pcre2s PUBLIC PCRE2_CODE_UNIT_WIDTH=8 HAVE_CONFIG_H)
-if(NOT BUILD_SHARED_LIBS)
-    target_compile_definitions(pcre2s PUBLIC PCRE2_STATIC)
-endif()
-
-target_include_directories(pcre2s PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/interface ${PCRE2_SOURCE_DIR}/src)
-
-if(PCRE2_STATIC_PIC)
-    set_target_properties(pcre2s PROPERTIES POSITION_INDEPENDENT_CODE 1)
-endif()
-
-# # Installation and config files
-
-# include(CMakePackageConfigHelpers)
-# include(GenerateExportHeader)
-
-# # Install rules
-# install(TARGETS pcre2s
-#     EXPORT pcre2s
-#     FILE_SET pcre2_headers DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
-
-# if(MSVC AND BUILD_SHARED_LIBS)
-#     install(
-#         FILES $<TARGET_PDB_FILE:pcre2s>
-#         DESTINATION ${CMAKE_INSTALL_LIBDIR}
-#         OPTIONAL)
-# endif()
-
-# install(EXPORT pcre2s
-#     NAMESPACE pcre2s::
-#     FILE "pcre2s-targets.cmake"
-#     DESTINATION lib/cmake/pcre2s)
-
-# configure_package_config_file(
-#     ${CMAKE_CURRENT_SOURCE_DIR}/pcre2s-config.cmake.in ${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake
-#     INSTALL_DESTINATION lib/cmake/pcre2s)
-
-# install(
-#     FILES "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config.cmake"
-#     "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
-#     DESTINATION lib/cmake/pcre2s)
-
-# set_target_properties(
-#     pcre2s
-#     PROPERTIES VERSION ${PCRE2_VERSION}
-#     SOVERSION ${PCRE2_VERSION}
-#     INTERFACE_pcre2s_MAJOR_VERSION ${PCRE2_MAJOR})
-
-# set_property(
-#     TARGET pcre2s
-#     APPEND
-#     PROPERTY COMPATIBLE_INTERFACE_STRING pcre2s_MAJOR_VERSION)
-
-# write_basic_package_version_file(
-#     "${CMAKE_CURRENT_BINARY_DIR}/pcre2s/pcre2s-config-version.cmake"
-#     VERSION "${PCRE2_VERSION}"
-#     COMPATIBILITY AnyNewerVersion)
-
-# # Testing
-
-# if(PROJECT_IS_TOP_LEVEL)
-#     include(CTest)
-
-#     if(BUILD_TESTING)
-#         add_subdirectory(test)
-#     endif()
-# endif()
--- a/src/category.cpp
+++ b/src/category.cpp
@@ -92,7 +92,7 @@ class row_comparator
 		return d;
 	}

-	int operator()(const category &cat, const category::key_type &a, const row *b) const
+	int operator()(const category &cat, const row_initializer &a, const row *b) const
 	{
 		assert(b);

@@ -105,11 +105,10 @@ class row_comparator
 		{
 			assert(ai != a.end());

-			std::string_view ka = ai->value;
+			std::string_view ka = ai->value();
 			std::string_view kb = rhb[k].text();

-			if (not (ai->may_be_null and rhb[k].empty()))
-				d = f(ka, kb);
+			d = f(ka, kb);

 			if (d != 0)
 				break;
@@ -143,7 +142,7 @@ class category_index
 	}

 	row *find(const category &cat, row *k) const;
-	row *find_by_value(const category &cat, const category::key_type &k) const;
+	row *find_by_value(const category &cat, row_initializer k) const;

 	void insert(category &cat, row *r);
 	void erase(category &cat, row *r);
@@ -353,19 +352,19 @@ row *category_index::find(const category &cat, row *k) const
 	return r ? r->m_row : nullptr;
 }

-row *category_index::find_by_value(const category &cat, const category::key_type &k) const
+row *category_index::find_by_value(const category &cat, row_initializer k) const
 {
 	// sort the values in k first

-	category::key_type k2;
+	row_initializer k2;
 	for (auto &f : cat.key_item_indices())
 	{
 		auto fld = cat.get_item_name(f);

 		auto ki = find_if(k.begin(), k.end(), [&fld](auto &i)
-			{ return i.name == fld; });
+			{ return i.name() == fld; });
 		if (ki == k.end())
-			k2.emplace_back(std::string{ fld }, "");
+			k2.emplace_back(fld, "");
 		else
 			k2.emplace_back(*ki);
 	}
@@ -1339,7 +1338,8 @@ std::string category::get_unique_value(std::string_view item_name)
 		// brain-dead implementation
 		for (std::size_t ix = 0; ix < size(); ++ix)
 		{
-			result = cif_id_for_number(static_cast<int>(ix));
+			// result = m_name + "-" + std::to_string(ix);
+			result = cif_id_for_number(ix);
 			if (not contains(key(item_name) == result))
 				break;
 		}
--- a/src/compound.cpp
+++ b/src/compound.cpp
@@ -26,6 +26,10 @@

 #include "cif++.hpp"

+#if HAVE_CURL
+# include <curl/curl.h>
+#endif
+
 #include <filesystem>
 #include <fstream>
 #include <map>
@@ -140,7 +144,7 @@ compound::compound(cif::datablock &db)

 	cif::tie(m_id, m_name, m_type, m_formula, m_formula_weight, m_formal_charge, one_letter_code, m_parent_id) =
 		chemComp.front().get("id", "name", "type", "formula", "formula_weight", "pdbx_formal_charge", "one_letter_code", "mon_nstd_parent_comp_id");
-	
+
 	if (one_letter_code.length() == 1)
 		m_one_letter_code = one_letter_code.front();

@@ -159,7 +163,7 @@ compound::compound(cif::datablock &db)
 		if (stereo_config.empty())
 			atom.stereo_config = stereo_config_type::N;
 		else
-		atom.stereo_config = parse_stereo_config_from_string(stereo_config);
+			atom.stereo_config = parse_stereo_config_from_string(stereo_config);
 		m_atoms.push_back(std::move(atom));
 	}

@@ -172,7 +176,7 @@ compound::compound(cif::datablock &db)
 		if (valueOrder.empty())
 			bond.type = bond_type::sing;
 		else
-		bond.type = parse_bond_type_from_string(valueOrder);
+			bond.type = parse_bond_type_from_string(valueOrder);
 		m_bonds.push_back(std::move(bond));
 	}
 }
@@ -231,12 +235,12 @@ float compound::bond_length(const std::string &atomId_1, const std::string &atom

 bool compound::is_peptide() const
 {
-	return iequals(m_type, "l-peptide linking")	or iequals(m_type, "peptide linking");
+	return iequals(m_type, "l-peptide linking") or iequals(m_type, "peptide linking");
 }

 bool compound::is_base() const
 {
-	return iequals(m_type, "dna linking")	or iequals(m_type, "rna linking");
+	return iequals(m_type, "dna linking") or iequals(m_type, "rna linking");
 }

 // --------------------------------------------------------------------
@@ -299,7 +303,7 @@ class compound_factory_impl : public std::enable_shared_from_this<compound_facto
 		std::shared_lock lock(mMutex);

 		compound *result = nullptr;
-		
+
 		for (auto impl = shared_from_this(); impl; impl = impl->m_next)
 		{
 			result = impl->create(id);
@@ -363,7 +367,9 @@ compound *compound_factory_impl::create(const std::string &id)
 	if (m_missing.contains(id))
 		return nullptr;

-	if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
+	if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
+			{ return c->id() == id; });
+		i != m_compounds.end())
 		return *i;

 	compound *result = nullptr;
@@ -454,7 +460,6 @@ class local_compound_factory_impl : public compound_factory_impl
 	compound *create(const std::string &id) override;

  private:
-
 	compound *construct_compound(const datablock &db, const std::string &id, const std::string &name, const std::string &three_letter_code, const std::string &group);

 	cif::file m_local_file;
@@ -465,7 +470,9 @@ compound *local_compound_factory_impl::create(const std::string &id)
 	if (m_missing.contains(id))
 		return nullptr;

-	if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c) { return c->id() == id; }); i != m_compounds.end())
+	if (auto i = find_if(m_compounds.begin(), m_compounds.end(), [id](compound *c)
+			{ return c->id() == id; });
+		i != m_compounds.end())
 		return *i;

 	compound *result = nullptr;
@@ -480,13 +487,10 @@ compound *local_compound_factory_impl::create(const std::string &id)

 			try
 			{
-				const auto &[id2, name, threeLetterCode, group] =
+				const auto &[id, name, threeLetterCode, group] =
 					chem_comp->front().get<std::string, std::string, std::string, std::string>("id", "name", "three_letter_code", "group");

-				if (id == id2)
-					result = construct_compound(db, id, name, threeLetterCode, group);
-				else
-					throw std::runtime_error("Compound ID's don't match: id 1=" + id + ", id 2=" + id2);
+				result = construct_compound(db, id, name, threeLetterCode, group);
 			}
 			catch (const std::exception &ex)
 			{
@@ -510,12 +514,10 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,

 	float formula_weight = 0;
 	int formal_charge = 0;
-	std::map<std::string,std::size_t> formula_data;
+	std::map<std::string, std::size_t> formula_data;

 	for (std::size_t ord = 1; const auto &[atom_id, type_symbol, type, charge, x, y, z, xi, yi, zi] :
-		rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int,
-			std::optional<float>, std::optional<float>, std::optional<float>,
-			std::optional<float>, std::optional<float>, std::optional<float>>(
+		rdb["chem_comp_atom"].rows<std::string, std::string, std::string, int, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>, std::optional<float>>(
 			"atom_id", "type_symbol", "type", "charge",
 			"model_Cartn_x", "model_Cartn_y", "model_Cartn_z",
 			"pdbx_model_Cartn_x_ideal", "pdbx_model_Cartn_y_ideal", "pdbx_model_Cartn_z_ideal"))
@@ -525,16 +527,14 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,

 		formula_data[type_symbol] += 1;

-		db["chem_comp_atom"].emplace({
-			{ "comp_id", id },
+		db["chem_comp_atom"].emplace({ { "comp_id", id },
 			{ "atom_id", atom_id },
 			{ "type_symbol", type_symbol },
 			{ "charge", charge },
-			{ "model_Cartn_x",  x.has_value() ? x : xi, 3 },
-			{ "model_Cartn_y",  y.has_value() ? y : yi, 3 },
-			{ "model_Cartn_z",  z.has_value() ? z : zi, 3 },
-			{ "pdbx_ordinal", ord++ }
-		});
+			{ "model_Cartn_x", x.has_value() ? x : xi, 3 },
+			{ "model_Cartn_y", y.has_value() ? y : yi, 3 },
+			{ "model_Cartn_z", z.has_value() ? z : zi, 3 },
+			{ "pdbx_ordinal", ord++ } });

 		formal_charge += charge;
 	}
@@ -551,21 +551,19 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
 		else if (cif::iequals(type, "triple") or cif::iequals(type, "trip"))
 			value_order = "TRIP";

-		db["chem_comp_bond"].emplace({
-			{ "comp_id", id },
+		db["chem_comp_bond"].emplace({ { "comp_id", id },
 			{ "atom_id_1", atom_id_1 },
 			{ "atom_id_2", atom_id_2 },
 			{ "value_order", value_order },
 			{ "pdbx_aromatic_flag", aromatic },
 			// TODO: fetch stereo_config info from chem_comp_chir
-			{ "pdbx_ordinal", ord++ }
-		});
+			{ "pdbx_ordinal", ord++ } });
 	}

 	db.emplace_back(rdb["pdbx_chem_comp_descriptor"]);

 	std::string formula;
-	for (bool first = true; const auto &[symbol, count]: formula_data)
+	for (bool first = true; const auto &[symbol, count] : formula_data)
 	{
 		if (std::exchange(first, false))
 			formula += ' ';
@@ -584,15 +582,13 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,
 	else
 		type = "NON-POLYMER";

-	db["chem_comp"].emplace({
-		{ "id", id },
+	db["chem_comp"].emplace({ { "id", id },
 		{ "name", name },
 		{ "type", type },
 		{ "formula", formula },
 		{ "pdbx_formal_charge", formal_charge },
 		{ "formula_weight", formula_weight },
-		{ "three_letter_code", three_letter_code }
-	});
+		{ "three_letter_code", three_letter_code } });

 	std::shared_lock lock(mMutex);

@@ -605,11 +601,16 @@ compound *local_compound_factory_impl::construct_compound(const datablock &rdb,

 std::unique_ptr<compound_factory> compound_factory::s_instance;
 thread_local std::unique_ptr<compound_factory> compound_factory::tl_instance;
-bool compound_factory::s_use_thread_local_instance;
+compound_factory_options compound_factory::s_options;

 void compound_factory::init(bool useThreadLocalInstanceOnly)
 {
-	s_use_thread_local_instance = useThreadLocalInstanceOnly;
+	init({ .use_thread_local_instance_only = useThreadLocalInstanceOnly });
+}
+
+void compound_factory::init(compound_factory_options options)
+{
+	s_options = options;
 }

 compound_factory::compound_factory()
@@ -628,7 +629,7 @@ compound_factory::~compound_factory()

 compound_factory &compound_factory::instance()
 {
-	if (s_use_thread_local_instance)
+	if (s_options.use_thread_local_instance_only)
 	{
 		if (not tl_instance)
 			tl_instance.reset(new compound_factory());
@@ -644,7 +645,7 @@ compound_factory &compound_factory::instance()

 void compound_factory::clear()
 {
-	if (s_use_thread_local_instance)
+	if (s_options.use_thread_local_instance_only)
 		tl_instance.reset(nullptr);
 	else
 		s_instance.reset();
@@ -722,7 +723,7 @@ bool compound_factory::is_peptide(std::string_view res_name) const
 	bool result = is_std_peptide(res_name);
 	if (not result and m_impl)
 	{
-		auto compound = const_cast<compound_factory&>(*this).create(res_name);
+		auto compound = const_cast<compound_factory &>(*this).create(res_name);
 		result = compound != nullptr and compound->is_peptide();
 	}
 	return result;
@@ -734,7 +735,7 @@ bool compound_factory::is_base(std::string_view res_name) const
 	bool result = is_std_base(res_name);
 	if (not result and m_impl)
 	{
-		auto compound = const_cast<compound_factory&>(*this).create(res_name);
+		auto compound = const_cast<compound_factory &>(*this).create(res_name);
 		result = compound != nullptr and compound->is_base();
 	}
 	return result;
@@ -760,7 +761,8 @@ bool compound_factory::is_monomer(std::string_view res_name) const

 void compound_factory::report_missing_compound(std::string_view compound_id)
 {
-	if (std::exchange(m_report_missing, false))
+	static bool s_reported = false;
+	if (std::exchange(s_reported, true) == false)
 	{
 		using namespace cif::colour;

--- a/src/condition.cpp
+++ b/src/condition.cpp
@@ -24,8 +24,8 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

-#include "cif++/condition.hpp"
 #include "cif++/category.hpp"
+#include "cif++/condition.hpp"
 #include "cif++/validate.hpp"

 namespace cif
@@ -61,52 +61,6 @@ bool is_item_type_uchar(const category &cat, std::string_view col)

 namespace detail
 {
-	// 	// index lookup
-	// 	struct index_lookup_condition_impl : public condition_impl
-	// 	{
-	// 		index_lookup_condition_impl(row_initializer &&key_values)
-	// 			: m_key_values(std::move(key_values))
-	// 		{
-	// 		}
-	//
-	// 		condition_impl *prepare(const category &c) override
-	// 		{
-	// 			m_single_hit = c[m_key_values];
-	// 			return this;
-	// 		}
-	//
-	// 		bool test(row_handle r) const override
-	// 		{
-	// 			return m_single_hit == r;
-	// 		}
-	//
-	// 		void str(std::ostream &os) const override
-	// 		{
-	// 			os << "index scan";
-	// 		}
-	//
-	// 		virtual std::optional<row_handle> single() const override
-	// 		{
-	// 			return m_single_hit;
-	// 		}
-	//
-	// 		virtual bool equals(const condition_impl *rhs) const override
-	// 		{
-	// 			if (typeid(*rhs) == typeid(index_lookup_condition_impl))
-	// 			{
-	// 				auto ri = static_cast<const index_lookup_condition_impl *>(rhs);
-	// 				if (m_single_hit or ri->m_single_hit)
-	// 					return m_single_hit == ri->m_single_hit;
-	// 				else
-	// 					// watch out, both m_item_ix might be the same while item_names might be diffent (in case they both do not exist in the category)
-	// 					return m_key_values == ri->m_key_values;
-	// 			}
-	// 			return this == rhs;
-	// 		}
-	//
-	// 		row_initializer m_key_values;
-	// 		row_handle m_single_hit;
-	// 	};

 	condition_impl *key_equals_condition_impl::prepare(const category &c)
 	{
@@ -131,8 +85,7 @@ namespace detail
 			c.key_item_indices().contains(m_item_ix) and
 			c.key_item_indices().size() == 1)
 		{
-			item v(m_item_name, m_value);
-			m_single_hit = c[{ { m_item_name, std::string{ v.value() }, false } }];
+			m_single_hit = c[{ { m_item_name, m_value } }];
 		}

 		return this;
@@ -146,8 +99,7 @@ namespace detail
 		{
 			auto &cs = (*s)->m_sub;

-			if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i)
-					{ return i->equals(c); }) == cs.end())
+			if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i) { return i->equals(c); }) == cs.end())
 			{
 				result = false;
 				break;
@@ -167,8 +119,7 @@ namespace detail
 		for (size_t fc_i = 0; fc_i < fc.size();)
 		{
 			auto c = fc[fc_i];
-			if (not found_in_range(c, subs.begin() + 1, subs.end()))
-			{
+			if (not found_in_range(c, subs.begin() + 1, subs.end())) {
 				++fc_i;
 				continue;
 			}
@@ -186,12 +137,11 @@ namespace detail
 				for (size_t ssub_i = 0; ssub_i < ssub.size();)
 				{
 					auto sc = ssub[ssub_i];
-					if (not sc->equals(c))
-					{
+					if (not sc->equals(c)) {
 						++ssub_i;
 						continue;
 					}
-
+					
 					ssub.erase(ssub.begin() + ssub_i);
 					delete sc;
 					break;
@@ -208,99 +158,6 @@ namespace detail
 		return oc;
 	}

-	condition_impl *and_condition_impl::prepare(const category &c)
-	{
-		for (auto &sub : m_sub)
-			sub = sub->prepare(c);
-
-		if (auto cv = c.get_cat_validator(); cv != nullptr)
-		{
-			// See if we can collapse a search part of this and_condition into a single index lookup
-
-			cif::iset keys{ cv->m_keys.begin(), cv->m_keys.end() };
-			category::key_type lookup;
-			std::vector<condition_impl *> subs;
-			std::vector<std::string> may_be_empty;
-
-			for (auto &sub : m_sub)
-			{
-				if (auto s = dynamic_cast<const key_equals_condition_impl *>(sub); s != nullptr)
-				{
-					if (keys.contains(s->m_item_name))
-					{
-						lookup.emplace_back(s->m_item_name, s->m_value);
-						subs.emplace_back(sub);
-					}
-					continue;
-				}
-
-				if (auto s = dynamic_cast<const key_equals_number_condition_impl *>(sub); s != nullptr)
-				{
-					if (keys.contains(s->m_item_name))
-					{
-						item v{ s->m_item_name, s->m_value };
-						lookup.emplace_back(s->m_item_name, std::string{ v.value() } );
-						subs.emplace_back(sub);
-					}
-					continue;
-				}
-
-				if (auto s = dynamic_cast<const key_equals_or_empty_condition_impl *>(sub); s != nullptr)
-				{
-					if (keys.contains(s->m_item_name))
-					{
-						lookup.emplace_back(s->m_item_name, s->m_value, true);
-						subs.emplace_back(sub);
-						may_be_empty.emplace_back(s->m_item_name);
-					}
-					continue;
-				}
-
-				if (auto s = dynamic_cast<const key_equals_number_or_empty_condition_impl *>(sub); s != nullptr)
-				{
-					if (keys.contains(s->m_item_name))
-					{
-						item v{ s->m_item_name, s->m_value };
-						lookup.emplace_back(s->m_item_name, std::string{ v.value() }, true );
-						subs.emplace_back(sub);
-					}
-					continue;
-				}
-			}
-
-			if (lookup.size() == keys.size())
-			{
-				m_single = c[lookup];
-
-				for (auto s : subs)
-					m_sub.erase(std::remove(m_sub.begin(), m_sub.end(), s), m_sub.end());
-			}
-		}
-
-		return this;
-	}
-
-	bool and_condition_impl::test(row_handle r) const
-	{
-		bool result = true;
-
-		if (m_single.has_value() and *m_single != r)
-			result = false;
-		else
-		{
-			for (auto sub : m_sub)
-			{
-				if (sub->test(r))
-					continue;
-
-				result = false;
-				break;
-			}
-		}
-
-		return result;
-	}
-
 	condition_impl *or_condition_impl::prepare(const category &c)
 	{
 		std::vector<and_condition_impl *> and_conditions;
@@ -324,7 +181,7 @@ void condition::prepare(const category &c)
 {
 	if (m_impl)
 		m_impl = m_impl->prepare(c);
-
+	
 	m_prepared = true;
 }

--- a/src/file.cpp
+++ b/src/file.cpp
@@ -25,7 +25,6 @@
 */

 #include "cif++/file.hpp"
-#include "cif++/condition.hpp"
 #include "cif++/gzio.hpp"

 namespace cif
@@ -47,16 +46,8 @@ bool file::is_valid()
 {
 	bool result = not empty();

-	for (bool first = true; auto &d : *this)
-	{
-		if (first)
-		{
-			result = d.is_valid() and result;
-			first = false;
-		}
-		else if (d.get_validator() != nullptr)
-			result = d.is_valid() and result;
-	}
+	for (auto &d : *this)
+		result = d.is_valid() and result;

 	if (result)
 		result = validate_links();
--- a/src/model.cpp
+++ b/src/model.cpp
@@ -47,10 +47,15 @@ void atom::atom_impl::moveTo(const point &p)

 	auto r = row();

-	r.assign("Cartn_x", cif::format("{:.3f}", p.m_x), false, false);
-	r.assign("Cartn_y", cif::format("{:.3f}", p.m_y), false, false);
-	r.assign("Cartn_z", cif::format("{:.3f}", p.m_z), false, false);
-
+#if __cpp_lib_format
+	r.assign("Cartn_x", std::format("{:.3f}", p.m_x), false, false);
+	r.assign("Cartn_y", std::format("{:.3f}", p.m_y), false, false);
+	r.assign("Cartn_z", std::format("{:.3f}", p.m_z), false, false);
+#else
+	r.assign("Cartn_x", cif::format("%.3f", p.m_x).str(), false, false);
+	r.assign("Cartn_y", cif::format("%.3f", p.m_y).str(), false, false);
+	r.assign("Cartn_z", cif::format("%.3f", p.m_z).str(), false, false);
+#endif
 	m_location = p;
 }

@@ -1295,7 +1300,7 @@ void structure::load_atoms_for_model(structure_open_options options)
 	
 		for (auto &&[key, value] : alts)
 		{
-			// const auto &[asym_id, seq_id] = key;
+			const auto &[asym_id, seq_id] = key;
 	
 			// select highest occupancy for this residue's alternates
 			std::string alt_id;
@@ -2869,7 +2874,7 @@ static int compare_numbers(std::string_view a, std::string_view b)

 int compare_cif_id(const std::string &a, const std::string &b)
 {
-	int d = static_cast<int>(a.length() - b.length());
+	int d = a.length() - b.length();
 	if (d == 0)
 		d = a.compare(b);
 	return d;
--- a/src/pdb/cif2pdb.cpp
+++ b/src/pdb/cif2pdb.cpp
@@ -33,6 +33,7 @@
 #include <regex>
 #include <set>

+
 namespace cif::pdb
 {

@@ -57,9 +58,9 @@ std::string cif2pdbDate(const std::string &d)
 		int month = std::stoi(m[2].str());

 		if (m[3].matched)
-			result = cif::format("{:02}-{:3.3}-{:02}", stoi(m[3].str()), kMonths[month - 1], (year % 100));
+			result = cif::format("%02.2d-%3.3s-%02.2d", stoi(m[3].str()), kMonths[month - 1], (year % 100)).str();
 		else
-			result = cif::format("{:3.3}-{:02}", kMonths[month - 1], (year % 100));
+			result = cif::format("%3.3s-%02.2d", kMonths[month - 1], (year % 100)).str();
 	}

 	return result;
@@ -257,14 +258,16 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
 	{
 		to_upper(pubname);

-		pdbFile << s1 << cif::format("REF {:2.2s} {:<28.28s}  {:2.2s}{:>4.4s} {:>5.5s} {:4.4s}", "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
+		const std::string kRefHeader = s1 + "REF %2.2s %-28.28s  %2.2s%4.4s %5.5s %4.4s";
+		pdbFile << cif::format(kRefHeader, "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
 				<< '\n';
 		++result;
 	}

 	if (not issn.empty())
 	{
-		pdbFile << s1 << cif::format("REFN                   ISSN {:<25.25s}", issn) << '\n';
+		const std::string kRefHeader = s1 + "REFN                   ISSN %-25.25s";
+		pdbFile << cif::format(kRefHeader, issn) << '\n';
 		++result;
 	}

@@ -273,25 +276,27 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
 	////    0         1         2         3         4         5         6         7         8
 	////    HEADER    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD   IIII
 	// const char kRefHeader[] =
-	//          "REMARK   1  REFN    {:4.4s} {:<6.6s}  {:2.2s} {:<25.25s}";
+	//          "REMARK   1  REFN    %4.4s %-6.6s  %2.2s %-25.25s";
 	//
 	//			pdbFile << (boost::cif::format(kRefHeader)
 	//						% (astm.empty() ? "" : "ASTN")
 	//						% astm
 	//						% country
-	//						% issn)
+	//						% issn).str()
 	//					<< '\n';
 	//		}

 	if (not pmid.empty())
 	{
-		pdbFile << s1 << cif::format("PMID   {:<60.60s} ", pmid) << '\n';
+		const std::string kPMID = s1 + "PMID   %-60.60s ";
+		pdbFile << cif::format(kPMID, pmid) << '\n';
 		++result;
 	}

 	if (not doi.empty())
 	{
-		pdbFile << s1 << cif::format("DOI    {:<60.60s} ", doi) << '\n';
+		const std::string kDOI = s1 + "DOI    %-60.60s ";
+		pdbFile << cif::format(kDOI, doi) << '\n';
 		++result;
 	}

@@ -302,10 +307,10 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
 {
 	//    0         1         2         3         4         5         6         7         8
 	//    HEADER    xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD   IIII
-	// const char kHeader[] =
-	// 	"HEADER    {:<40.40s}"
-	// 	"{:<9.9s}"
-	// 	"   {:<4.4s}";
+	const char kHeader[] =
+		"HEADER    %-40.40s"
+		"%-9.9s"
+		"   %-4.4s";

 	// HEADER

@@ -340,12 +345,7 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
 		}
 	}

-	pdbFile << cif::format(/* kHeader */
-		"HEADER    {:<40.40s}"
-		"{:<9.9s}"
-		"   {:<4.4s}"
-	
-	, keywords, date, db.name()) << '\n';
+	pdbFile << cif::format(kHeader, keywords, date, db.name()) << '\n';

 	// TODO: implement
 	// OBSLTE (skip for now)
@@ -535,6 +535,7 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
 	write_header_lines(pdbFile, db);

 	// REVDAT
+	const char kRevDatFmt[] = "REVDAT %3d%2.2s %9.9s %4.4s    %1d      ";
 	auto &cat2 = db["database_PDB_rev"];
 	std::vector<row_handle> rev(cat2.begin(), cat2.end());
 	sort(rev.begin(), rev.end(), [](row_handle a, row_handle b) -> bool
@@ -558,9 +559,9 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
 		{
 			std::string cs = ++continuation > 1 ? std::to_string(continuation) : std::string();

-			pdbFile << cif::format("REVDAT {:3}{:2.2s} {:9.9s} {:4.4s}    {:1}      ", revNum, cs, date, db.name(), modType);
+			pdbFile << cif::format(kRevDatFmt, revNum, cs, date, db.name(), modType);
 			for (std::size_t i = 0; i < 4; ++i)
-				pdbFile << cif::format(" {:<6.6s}", (i < types.size() ? types[i] : std::string()));
+				pdbFile << cif::format(" %-6.6s", (i < types.size() ? types[i] : std::string()));
 			pdbFile << '\n';

 			if (types.size() > 4)
@@ -613,7 +614,7 @@ void WriteRemark2(std::ostream &pdbFile, const datablock &db)
 		{
 			float resHigh = refine.front()["ls_d_res_high"].as<float>();
 			pdbFile << "REMARK   2\n"
-					<< cif::format("REMARK   2 RESOLUTION. {:7.2f} ANGSTROMS.", resHigh) << '\n';
+					<< cif::format("REMARK   2 RESOLUTION. %7.2f ANGSTROMS.", resHigh) << '\n';
 		}
 		catch (...)
 		{ /* skip it */
@@ -760,7 +761,10 @@ class Fs : public FBase
 		else
 		{
 			os << '\n';
-			WriteOneContinuedLine(os, cif::format("REMARK {:3} ", mNr), 0, s);
+
+			std::stringstream ss;
+			ss << "REMARK " << std::setw(3) << std::right << mNr << ' ';
+			WriteOneContinuedLine(os, ss.str(), 0, s);
 		}
 	}

@@ -1613,7 +1617,7 @@ void WriteRemark3Phenix(std::ostream &pdbFile, const datablock &db)

 		percent_reflns_obs /= 100;

-		pdbFile << RM3("  ") << cif::format("{:3} {:7.4f} - {:7.4f}    {:4.2f} {:8} {:5}  {:6.4f} {:6.4f}", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
+		pdbFile << RM3("  ") << cif::format("%3d %7.4f - %7.4f    %4.2f %8d %5d  %6.4f %6.4f", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
 	}

 	pdbFile << RM3("") << '\n'
@@ -2581,7 +2585,7 @@ void WriteRemark465(std::ostream &pdbFile, const datablock &db)
 		cif::tie(modelNr, resName, chainID, iCode, seqNr) =
 			r.get("PDB_model_num", "auth_comp_id", "auth_asym_id", "PDB_ins_code", "auth_seq_id");

-		pdbFile << cif::format("REMARK 465 {:3.3s} {:3.3s} {:1.1s} {:5}{:1.1s}", modelNr, resName, chainID, seqNr, iCode) << '\n';
+		pdbFile << cif::format("REMARK 465 %3.3s %3.3s %1.1s %5d%1.1s", modelNr, resName, chainID, seqNr, iCode) << '\n';
 	}
 }

@@ -2628,7 +2632,7 @@ void WriteRemark470(std::ostream &pdbFile, const datablock &db)

 			while (not a.second.empty())
 			{
-				pdbFile << cif::format("REMARK 470 {:>3.3s} {:3.3s} {:1.1s}{:4}{:1.1s}  ", modelNr, resName, chainID, seqNr, iCode) << "  ";
+				pdbFile << cif::format("REMARK 470 %3.3s %3.3s %1.1s%4d%1.1s  ", modelNr, resName, chainID, seqNr, iCode) << "  ";

 				for (std::size_t i = 0; i < 6 and not a.second.empty(); ++i)
 				{
@@ -2726,16 +2730,16 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)

 			if (dbAccession.length() > 8 or db_code.length() > 12 or atoi(dbseqEnd.c_str()) >= 100000)
 				pdbFile << cif::format(
-							   "DBREF1 {:>4.4s} {:1.1s} {:>4.4s}{:1.1s} {:>4.4s}{:1.1s} {:<6.6s}               {:<20.20s}",
+							   "DBREF1 %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s               %-20.20s",
 							   idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, db_code)
 						<< '\n'
 						<< cif::format(
-							   "DBREF2 {:>4.4s} {:1.1s}     {:<22.22s}     {:10.10s}  {:10.10s}",
+							   "DBREF2 %4.4s %1.1s     %-22.22s     %10.10s  %10.10s",
 							   idCode, chainID, dbAccession, dbseqBegin, dbseqEnd)
 						<< '\n';
 			else
 				pdbFile << cif::format(
-							   "DBREF  {:>4.4s} {:1.1s} {:>4.4s}{:1.1s} {:>4.4s}{:1.1s} {:<6.6s} {:<8.8s} {:<12.12s} {:>5.5s}{:1.1s} {:>5.5s}{:1.1s}",
+							   "DBREF  %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-8.8s %-12.12s %5.5s%1.1s %5.5s%1.1s",
 							   idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, dbAccession, db_code, dbseqBegin, dbinsBeg, dbseqEnd, dbinsEnd)
 						<< '\n';
 		}
@@ -2754,8 +2758,9 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
 		to_upper(conflict);

 		pdbFile << cif::format(
-					   "SEQADV {:4.4s} {:3.3s} {:1.1s} {:>4.4s}{:1.1s} {:<4.4s} {:<9.9s} {:3.3s} {:>5.5s} {:<21.21s}",
+					   "SEQADV %4.4s %3.3s %1.1s %4.4s%1.1s %-4.4s %-9.9s %3.3s %5.5s %-21.21s",
 					   idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict)
+					   .str()
 				<< '\n';
 	}

@@ -2783,7 +2788,7 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
 				t = 13;

 			pdbFile << cif::format(
-						   "SEQRES {:3} {:1.1s} {:4}  {:<51.51s}          ",
+						   "SEQRES %3d %1.1s %4d  %-51.51s          ",
 						   n++, std::string{ chainID }, seqresl[chainID], join(seq.begin(), seq.begin() + t, " "))
 					<< '\n';

@@ -2803,8 +2808,9 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
 			r.get("auth_asym_id", "auth_seq_id", "auth_comp_id", "PDB_ins_code", "parent_comp_id", "details");

 		pdbFile << cif::format(
-					   "MODRES {:4.4s} {:3.3s} {:1.1s} {:4.4s}{:1.1s} {:3.3s}  {:<41.41s}",
+					   "MODRES %4.4s %3.3s %1.1s %4.4s%1.1s %3.3s  %-41.41s",
 					   db.name(), resName, chainID, seqNum, iCode, stdRes, comment)
+					   .str()
 				<< '\n';
 	}

@@ -2919,7 +2925,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
 	{
 		if (h.water)
 			continue;
-		pdbFile << cif::format("HET    {:3.3s}  {:1c}{:4}{:1c}  {:5}", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
+		pdbFile << cif::format("HET    %3.3s  %c%4d%c  %5d", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
 		++numHet;
 	}

@@ -2934,7 +2940,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)

 		for (;;)
 		{
-			pdbFile << cif::format("HETNAM  {:2.2s} {:3.3s} ", (c > 1 ? std::to_string(c) : std::string()), id);
+			pdbFile << cif::format("HETNAM  %2.2s %3.3s ", (c > 1 ? std::to_string(c) : std::string()), id);
 			++c;

 			if (name.length() > 55)
@@ -3026,7 +3032,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
 			{
 				std::stringstream fs;

-				fs << cif::format("FORMUL  {:2}  {:3.3s} {:2.2s}{:1c}", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
+				fs << cif::format("FORMUL  %2d  %3.3s %2.2s%c", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
 				++c;

 				if (formula.length() > 51)
@@ -3093,7 +3099,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
 				"pdbx_PDB_helix_class", "pdbx_PDB_helix_length", "beg_auth_seq_id", "end_auth_seq_id");

 		++numHelix;
-		pdbFile << cif::format("HELIX  {:3} {:>3.3s} {:3.3s} {:1.1s} {:4}{:1.1s} {:3.3s} {:1.1s} {:4}{:1.1s}{:2}{:<30.30s} {:5}",
+		pdbFile << cif::format("HELIX  %3d %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s%2d%-30.30s %5d",
 					   numHelix, pdbx_PDB_helix_id, beg_label_comp_id, beg_auth_asym_id, beg_auth_seq_id, pdbx_beg_PDB_ins_code, end_label_comp_id, end_auth_asym_id, end_auth_seq_id, pdbx_end_PDB_ins_code, pdbx_PDB_helix_class, details, pdbx_PDB_helix_length)
 				<< '\n';
 	}
@@ -3130,7 +3136,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
 					"pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
 					"end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");

-				pdbFile << cif::format("SHEET  {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2}", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';
+				pdbFile << cif::format("SHEET  %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';

 				first = false;
 			}
@@ -3149,7 +3155,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl

 			if (h.empty())
 			{
-				pdbFile << cif::format("SHEET  {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2}", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
+				pdbFile << cif::format("SHEET  %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
 			}
 			else
 			{
@@ -3162,8 +3168,8 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
 				curAtom = cif2pdbAtomName(curAtom, compID[0], db);
 				prevAtom = cif2pdbAtomName(prevAtom, compID[1], db);

-				pdbFile << cif::format("SHEET  {:>3.3s} {:>3.3s}{:2} {:3.3s} {:1.1s}{:4}{:1.1s} {:3.3s} {:1.1s}{:4}{:1.1s}{:2} "
-										"{:<4.4s}{:3.3s} {:1.1s}{:4}{:1.1s} {:<4.4s}{:3.3s} {:1.1s}{:4}{:1.1s}",
+				pdbFile << cif::format("SHEET  %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d "
+										"%-4.4s%3.3s %1.1s%4d%1.1s %-4.4s%3.3s %1.1s%4d%1.1s",
 							   rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense, curAtom, curResName, curChainID, curResSeq, curICode, prevAtom, prevResName, prevChainID, prevResSeq, prevICode)
 						<< '\n';
 			}
@@ -3201,7 +3207,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
 		sym1 = cif2pdbSymmetry(sym1);
 		sym2 = cif2pdbSymmetry(sym2);

-		pdbFile << cif::format("SSBOND {:3} CYS {:1.1s} {:4}{:1.1s}   CYS {:1.1s} {:4}{:1.1s}                       {:6.6s} {:6.6s} {:5.2f}", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';
+		pdbFile << cif::format("SSBOND %3d CYS %1.1s %4d%1.1s   CYS %1.1s %4d%1.1s                       %6.6s %6.6s %5.2f", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';

 		++nr;
 	}
@@ -3228,10 +3234,10 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
 		sym1 = cif2pdbSymmetry(sym1);
 		sym2 = cif2pdbSymmetry(sym2);

-		pdbFile << cif::format("LINK        {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s}               {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s}  {:>6.6s} {:>6.6s}", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
+		pdbFile << cif::format("LINK        %-4.4s%1.1s%3.3s %1.1s%4d%1.1s               %-4.4s%1.1s%3.3s %1.1s%4d%1.1s  %6.6s %6.6s", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);

 		if (not Length.empty())
-			pdbFile << cif::format(" {:5.2f}", stod(Length));
+			pdbFile << cif::format(" %5.2f", stod(Length));

 		pdbFile << '\n';
 	}
@@ -3249,7 +3255,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
 				"pdbx_label_comp_id_2", "pdbx_auth_asym_id_2", "pdbx_auth_seq_id_2", "pdbx_PDB_ins_code_2",
 				"pdbx_PDB_model_num", "pdbx_omega_angle");

-		pdbFile << cif::format("CISPEP {:3.3s} {:3.3s} {:1.1s} {:4}{:1.1s}   {:3.3s} {:1.1s} {:4}{:1.1s}       {:3.3s}       {:6.2f}",
+		pdbFile << cif::format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s   %3.3s %1.1s %4d%1.1s       %3.3s       %6.2f",
 			serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << '\n';
 	}
 }
@@ -3270,7 +3276,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
 		cif::tie(siteID, resName, chainID, seq, iCode) =
 			r.get("site_id", "auth_comp_id", "auth_asym_id", "auth_seq_id", "pdbx_auth_ins_code");

-		sites[siteID].push_back(cif::format("{:3.3s} {:1.1s}{:4}{:1.1s} ", resName, chainID, seq, iCode));
+		sites[siteID].push_back(cif::format("%3.3s %1.1s%4d%1.1s ", resName, chainID, seq, iCode).str());
 	}

 	for (auto s : sites)
@@ -3283,7 +3289,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
 		int nr = 1;
 		while (res.empty() == false)
 		{
-			pdbFile << cif::format("SITE   {:3} {:3.3s} {:2} ", nr, siteID, numRes);
+			pdbFile << cif::format("SITE   %3d %3.3s %2d ", nr, siteID, numRes);

 			for (int i = 0; i < 4; ++i)
 			{
@@ -3312,7 +3318,7 @@ void WriteCrystallographic(std::ostream &pdbFile, const datablock &db)

 	r = db["cell"].find_first(key("entry_id") == db.name());

-	pdbFile << cif::format("CRYST1{:9.3f}{:9.3f}{:9.3f}{:7.2f}{:7.2f}{:7.2f} {:<11.11s}{:4}", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
+	pdbFile << cif::format("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4d", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
 }

 int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
@@ -3321,18 +3327,18 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)

 	for (auto r : db["database_PDB_matrix"])
 	{
-		pdbFile << cif::format("ORIGX{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
-		pdbFile << cif::format("ORIGX{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
-		pdbFile << cif::format("ORIGX{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
+		pdbFile << cif::format("ORIGX%1d    %10.6f%10.6f%10.6f     %10.5f", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
+		pdbFile << cif::format("ORIGX%1d    %10.6f%10.6f%10.6f     %10.5f", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
+		pdbFile << cif::format("ORIGX%1d    %10.6f%10.6f%10.6f     %10.5f", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
 		result += 3;
 		break;
 	}

 	for (auto r : db["atom_sites"])
 	{
-		pdbFile << cif::format("SCALE{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
-		pdbFile << cif::format("SCALE{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
-		pdbFile << cif::format("SCALE{:1}    {:10.6f}{:10.6f}{:10.6f}     {:10.5f}", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
+		pdbFile << cif::format("SCALE%1d    %10.6f%10.6f%10.6f     %10.5f", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
+		pdbFile << cif::format("SCALE%1d    %10.6f%10.6f%10.6f     %10.5f", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
+		pdbFile << cif::format("SCALE%1d    %10.6f%10.6f%10.6f     %10.5f", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
 		result += 3;
 		break;
 	}
@@ -3342,9 +3348,9 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
 	{
 		std::string given = r["code"] == "given" ? "1" : "";

-		pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f}     {:10.5f}    {:1.1s}", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
-		pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f}     {:10.5f}    {:1.1s}", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
-		pdbFile << cif::format("MTRIX{:1} {:3}{:10.6f}{:10.6f}{:10.6f}     {:10.5f}    {:1.1s}", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';
+		pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f     %10.5f    %1.1s", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
+		pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f     %10.5f    %1.1s", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
+		pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f     %10.5f    %1.1s", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';

 		++nr;
 		result += 3;
@@ -3363,6 +3369,10 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab

 	auto &atom_site = db["atom_site"];
 	auto &atom_site_anisotrop = db["atom_site_anisotrop"];
+	auto &entity = db["entity"];
+	// auto &pdbx_poly_seq_scheme = db["pdbx_poly_seq_scheme"];
+	// auto &pdbx_nonpoly_scheme = db["pdbx_nonpoly_scheme"];
+	auto &pdbx_branch_scheme = db["pdbx_branch_scheme"];

 	int serial = 1;
 	auto ri = atom_site.begin();
@@ -3407,7 +3417,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab

 			if (terminate)
 			{
-				pdbFile << cif::format("TER   {:5}      {:3.3s} {:1.1s}{:4}{:1.1s}",  serial,  resName,  chainID,  resSeq,  iCode) << '\n';
+				pdbFile << cif::format("TER   %5d      %3.3s %1.1s%4d%1.1s",  serial,  resName,  chainID,  resSeq,  iCode) << '\n';

 				++serial;
 				terminatedChains.insert(chainID);
@@ -3436,6 +3446,26 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
 			r.get("id", "group_PDB", "label_atom_id", "label_alt_id", "auth_comp_id", "auth_asym_id", "auth_seq_id",
 				"pdbx_PDB_ins_code", "Cartn_x", "Cartn_y", "Cartn_z", "occupancy", "B_iso_or_equiv", "type_symbol", "pdbx_formal_charge");

+		if (resName != "HOH")
+		{
+			int entity_id = r.get<int>("label_entity_id");
+			try
+			{
+				auto type = entity.find1<std::string>("id"_key == entity_id, "type");
+
+				if (type == "branched")	// find the real auth_seq_num, since sugars have their auth_seq_num reused as sugar number... sigh.
+					resSeq = pdbx_branch_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
+				// else if (type == "non-polymer")	// same for non-polymers
+				// 	resSeq = pdbx_nonpoly_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
+				// else if (type == "polymer")
+				// 	resSeq = pdbx_poly_seq_scheme.find1<int>("asym_id"_key == r.get<std::string>("label_asym_id") and "pdb_seq_num"_key == resSeq, "auth_seq_num");
+			}
+			catch (const std::exception &ex)
+			{
+				std::cerr << "Oops, there was not exactly one entity with id " << entity_id << '\n';
+			}
+		}
+		
 		if (chainID.length() > 1)
 			throw std::runtime_error("Chain ID " + chainID + " won't fit into a PDB file");

@@ -3446,8 +3476,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
 		if (charge != 0)
 			sCharge = std::to_string(charge) + (charge > 0 ? '+' : '-');

-		pdbFile << cif::format("{:<6.6s}{:5} {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s}   {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}          {:>2.2s}{:2.2s}",
-			group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';
+		pdbFile << cif::format("%-6.6s%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s   %8.3f%8.3f%8.3f%6.2f%6.2f          %2.2s%2.2s", group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';

 		++numCoord;

@@ -3462,7 +3491,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
 			tie(u11, u22, u33, u12, u13, u23) =
 				ai.get("U[1][1]", "U[2][2]", "U[3][3]", "U[1][2]", "U[1][3]", "U[2][3]");

-			pdbFile << cif::format("ANISOU{:5} {:<4.4s}{:1.1s}{:3.3s} {:1.1s}{:4}{:1.1s} {:7}{:7}{:7}{:7}{:7}{:7}      {:2.2s}{:2.2s}", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
+			pdbFile << cif::format("ANISOU%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %7d%7d%7d%7d%7d%7d      %2.2s%2.2s", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
 		}

 		++serial;
@@ -3514,7 +3543,7 @@ std::tuple<int, int> WriteCoordinate(std::ostream &pdbFile, const datablock &db)
 		for (int model_nr : models)
 		{
 			if (models.size() > 1)
-				pdbFile << cif::format("MODEL     {:4}",  model_nr) << '\n';
+				pdbFile << cif::format("MODEL     %4d",  model_nr) << '\n';

 			std::set<std::string> TERminatedChains;
 			auto n = WriteCoordinatesForModel(pdbFile, db, last_resseq_for_chain_map, TERminatedChains, model_nr);
@@ -3586,7 +3615,7 @@ std::string get_HEADER_line(const datablock &db, std::string::size_type truncate
 		}
 	}

-	return FixStringLength(cif::format("HEADER    {:<40.40s}{:<9.9s}   {:<4.4s}", keywords, date, db.name()), truncate_at);
+	return FixStringLength(cif::format("HEADER    %-40.40s%-9.9s   %-4.4s", keywords, date, db.name()).str(), truncate_at);
 }

 std::string get_COMPND_line(const datablock &db, std::string::size_type truncate_at)
@@ -3759,7 +3788,7 @@ void write(std::ostream &os, const datablock &db)
 	numXform = WriteCoordinateTransformation(os, db);
 	std::tie(numCoord, numTer) = WriteCoordinate(os, db);

-	os << cif::format("MASTER    {:5}    0{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}{:5}",  numRemark,  numHet,  numHelix,  numSheet,  numTurn,  numSite,  numXform,  numCoord,  numTer,  numConect,  numSeq) << '\n'
+	os << cif::format("MASTER    %5d    0%5d%5d%5d%5d%5d%5d%5d%5d%5d%5d",  numRemark,  numHet,  numHelix,  numSheet,  numTurn,  numSite,  numXform,  numCoord,  numTer,  numConect,  numSeq) << '\n'
 			<< "END\n";
 }

--- a/src/pdb/pdb2cif.cpp
+++ b/src/pdb/pdb2cif.cpp
@@ -32,7 +32,6 @@
 #include <map>
 #include <set>
 #include <stack>
-#include <stdexcept>

 using cif::category;
 using cif::datablock;
@@ -896,7 +895,12 @@ class PDBFileParser
 				if (year < 1950)
 					year += 100;

-				s = cif::format("{:04}-{:02}-{:02}", year, month, day);
+				std::stringstream ss;
+				ss << std::setw(4) << std::setfill('0') << year << '-'
+				   << std::setw(2) << std::setfill('0') << month << '-'
+				   << std::setw(2) << std::setfill('0') << day;
+
+				s = ss.str();
 			}
 			else if (regex_match(s, m, rx2))
 			{
@@ -908,7 +912,7 @@ class PDBFileParser
 				if (year < 1950)
 					year += 100;

-				s = cif::format("{:04}-{:02}", year, month);
+				s = cif::format("%04d-%02d", year, month).str();
 			}
 			else
 				ec = error::make_error_code(error::pdbErrors::invalidDate);
@@ -3337,18 +3341,18 @@ void PDBFileParser::ParseRemark350()
 								{ "type", type },
 								// { "name", "" },
 							    // { "symmetryOperation", "" },
-								{ "matrix[1][1]", cif::format("{:12.10f}", mat[0]) },
-								{ "matrix[1][2]", cif::format("{:12.10f}", mat[1]) },
-								{ "matrix[1][3]", cif::format("{:12.10f}", mat[2]) },
-								{ "vector[1]", cif::format("{:12.10f}", vec[0]) },
-								{ "matrix[2][1]", cif::format("{:12.10f}", mat[3]) },
-								{ "matrix[2][2]", cif::format("{:12.10f}", mat[4]) },
-								{ "matrix[2][3]", cif::format("{:12.10f}", mat[5]) },
-								{ "vector[2]", cif::format("{:12.10f}", vec[1]) },
-								{ "matrix[3][1]", cif::format("{:12.10f}", mat[6]) },
-								{ "matrix[3][2]", cif::format("{:12.10f}", mat[7]) },
-								{ "matrix[3][3]", cif::format("{:12.10f}", mat[8]) },
-								{ "vector[3]", cif::format("{:12.10f}", vec[2]) }
+								{ "matrix[1][1]", cif::format("%12.10f", mat[0]).str() },
+								{ "matrix[1][2]", cif::format("%12.10f", mat[1]).str() },
+								{ "matrix[1][3]", cif::format("%12.10f", mat[2]).str() },
+								{ "vector[1]", cif::format("%12.10f", vec[0]).str() },
+								{ "matrix[2][1]", cif::format("%12.10f", mat[3]).str() },
+								{ "matrix[2][2]", cif::format("%12.10f", mat[4]).str() },
+								{ "matrix[2][3]", cif::format("%12.10f", mat[5]).str() },
+								{ "vector[2]", cif::format("%12.10f", vec[1]).str() },
+								{ "matrix[3][1]", cif::format("%12.10f", mat[6]).str() },
+								{ "matrix[3][2]", cif::format("%12.10f", mat[7]).str() },
+								{ "matrix[3][3]", cif::format("%12.10f", mat[8]).str() },
+								{ "vector[3]", cif::format("%12.10f", vec[2]).str() }
 							});
 																			// clang-format on

@@ -4314,7 +4318,7 @@ void PDBFileParser::ConstructEntities()
 	}

 	// build sugar trees first
-	// ConstructSugarTrees(asymNr);
+	ConstructSugarTrees(asymNr);

 	// done with the sugar, resume operation as before

@@ -5768,9 +5772,6 @@ void PDBFileParser::ParseCoordinate(int modelNr)
 		std::string element = vS(77, 78);    //	77 - 78        LString(2)    element      Element symbol, right-justified.
 		std::string charge = vS(79, 80);     //	79 - 80        LString(2)    charge       Charge  on the atom.

-		if (element.empty())
-			throw std::runtime_error("Empty element column in PDB file at line " + std::to_string(mRec->mLineNr));
-
 		std::string entityID = mAsymID2EntityID[asymID];

 		charge = pdb2cifCharge(charge);
@@ -5849,7 +5850,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)

 			auto f = [](float f) -> std::string
 			{
-				return cif::format("{:6.4f}", f);
+				return cif::format("%6.4f", f).str();
 			};

 			// clang-format off
@@ -6405,7 +6406,7 @@ file read(std::istream &is)
 		if (std::isalpha(ch) and std::toupper(ch) != 'D')
 		{
 			read_pdb_file(is, result);
-			fixup_pdbx(result);
+			reconstruct_pdbx(result);
 		}
 		else
 		{
@@ -6441,14 +6442,8 @@ file read(std::istream &is)
 	}

 	// Must be a PDB like file, right?
-	if (not result.empty())
-	{
-		auto &db = result.front();
-		if (db.get_validator() == nullptr)
-			db.set_validator(&validator_factory::instance().get("mmcif_pdbx.dic"));
-		if (db.is_valid())
-			db.get_validator()->fill_audit_conform(db["audit_conform"]);
-	}
+	if (not result.empty() and result.front().get_validator() == nullptr)
+		result.front().set_validator(&validator_factory::instance().get("mmcif_pdbx.dic"));

 	return result;
 }
--- a/src/pdb/reconstruct.cpp
+++ b/src/pdb/reconstruct.cpp
@@ -25,8 +25,6 @@
 */

 #include "cif++.hpp"
-#include "cif++/compound.hpp"
-#include "cif++/row.hpp"

 // --------------------------------------------------------------------

@@ -140,15 +138,15 @@ void checkEntities(datablock &db)
 				auto compound = cf.create(comp_id);
 				if (compound)
 					formula_weight += compound->formula_weight();
-				// else if (cif::VERBOSE > 0)
-				// 	std::clog << "missing information for compound " + comp_id << '\n';
+				else if (cif::VERBOSE > 0)
+					std::clog << "missing information for compound " + comp_id << '\n';
 				++n;
 			}

-			formula_weight -= (n - 1) * 18.015f;
+			formula_weight -= (n - 1) * 18.015;
 		}
 		else if (type == "water")
-			formula_weight = 18.015f;
+			formula_weight = 18.015;
 		else if (type == "branched")
 		{
 			int n = 0;
@@ -158,12 +156,12 @@ void checkEntities(datablock &db)
 				auto compound = cf.create(comp_id);
 				if (compound)
 					formula_weight += compound->formula_weight();
-				// else if (cif::VERBOSE > 0)
-				// 	std::clog << "missing information for compound " + comp_id << '\n';
+				else if (cif::VERBOSE > 0)
+					std::clog << "missing information for compound " + comp_id << '\n';
 				++n;
 			}

-			formula_weight -= (n - 1) * 18.015f;
+			formula_weight -= (n - 1) * 18.015;
 		}
 		else if (type == "non-polymer")
 		{
@@ -173,7 +171,7 @@ void checkEntities(datablock &db)
 				auto compound = cf.create(*comp_id);
 				if (not compound)
 				{
-					// std::cerr << "missing information for compound " << *comp_id << "\n";
+					std::cerr << "missing information for compound " << *comp_id << "\n";
 					continue;
 				}
 				formula_weight = compound->formula_weight();
@@ -200,28 +198,26 @@ void createEntityIDs(datablock &db)
 	auto &atom_site = db["atom_site"];
 	auto &cf = compound_factory::instance();

-	std::vector<std::vector<row_handle>> entities;
+	std::vector<std::vector<residue_key_type>> entities;

 	std::string lastAsymID;
 	int lastSeqID = -1;
-	std::vector<row_handle> waters;
+	std::vector<residue_key_type> waters;

-	for (auto rh : atom_site)
-	{
-		residue_key_type k = rh.get<std::optional<std::string>,
+	for (residue_key_type k : atom_site.rows<std::optional<std::string>,
 			 std::optional<int>,
 			 std::optional<std::string>,
 			 std::optional<std::string>,
 			 std::optional<int>,
 			 std::optional<std::string>>(
 			 "auth_asym_id", "auth_seq_id", "auth_comp_id",
-			 "label_asym_id", "label_seq_id", "label_comp_id");
-
+			 "label_asym_id", "label_seq_id", "label_comp_id"))
+	{
 		std::string comp_id = get_comp_id(k);

 		if (cf.is_water(comp_id))
 		{
-			waters.emplace_back(rh);
+			waters.emplace_back(k);
 			continue;
 		}

@@ -236,7 +232,7 @@ void createEntityIDs(datablock &db)
 		if (asym_id != lastAsymID or (not is_monomer and lastSeqID != seq_id))
 			entities.push_back({});

-		entities.back().emplace_back(rh);
+		entities.back().emplace_back(k);

 		lastAsymID = asym_id;
 		lastSeqID = seq_id;
@@ -263,17 +259,20 @@ void createEntityIDs(datablock &db)

 	for (std::size_t ix = 0; auto &e : entities)
 	{
+		auto k = e.front();
 		const auto &entity_id = entity_ids[ix++];

-		for (auto rh : e)
-			rh["label_entity_id"] = entity_id;
+		std::string comp_id = get_comp_id(k);
+
+		for (auto &k : e)
+			atom_site.update_value(get_condition(k), "label_entity_id", entity_id);
 	}

 	if (not waters.empty())
 	{
 		std::string waterEntityID = std::to_string(entities.size() + 1);
-		for (auto rh : waters)
-			rh["label_entity_id"] = waterEntityID;
+		for (auto &k : waters)
+			atom_site.update_value(get_condition(k), "label_entity_id", waterEntityID);
 	}
 }

@@ -320,7 +319,7 @@ void fillLabelAsymID(category &atom_site)
 		{
 			if (not mapAuthAsymIDAndEntityToLabelAsymID.contains(key))
 			{
-				std::string asym_id = cif_id_for_number(static_cast<int>(mapAuthAsymIDAndEntityToLabelAsymID.size()));
+				std::string asym_id = cif_id_for_number(mapAuthAsymIDAndEntityToLabelAsymID.size());
 				mapAuthAsymIDAndEntityToLabelAsymID[key] = asym_id;
 			}
 		}
@@ -439,39 +438,10 @@ void checkAtomRecords(datablock &db)
 	// And negative seq_id values
 	if (atom_site.contains(key("label_seq_id") < 0))
 		fixNegativeSeqID(atom_site);
-	
-	std::set<std::string> polymer_entities;
-	if (db["entity"].empty())
-	{
-		// No entity, so we have to guess the types based on the content of atom_site

-		std::string last_entity_id;
-		std::optional<int> last_label_seq_id, last_auth_seq_id;
-
-		std::set<std::string> entityIDs;
-		for (auto &[entity_id, label_comp_id, label_seq_id, auth_comp_id, auth_seq_id] :
-			atom_site.rows<std::string, std::string, std::optional<int>, std::string, std::optional<int>>(
-				"label_entity_id", "label_comp_id", "label_seq_id", "auth_comp_id", "auth_seq_id"))
-		{
-			if (cf.is_water(label_comp_id) or cf.is_water(auth_comp_id))
-				continue;
-
-			if (polymer_entities.contains(entity_id))
-				continue;
-
-			if (last_entity_id == entity_id and (last_label_seq_id != label_seq_id or last_auth_seq_id != auth_seq_id))
-				polymer_entities.emplace(entity_id);
-
-			last_entity_id = entity_id;
-			last_label_seq_id = label_seq_id;
-			last_auth_seq_id = auth_seq_id;
-		}
-	}
-	else
-	{
-		for (std::string id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
-			polymer_entities.insert(id);
-	}
+	std::set<int> polymer_entities;
+	for (int id : db["entity"].find<int>("type"_key == "polymer", "id"))
+		polymer_entities.insert(id);

 	std::set<std::string> missingCompounds;

@@ -508,14 +478,13 @@ void checkAtomRecords(datablock &db)
 		if (missingCompounds.contains(comp_id))
 			continue;

-		bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<std::string>());
+		bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<int>());
 		auto compound = cf.create(comp_id);

 		if (not compound)
 		{
 			missingCompounds.insert(comp_id);
-			// if (cif::VERBOSE > 0)
-			// 	std::cerr << "Missing compound information for " << comp_id << "\n";
+			std::cerr << "Missing compound information for " << comp_id << "\n";
 			continue;
 		}

@@ -562,24 +531,18 @@ void checkAtomRecords(datablock &db)
 		if (is_polymer and row["label_seq_id"].empty() and cf.is_monomer(comp_id))
 			row["label_seq_id"] = std::to_string(seq_id);

-		if (row["label_asym_id"].empty())
-			row["label_asym_id"] = row["auth_asym_id"].text();
-		else if (row["auth_asym_id"].empty())
-			row["auth_asym_id"] = row["label_asym_id"].text();
-
-		if (row["label_comp_id"].empty())
-			row["label_comp_id"] = row["auth_comp_id"].text();
-		else if (row["auth_comp_id"].empty())
-			row["auth_comp_id"] = row["label_comp_id"].text();
-
 		if (row["label_atom_id"].empty())
 			row["label_atom_id"] = row["auth_atom_id"].text();
-		else if (row["auth_atom_id"].empty())
-			row["auth_atom_id"] = row["label_atom_id"].text();
-		
+		if (row["label_asym_id"].empty())
+			row["label_asym_id"] = row["auth_asym_id"].text();
+		if (row["label_comp_id"].empty())
+			row["label_comp_id"] = row["auth_comp_id"].text();
+		if (row["label_atom_id"].empty())
+			row["label_atom_id"] = row["auth_atom_id"].text();
+
 		// Rewrite the coordinates and other items that look better in a fixed format
 		// Be careful not to nuke invalidly formatted data here
-		for (auto [item_name, prec] : std::vector<std::tuple<std::string_view, int>>{
+		for (auto [item_name, prec] : std::vector<std::tuple<std::string_view, std::string::size_type>>{
 				 { "cartn_x", 3 },
 				 { "cartn_y", 3 },
 				 { "cartn_z", 3 },
@@ -594,11 +557,11 @@ void checkAtomRecords(datablock &db)
 			if (auto [ptr, ec] = cif::from_chars(s.data(), s.data() + s.length(), v); (bool)ec)
 				continue;

-			if (s.length() < prec + 1UL or s[s.length() - prec - 1] != '.')
+			if (s.length() < prec + 1 or s[s.length() - prec - 1] != '.')
 			{
 				char b[12];

-				if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc{})
+				if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); not (bool)ec) // rewrite the value only when formatting succeeded
 					row.assign(item_name, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
 			}
 		}
@@ -640,25 +603,20 @@ void checkAtomAnisotropRecords(datablock &db)

 	std::vector<row_handle> to_be_deleted;

-	std::map<int, row_handle> atoms;
-	for (auto rh : atom_site)
-		atoms[rh.get<int>("id")] = rh;
-
 	bool warnReplaceTypeSymbol = true;
 	for (auto row : atom_site_anisotrop)
 	{
-		auto ai = atoms.find(row.get<int>("id"));
-
-		if (ai == atoms.end())
+		auto parents = atom_site_anisotrop.get_parents(row, atom_site);
+		if (parents.size() != 1)
 		{
 			to_be_deleted.emplace_back(row);
 			continue;
 		}

-		auto parent = ai->second;
-
 		// this happens sometimes (Phenix):

+		auto parent = parents.front();
+
 		if (row["type_symbol"].empty())
 			row["type_symbol"] = parent["type_symbol"].text();
 		else if (row["type_symbol"].text() != parent["type_symbol"].text())
@@ -670,14 +628,16 @@ void checkAtomAnisotropRecords(datablock &db)

 		if (row["pdbx_auth_alt_id"].empty() and not parent["pdbx_auth_alt_id"].empty())
 			row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
-		if (row["pdbx_label_seq_id"].empty() and not parent["label_seq_id"].empty())
+		if (row["pdbx_label_seq_id"].empty() and not parent["label_seq_id"].empty())
 			row["pdbx_label_seq_id"] = parent["label_seq_id"].text();
-		if (row["pdbx_label_asym_id"].empty() and not parent["label_asym_id"].empty())
+		if (row["pdbx_label_asym_id"].empty() and not parent["label_asym_id"].empty())
 			row["pdbx_label_asym_id"] = parent["label_asym_id"].text();
-		if (row["pdbx_label_atom_id"].empty() and not parent["label_atom_id"].empty())
+		if (row["pdbx_label_atom_id"].empty() and not parent["label_atom_id"].empty())
 			row["pdbx_label_atom_id"] = parent["label_atom_id"].text();
-		if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
+		if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
 			row["pdbx_label_comp_id"] = parent["label_comp_id"].text();
+		// if (row["pdbx_PDB_model_num"].empty() and not parent["pdbx_PDB_model_num"].empty())
+		// 	row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
 	}

 	if (not to_be_deleted.empty())
@@ -762,7 +722,7 @@ void createEntity(datablock &db)

 		std::string type, desc;
 		float weight = 0;
-		size_t count = 0;
+		int count = 0;

 		auto first_comp_id = std::get<0>(content.front());

@@ -777,11 +737,8 @@ void createEntity(datablock &db)
 			auto c = cf.create(first_comp_id);

 			type = "non-polymer";
-			if (c)
-			{
-				desc = c->name();
-				weight = c->formula_weight();
-			}
+			if (c) desc = c->name(); // cf.create() may return null for an unknown compound
+			if (c) weight = c->formula_weight();
 		}
 		else
 		{
@@ -852,28 +809,28 @@ void createEntityPoly(datablock &db)
 			if (type != "other")
 			{
 				std::string c_type;
-				if (auto i = compound_factory::kBaseMap.find(comp_id); i != compound_factory::kBaseMap.end())
+				if (cf.is_base(comp_id))
 				{
 					c_type = "polydeoxyribonucleotide";
-
-					letter_can = i->second;
-
+					letter_can = compound_factory::kBaseMap.at(comp_id);
 					if (comp_id.length() == 1)
 						letter = letter_can;
 					else
-						letter = '(' + comp_id + ')';
+						letter = '(' + comp_id + ')'; // multi-letter residues are written as "(COMP)"; adding chars would yield an int
 				}
-				else if (auto i = compound_factory::kAAMap.find(comp_id); i != compound_factory::kAAMap.end())
+				else if (cf.is_peptide(comp_id))
 				{
 					c_type = "polypeptide(L)";
-
-					letter = letter_can = i->second;
+					letter = letter_can = compound_factory::kAAMap.at(comp_id);
 				}
 				else if (iequals(c->type(), "D-PEPTIDE LINKING"))
 				{
 					c_type = "polypeptide(D)";

 					letter_can = c->one_letter_code();
+					if (letter_can == 0)
+						letter_can = 'X';
+
 					letter = '(' + comp_id + ')';

 					non_std_linkage = true;
@@ -884,6 +841,9 @@ void createEntityPoly(datablock &db)
 					c_type = "polypeptide(L)";

 					letter_can = c->one_letter_code();
+					if (letter_can == 0)
+						letter_can = 'X';
+
 					letter = '(' + comp_id + ')';

 					non_std_monomer = true;
@@ -893,6 +853,9 @@ void createEntityPoly(datablock &db)
 					// c_type = "other";

 					letter_can = c->one_letter_code();
+					if (letter_can == 0)
+						letter_can = 'X';
+
 					letter = '(' + comp_id + ')';

 					non_std_monomer = true;
@@ -905,7 +868,7 @@ void createEntityPoly(datablock &db)
 			}

 			seq[auth_asym_id] += letter;
-			seq_can[auth_asym_id] += letter_can ? letter_can : 'X';
+			seq_can[auth_asym_id] += letter_can;

 			if (find(pdb_strand_ids.begin(), pdb_strand_ids.end(), auth_asym_id) == pdb_strand_ids.end())
 				pdb_strand_ids.emplace_back(auth_asym_id);
@@ -1204,7 +1167,7 @@ void createPdbxNonpolyScheme(datablock &db)
 		for (int ndb_nr = 1; auto row : atom_site.find("label_entity_id"_key == entity_id and "label_comp_id"_key == comp_id))
 		{
 			// Skip existing records
-			auto linked = atom_site.get_children(row, pdbx_nonpoly_scheme);
+			auto linked = atom_site.get_linked(row, pdbx_nonpoly_scheme);
 			if (not linked.empty())
 				continue;

@@ -1279,101 +1242,6 @@ void createPdbxBranchScheme(datablock &db)
 	}
 }

-void reconstruct_index_for_category(const validator &validator, category &cat, datablock &db)
-{
-	auto cv = validator.get_validator_for_category(cat.name());
-
-	enum class State
-	{
-		Start,
-		MissingKeys,
-		DuplicateKeys
-	} state = State::Start;
-
-	for (;;)
-	{
-		// See if we can build an index
-		try
-		{
-			cat.set_validator(&validator, db);
-		}
-		catch (const missing_key_error &ex)
-		{
-			if (state == State::MissingKeys)
-			{
-				if (cif::VERBOSE > 0)
-					std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
-
-				throw;
-			}
-
-			state = State::MissingKeys;
-
-			auto key = ex.get_key();
-
-			if (cif::VERBOSE > 1)
-				std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';
-
-			for (auto row : cat)
-			{
-				auto ord = row.get<std::string>(key.c_str());
-				if (ord.empty())
-					row.assign({ //
-						{ key, cat.get_unique_value(key) } });
-			}
-
-			continue;
-		}
-		catch (const duplicate_key_error &ex)
-		{
-			if (state == State::DuplicateKeys)
-			{
-				if (cif::VERBOSE > 0)
-					std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
-
-				throw;
-			}
-
-			state = State::DuplicateKeys;
-
-			if (cif::VERBOSE > 0)
-				std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';
-
-			// replace items that do not define a relation to a parent
-
-			std::set<std::string> replaceableKeys;
-			for (auto key : cv->m_keys)
-			{
-				bool replaceable = true;
-				for (auto lv : validator.get_links_for_child(cat.name()))
-				{
-					if (find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
-					{
-						replaceable = false;
-						break;
-					}
-				}
-
-				if (replaceable)
-					replaceableKeys.insert(key);
-			}
-
-			if (replaceableKeys.empty())
-				throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");
-
-			for (auto key : replaceableKeys)
-			{
-				for (auto row : cat)
-					row.assign(key, cat.get_unique_value(key), false, false);
-			}
-
-			continue;
-		}
-
-		break;
-	}
-}
-
 bool reconstruct_pdbx(file &file)
 {
 	if (file.empty())
@@ -1469,7 +1337,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
 				              iv->m_type != nullptr and
 				              iv->m_type->m_primitive_type == cif::DDL_PrimitiveType::Numb;

-				for (int ix = 0; auto row : cat)
+				for (std::size_t ix = 0; auto row : cat)
 				{
 					if (number)
 						row.assign(key, std::to_string(++ix), false, false);
@@ -1540,7 +1408,95 @@ bool reconstruct_pdbx(file &file, const validator &validator)
 				}
 			}

-			reconstruct_index_for_category(validator, cat, db);
+			enum class State
+			{
+				Start,
+				MissingKeys,
+				DuplicateKeys
+			} state = State::Start;
+
+			for (;;)
+			{
+				// See if we can build an index
+				try
+				{
+					cat.set_validator(&validator, db);
+				}
+				catch (const missing_key_error &ex)
+				{
+					if (state == State::MissingKeys)
+					{
+						if (cif::VERBOSE > 0)
+							std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
+
+						throw;
+					}
+
+					state = State::MissingKeys;
+
+					auto key = ex.get_key();
+
+					if (cif::VERBOSE > 0)
+						std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';
+
+					for (auto row : cat)
+					{
+						auto ord = row.get<std::string>(key.c_str());
+						if (ord.empty())
+							row.assign({ //
+								{ key, cat.get_unique_value(key) } });
+					}
+
+					continue;
+				}
+				catch (const duplicate_key_error &ex)
+				{
+					if (state == State::DuplicateKeys)
+					{
+						if (cif::VERBOSE > 0)
+							std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
+
+						throw;
+					}
+
+					state = State::DuplicateKeys;
+
+					if (cif::VERBOSE > 0)
+						std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';
+
+					// replace items that do not define a relation to a parent
+
+					std::set<std::string> replaceableKeys;
+					for (auto key : cv->m_keys)
+					{
+						bool replaceable = true;
+						for (auto lv : validator.get_links_for_child(cat.name()))
+						{
+							if (find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
+							{
+								replaceable = false;
+								break;
+							}
+						}
+
+						if (replaceable)
+							replaceableKeys.insert(key);
+					}
+
+					if (replaceableKeys.empty())
+						throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");
+
+					for (auto key : replaceableKeys)
+					{
+						for (auto row : cat)
+							row.assign(key, cat.get_unique_value(key), false, false);
+					}
+
+					continue;
+				}
+
+				break;
+			}
 		}
 		catch (const std::exception &ex)
 		{
@@ -1581,7 +1537,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)

 	if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
 		comparePolySeqSchemes(db);
-
+	
 	createPdbxNonpolyScheme(db);

 	// Create a minimal set of branch records
@@ -1621,16 +1577,11 @@ void fixup_pdbx(file &file, const validator &validator)
 	// assuming the first datablock contains the entry ...
 	auto &db = file.front();

-	if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
-		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
-
 	// ... and any additional datablock will contain compound information
 	cif::compound_source cs(file);

-	// Be silent about missing compound info in fixup
-	auto &cf = compound_factory::instance();
-	bool save_report = cf.get_report_missing();
-	cf.set_report_missing(cif::VERBOSE > 1);
+	if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
+		throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");

 	std::string entry_id;

@@ -1659,24 +1610,11 @@ void fixup_pdbx(file &file, const validator &validator)
 	if (not db["atom_site"].find_first(key("label_entity_id") != null))
 		createEntityIDs(db);

-	// Now see if atom records make sense at all, but in a silent way, this time
+	// Now see if atom records make sense at all
 	checkAtomRecords(db);

 	db["chem_comp"].reorder_by_index();

-	// See if we can easily reconstruct missing data fields in order to create an index
-	for (auto &cat : db)
-	{
-		try
-		{
-			cat.set_validator(&validator, db);
-		}
-		catch (const missing_key_error &)
-		{
-			reconstruct_index_for_category(validator, cat, db);
-		}
-	}
-
 	db.set_validator(&validator);

 	// Now create any missing categories
@@ -1692,7 +1630,7 @@ void fixup_pdbx(file &file, const validator &validator)

 	if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
 		comparePolySeqSchemes(db);
-
+	
 	createPdbxNonpolyScheme(db);

 	// Create a minimal set of branch records
@@ -1702,7 +1640,6 @@ void fixup_pdbx(file &file, const validator &validator)
 	checkEntities(db);

 	// That's it
-	cf.set_report_missing(save_report);
 }

 } // namespace cif::pdb
--- a/src/pdb/validate-pdbx.cpp
+++ b/src/pdb/validate-pdbx.cpp
@@ -61,6 +61,8 @@ condition get_parents_condition(const validator &validator, row_handle rh, const
 			result = std::move(result) or std::move(cond);
 		}
 	}
+	else if (cif::VERBOSE > 0)
+		std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';

 	return result;
 }
@@ -69,7 +71,7 @@ bool is_valid_pdbx_file(const file &file, const validator &v)
 {
 	std::error_code ec;
 	bool result = is_valid_pdbx_file(file, v, ec);
-	return result and not(bool) ec;
+	return result and not (bool)ec;
 }

 bool is_valid_pdbx_file(const file &file, std::error_code &ec)
@@ -82,7 +84,7 @@ bool is_valid_pdbx_file(const file &file, std::error_code &ec)
 		result = is_valid_pdbx_file(file, validator_factory::instance().get(*ac), ec);
 	else
 		result = is_valid_pdbx_file(file, validator_factory::instance().get("mmcif_pdbx.dic"), ec);
-
+	
 	return result;
 }

@@ -90,7 +92,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
 {
 	using namespace cif::literals;

-	bool result = true, warned_missing_parents = false;
+	bool result = true;

 	try
 	{
@@ -127,18 +129,10 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
 			if (not cf.is_monomer(comp_id))
 				continue;

-			auto cond = get_parents_condition(validator, r, pdbx_poly_seq_scheme);
-			if (not cond)
-			{
-				if (VERBOSE > 0 and std::exchange(warned_missing_parents, true) == false)
-					std::cerr << "warning: missing links for atom_site/pdbx_poly_seq_scheme\n";
-				continue;
-			}
-
-			auto p = pdbx_poly_seq_scheme.find(std::move(cond));
+			auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
 			if (p.size() != 1)
 			{
-				if (VERBOSE > 0)
+				if (cif::VERBOSE > 0)
 					std::clog << "In atom_site record: " << r["id"].text() << '\n';
 				throw std::runtime_error("For each monomer in atom_site there should be exactly one pdbx_poly_seq_scheme record");
 			}
@@ -167,7 +161,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error

 			const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");

-			std::map<int, std::set<std::string>> mon_per_seq_id;
+			std::map<int,std::set<std::string>> mon_per_seq_id;

 			for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
 			{
@@ -202,37 +196,28 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
 					throw std::runtime_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
 			}

-			// This code proved to take too much time ...
-
-			// for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
-			// {
-			// 	for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
-			// 	{
-			// 		condition cond;
-
-			// 		for (auto mon_id : mon_ids)
-			// 			cond = std::move(cond) or "label_comp_id"_key == mon_id;
-
-			// 		cond = "label_entity_id"_key == entity_id and
-			// 			"label_asym_id"_key == asym_id and
-			// 			"label_seq_id"_key == seq_id and not std::move(cond);
-
-			// 		if (atom_site.contains(std::move(cond)))
-			// 			throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
-			// 	}
-			// }
-
-			// ... so we're using this instead, should be almost the same...
-
-			for (const auto &[comp_id, seq_id] :
-				atom_site.find<std::string, int>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id"))
+			for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
 			{
-				if (not mon_per_seq_id[seq_id].contains(comp_id))
-					throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
+				for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
+				{
+					condition cond;
+					
+					for (auto mon_id : mon_ids)
+						cond = std::move(cond) or "label_comp_id"_key == mon_id;
+
+					cond = "label_entity_id"_key == entity_id and
+						"label_asym_id"_key == asym_id and
+						"label_seq_id"_key == seq_id and not std::move(cond);
+					
+					if (atom_site.contains(std::move(cond)))
+						throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
+				}
 			}

 			auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
 				"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
+			
+			std::string::const_iterator si, sci, se, sce;

 			auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
 			{
@@ -269,8 +254,8 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
 							else
 								letter = '(' + comp_id + ')';
 						}
-
-						if (iequals(std::string{ si, si + letter.length() }, letter))
+						
+						if (iequals(std::string{si, si + letter.length()}, letter))
 						{
 							match = true;
 							si += letter.length();
@@ -289,14 +274,12 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error

 			if (not seq.has_value())
 			{
-				if (VERBOSE > 0)
+				if (cif::VERBOSE > 0)
 					std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
 			}
 			else
 			{
-				seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch)
-							   { return std::isspace(ch); }),
-					seq->end());
+				seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());

 				if (not seq_match(false, seq->begin(), seq->end()))
 					throw std::runtime_error("Sequences do not match for entity " + entity_id);
@@ -304,14 +287,12 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error

 			if (not seq_can.has_value())
 			{
-				if (VERBOSE > 1)
+				if (cif::VERBOSE > 1)
 					std::clog << "Warning: entity_poly has no canonical sequence for entity_id " << entity_id << '\n';
 			}
 			else
 			{
-				seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch)
-								   { return std::isspace(ch); }),
-					seq_can->end());
+				seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());

 				if (not seq_match(true, seq_can->begin(), seq_can->end()))
 					throw std::runtime_error("Canonical sequences do not match for entity " + entity_id);
@@ -323,15 +304,16 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
 	catch (const std::exception &ex)
 	{
 		result = false;
-		if (VERBOSE > 0)
+		if (cif::VERBOSE > 0)
 			std::clog << ex.what() << '\n';
 		ec = make_error_code(validation_error::not_valid_pdbx);
 	}

-	if (not result and (bool) ec)
+	if (not result and (bool)ec)
 		ec = make_error_code(validation_error::not_valid_pdbx);

 	return result;
 }

 } // namespace cif::pdb
+  
--- a/src/symmetry.cpp
+++ b/src/symmetry.cpp
@@ -32,11 +32,6 @@

 #include "symop_table_data.hpp"

-#if defined(_MSC_VER)
-#pragma warning (disable : 5054)	// warning C5054: operator '&': deprecated between enumerations of different types
-#pragma warning (disable : 4127)	// conditional expression is constant
-#endif
-
 #include <Eigen/Eigen>

 namespace cif
@@ -95,10 +90,10 @@ float cell::get_volume() const
 	auto cos_beta = std::cos(beta);
 	auto cos_gamma = std::cos(gamma);

-	double vol = m_a * m_b * m_c;
+	auto vol = m_a * m_b * m_c;
 	vol *= std::sqrt(1.0f - cos_alpha * cos_alpha - cos_beta * cos_beta - cos_gamma * cos_gamma + 2.0f * cos_alpha * cos_beta * cos_gamma);

-	return static_cast<float>(vol);
+	return vol;
 }

 // --------------------------------------------------------------------
--- a/src/validate.cpp
+++ b/src/validate.cpp
@@ -36,11 +36,16 @@

 // The validator depends on regular expressions. Unfortunately,
 // the implementation of std::regex in g++ is buggy and crashes
-// on reading the pdbx dictionary. We used to use boost regex
-// instead but using pcre2 is even easier and faster.
+// on reading the pdbx dictionary. Therefore, in case g++ is used
+// the code will use boost::regex instead.

-#define PCRE2_CODE_UNIT_WIDTH 8
-#include <pcre2.h>
+#if USE_BOOST_REGEX
+# include <boost/regex.hpp>
+using boost::regex;
+#else
+# include <regex>
+using std::regex;
+#endif

 namespace cif
 {
@@ -62,57 +67,13 @@ validation_exception::validation_exception(std::error_code ec, std::string_view

 // --------------------------------------------------------------------

-struct regex_impl
+struct regex_impl : public regex
 {
-	regex_impl(std::string_view rx);
-	~regex_impl();
-
-	regex_impl(const regex_impl &) = delete;
-	regex_impl &operator=(const regex_impl &) = delete;
-
-	bool match(std::string_view v) const;
-
-  private:
-	pcre2_code *m_rx = nullptr;
-	pcre2_match_data *m_data = nullptr;
-};
-
-regex_impl::regex_impl(std::string_view rx)
-{
-	int err_code;
-	size_t err_offset;
-	m_rx = pcre2_compile((PCRE2_SPTR)rx.data(), rx.length(), 0, &err_code, &err_offset, nullptr);
-	if (m_rx == nullptr)
+	regex_impl(std::string_view rx)
+		: regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
 	{
-		PCRE2_UCHAR buffer[256];
-		int n = pcre2_get_error_message(err_code, buffer, sizeof(buffer));
-
-		throw std::runtime_error(std::string("PCRE2 compilation failed: ") + std::string{ (char *)buffer, (char *)buffer + n });
 	}
-
-	m_data = pcre2_match_data_create_from_pattern(m_rx, nullptr);
-}
-
-regex_impl::~regex_impl()
-{
-	if (m_data)
-		pcre2_match_data_free(m_data);
-
-	if (m_rx)
-		pcre2_code_free(m_rx);
-}
-
-bool regex_impl::match(std::string_view v) const
-{
-	bool result = false;
-
-	if (int rc = pcre2_match(m_rx, (PCRE2_SPTR)v.data(), v.length(), 0, 0, m_data, nullptr); rc >= 0)
-		result = true;
-	else if (rc != PCRE2_ERROR_NOMATCH)
-		std::cerr << "Error matching with pcre\n";
-
-	return result;
-}
+};

 // --------------------------------------------------------------------

@@ -272,7 +233,7 @@ bool item_validator::validate_value(std::string_view value, std::error_code &ec)

 	if (not value.empty() and value != "?" and value != ".")
 	{
-		if (m_type != nullptr and not m_type->m_rx->match(value))
+		if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
 			ec = make_error_code(validation_error::value_does_not_match_rx);
 		else if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
 			ec = make_error_code(validation_error::value_is_not_in_enumeration_list);
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -22,12 +22,12 @@ list(
 	CIFPP_tests
 	unit-v2
 	unit-3d
+	format
 	model
-	query
 	rename-compound
 	sugar
 	spinner
-	reconstruction
+	# reconstruction
 	validate-pdbx
 	)

--- a/test/format-test.cpp
+++ b/test/format-test.cpp
@@ -1,17 +1,17 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- * 
- * Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
- * 
+ *
+ * Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
+ *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- * 
+ *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- * 
+ *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -24,31 +24,38 @@
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

+
 #include "test-main.hpp"

+#include <stdexcept>
+
 #include <cif++.hpp>

-#include <iostream>
-#include <fstream>
+// --------------------------------------------------------------------

-TEST_CASE("q-1")
+TEST_CASE("fmt_1")
 {
-	using namespace cif::literals;
+	std::ostringstream os;

-	cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
-
-	cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");
-	auto &pdbx_poly_seq_scheme = a.front()["pdbx_poly_seq_scheme"];
-	REQUIRE_FALSE(pdbx_poly_seq_scheme.empty());
-
-	SECTION("s-11")
-	{
-		CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A") == 137);
-
-		CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO") == 1);
-
-		CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO" and "hetero"_key == false) == 1);
-	}
-	
+	std::string world("world");
+	os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI);
+	REQUIRE(os.str() == "Hello, world     , the magic number is 42 and pi is 3.14159");

+	REQUIRE(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI).str() ==
+		"Hello, world     , the magic number is 42 and pi is 3.14159");
 }
+
+// --------------------------------------------------------------------
+
+TEST_CASE("clr_1")
+{
+	using namespace cif::colour;
+
+	std::cout << "Hello, " << cif::coloured("world!", white, red, cif::colour::regular) << '\n'
+			  << "Hello, " << cif::coloured("world!", white, red, bold) << '\n'
+			  << "Hello, " << cif::coloured("world!", black, red) << '\n'
+			  << "Hello, " << cif::coloured("world!", white, green) << '\n'
+			  << "Hello, " << cif::coloured("world!", white, blue) << '\n'
+			  << "Hello, " << cif::coloured("world!", blue, white) << '\n'
+			  << "Hello, " << cif::coloured("world!", red, white, bold) << '\n';
+}
--- a/test/reconstruction-test.cpp
+++ b/test/reconstruction-test.cpp
@@ -28,7 +28,6 @@

 #include <cif++.hpp>

-#include <filesystem>
 #include <iostream>
 #include <fstream>

--- a/test/test-main.hpp
+++ b/test/test-main.hpp
@@ -1,17 +1,17 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause
- *
+ * 
 * Copyright (c) 2024 NKI/AVL, Netherlands Cancer Institute
- *
+ * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
- *
+ * 
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
- *
+ * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
--- a/test/unit-3d-test.cpp
+++ b/test/unit-3d-test.cpp
@@ -30,11 +30,6 @@

 #include <cif++.hpp>

-#if defined(_MSC_VER)
-#pragma warning (disable : 5054)	// warning C5054: operator '&': deprecated between enumerations of different types
-#pragma warning (disable : 4127)	// conditional expression is constant
-#endif
-
 #include <Eigen/Eigenvalues>

 // --------------------------------------------------------------------
@@ -301,7 +296,7 @@ TEST_CASE("m2q_0a")
 		auto d = cif::kSymopNrTable[i].symop().data();

 		Eigen::Matrix3f rot;
-		rot << static_cast<float>(d[0]), static_cast<float>(d[1]), static_cast<float>(d[2]), static_cast<float>(d[3]), static_cast<float>(d[4]), static_cast<float>(d[5]), static_cast<float>(d[6]), static_cast<float>(d[7]), static_cast<float>(d[8]);
+		rot << d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7], d[8];

 		// check to see if this matrix contains a true rotation
 		if (rot * rot.transpose() != Eigen::Matrix3f::Identity() or rot.determinant() != 1)
@@ -440,11 +435,11 @@ TEST_CASE("symm_4")

 	// based on 2b8h
 	auto sg = cif::spacegroup(154); // p 32 2 1
-	auto c = cif::cell(107.516f, 107.516f, 338.487f, 90.00f, 90.00f, 120.00f);
+	auto c = cif::cell(107.516, 107.516, 338.487, 90.00, 90.00, 120.00);

-	cif::point a{ -8.688f, 79.351f, 10.439f };  // O6 NAG A 500
-	cif::point b{ -35.356f, 33.693f, -3.236f }; // CG2 THR D 400
-	cif::point sb(-6.916f, 79.34f, 3.236f);     // 4_565 copy of b
+	cif::point a{ -8.688, 79.351, 10.439 };  // O6 NAG A 500
+	cif::point b{ -35.356, 33.693, -3.236 }; // CG2 THR D 400
+	cif::point sb(-6.916, 79.34, 3.236);     // 4_565 copy of b

 	CHECK_THAT(distance(a, sg(a, c, "1_455"_symop)), Catch::Matchers::WithinRel(static_cast<float>(c.get_a()), 0.01f));
 	CHECK_THAT(distance(a, sg(a, c, "1_545"_symop)), Catch::Matchers::WithinRel(static_cast<float>(c.get_b()), 0.01f));
@@ -471,7 +466,7 @@ TEST_CASE("symm_4wvp_1")

 	cif::crystal c(db);

-	cif::point p{ -78.722f, 98.528f, 11.994f };
+	cif::point p{ -78.722, 98.528, 11.994 };
 	auto a = s.get_residue("A", 10, "").get_atom_by_atom_id("O");

 	auto sp1 = c.symmetry_copy(a.get_location(), "2_565"_symop);
--- a/test/unit-v2-test.cpp
+++ b/test/unit-v2-test.cpp
--- a/tools/depends.cmd
+++ b/tools/depends.cmd
@@ -5,8 +5,6 @@ IF NOT EXIST build_ci\libs (
  MKDIR build_ci\libs
 )
 CD build_ci\libs
-
-@REM Install ZLib
 IF NOT EXIST zlib-%ZLIB_VERSION%.zip (
  ECHO Downloading https://github.com/libarchive/zlib/archive/v%ZLIB_VERSION%.zip
  curl -L -o zlib-%ZLIB_VERSION%.zip https://github.com/libarchive/zlib/archive/v%ZLIB_VERSION%.zip || EXIT /b 1
@@ -16,9 +14,9 @@ IF NOT EXIST zlib-%ZLIB_VERSION% (
  C:\windows\system32\tar.exe -x -f zlib-%ZLIB_VERSION%.zip || EXIT /b 1
 )
 CD zlib-%ZLIB_VERSION%
-cmake -B build || EXIT /b 1
-cmake --build build --target ALL_BUILD --config Release || EXIT /b 1
-cmake --build build --target RUN_TESTS --config Release || EXIT /b 1
-cmake --build build --target INSTALL --config Release || EXIT /b 1
+cmake -G "Visual Studio 17 2022" . || EXIT /b 1
+cmake --build . --target ALL_BUILD --config Release || EXIT /b 1
+cmake --build . --target RUN_TESTS --config Release || EXIT /b 1
+cmake --build . --target INSTALL --config Release || EXIT /b 1

@EXIT /b 0