Compare commits

...

17 Commits

Author SHA1 Message Date
Maarten L. Hekkelman
cb02969604 Using std::format 2025-08-25 16:31:00 +02:00
Maarten L. Hekkelman
31090c6ec5 attempt 2 2025-08-25 11:25:10 +02:00
Maarten L. Hekkelman
9e30d2bc1a finding pcre2 2025-08-25 10:39:48 +02:00
Maarten L. Hekkelman
93d703f7a1 Do not buld pcre tests 2025-08-20 20:40:08 +02:00
Maarten L. Hekkelman
3c241048a5 do not install pcre 2025-08-20 17:02:40 +02:00
Maarten L. Hekkelman
2788536799 this should work 2025-08-20 16:58:52 +02:00
Maarten L. Hekkelman
314d435a18 Another way of importing pcre 2025-08-20 16:49:12 +02:00
Maarten L. Hekkelman
37edcd8666 Finding and optionally building pcre 2025-08-20 15:47:49 +02:00
Maarten L. Hekkelman
10e290fbdf pcre2 in github actions? 2025-08-20 14:40:50 +02:00
Maarten L. Hekkelman
58cda1241e cleanup 2025-08-20 13:41:45 +02:00
Maarten L. Hekkelman
3659aaabff remove unneeded allocations 2025-08-20 13:35:15 +02:00
Maarten L. Hekkelman
727a39cc54 Finishing up replacing boost with pcre 2025-08-20 13:28:24 +02:00
Maarten L. Hekkelman
fd9ccdfff9 Using pcre instead of boost::regex 2025-08-19 16:16:43 +02:00
Maarten L. Hekkelman
aabee270b3 update .gitignore 2025-08-19 14:28:51 +02:00
Maarten L. Hekkelman
647c58f8ec allow code to be built with older compilers... 2025-08-19 12:44:41 +02:00
Maarten L. Hekkelman
0b8024d19c Optimise query processing 2025-08-19 12:24:33 +02:00
Maarten L. Hekkelman
d59b0bf27f Remove wrong warnings 2025-08-13 11:30:53 +02:00
24 changed files with 1922 additions and 593 deletions

1
.gitignore vendored
View File

@@ -13,3 +13,4 @@ docs/api
docs/conf.py
build_ci/
data/components.cif
perf.data*

View File

@@ -38,20 +38,12 @@ project(
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
include(FindAtomic)
include(CheckFunctionExists)
include(CheckIncludeFiles)
include(CheckLibraryExists)
include(CMakePackageConfigHelpers)
include(CheckCXXSourceCompiles)
include(GenerateExportHeader)
include(CTest)
include(FetchContent)
include(ExternalProject)
# FindBoost, take care of it now.
if(CMAKE_VERSION VERSION_GREATER_EQUAL 3.30)
cmake_policy(SET CMP0167 NEW)
endif()
include(FetchContent)
include(CPM)
# When building with ninja-multiconfig, build both debug and release by default
if(CMAKE_GENERATOR STREQUAL "Ninja Multi-Config")
@@ -149,51 +141,8 @@ endif()
# Libraries
# Start by finding out if std:regex is usable. Note that the current
# implementation in GCC is not acceptable, it crashes on long lines. The
# implementation in libc++ (clang) and MSVC seem to be OK.
check_cxx_source_compiles(
"
#include <iostream>
#ifndef __GLIBCXX__
#error
#endif
int main(int argc, char *argv[]) { return 0; }"
GXX_LIBSTDCPP)
if(GXX_LIBSTDCPP)
message(
STATUS "cifpp: Testing for known regex bug, since you're using GNU libstdc++")
try_run(STD_REGEX_RUNNING STD_REGEX_COMPILING
${CMAKE_CURRENT_BINARY_DIR}/test
${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-rx.cpp)
if(STD_REGEX_RUNNING STREQUAL FAILED_TO_RUN)
message(
STATUS
"cifpp: You are probably trying to compile using the g++ standard library which contains a crashing std::regex implementation. Will use boost::regex instead"
)
find_package(Boost 1.80 QUIET COMPONENTS regex)
if(NOT Boost_FOUND)
set(BOOST_REGEX_STANDALONE ON)
FetchContent_Declare(
boost-rx
GIT_REPOSITORY https://github.com/boostorg/regex
GIT_TAG boost-1.83.0)
FetchContent_MakeAvailable(boost-rx)
endif()
set(BOOST_REGEX ON)
endif()
endif()
if(MSVC)
# Avoid linking the shared library of zlib Search ZLIB_ROOT first if it is
# Avoid linking the shared library of zlib. Search ZLIB_ROOT first if it is
# set.
if(ZLIB_ROOT)
set(_ZLIB_SEARCH_ROOT PATHS ${ZLIB_ROOT} NO_DEFAULT_PATH)
@@ -220,6 +169,10 @@ if(MSVC)
endforeach()
endif()
# find_package(pcre2) is unfortunately broken
include(FindPCRE2x)
find_package(ZLIB QUIET)
find_package(Threads)
@@ -350,19 +303,9 @@ target_sources(cifpp
# The code now really requires C++20
target_compile_features(cifpp PUBLIC cxx_std_20)
set(CMAKE_DEBUG_POSTFIX d)
set_target_properties(cifpp PROPERTIES DEBUG_POSTFIX "d")
generate_export_header(cifpp EXPORT_FILE_NAME
${CMAKE_CURRENT_SOURCE_DIR}/include/cif++/exports.hpp)
if(BOOST_REGEX)
target_compile_definitions(cifpp PRIVATE USE_BOOST_REGEX=1
BOOST_REGEX_STANDALONE=1)
get_target_property(BOOST_REGEX_INCLUDE_DIR Boost::regex
INTERFACE_INCLUDE_DIRECTORIES)
endif()
if(MSVC)
target_compile_definitions(cifpp PUBLIC NOMINMAX=1)
endif()
@@ -373,9 +316,11 @@ target_include_directories(
cifpp
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
PRIVATE "${BOOST_REGEX_INCLUDE_DIR}" "${EIGEN_INCLUDE_DIR}")
PRIVATE "${EIGEN_INCLUDE_DIR}")
target_link_libraries(cifpp PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>)
target_link_libraries(cifpp
PUBLIC Threads::Threads ZLIB::ZLIB $<$<TARGET_EXISTS:std::atomic>:std::atomic>
PRIVATE $<BUILD_INTERFACE:pcre2-8>)
if(CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
target_link_options(cifpp PRIVATE -undefined dynamic_lookup)

View File

@@ -117,12 +117,8 @@ Other libraries you might want to install beforehand are:
`libeigen3-dev`
- [zlib](https://github.com/madler/zlib), the development version of this
library. On Debian/Ubuntu this is the package `zlib1g-dev`.
- [boost](https://www.boost.org), in Debian/Ubuntu this is `libboost-dev`.
The Boost libraries are only needed in case you are using GCC due to a long
standing bug in GNU's implementation of std::regex. It simply crashes
on the regular expressions used in the mmcif_pdbx dictionary and so
we use the boost regex implementation instead.
- [pcre2](https://www.pcre.org/), the Perl Compatible Regular Expression
library. On Debian/Ubuntu this is the package `libpcre2-dev`.
### Building

1291
cmake/CPM.cmake Normal file

File diff suppressed because it is too large Load Diff

33
cmake/FindPCRE2x.cmake Normal file
View File

@@ -0,0 +1,33 @@
# FindPCRE2x.cmake
#
# Locate the 8-bit PCRE2 library. Three strategies are tried in order:
#   1. a CMake package configuration file (find_package(pcre2 CONFIG)),
#   2. pkg-config module libpcre2-8, exposed through the alias target pcre2-8,
#   3. fetching and building pcre2 from source with FetchContent.
#
# PCRE2_FOUND is the variable tested after steps 1 and 2 to decide whether the
# next strategy is needed.

# Make this module usable on its own; including FetchContent twice is harmless.
include(FetchContent)

# Default to ON, but do not override a value the user already provided.
# NOTE(review): presumably read by pcre2's package configuration file to prefer
# static libraries - confirm against the pcre2 documentation.
if(NOT DEFINED PCRE2_USE_STATIC_LIBS)
  set(PCRE2_USE_STATIC_LIBS ON)
endif()

# QUIET: a missing configuration file is not an error, the fallbacks below
# take over in that case.
find_package(pcre2 CONFIG QUIET)

if(PCRE2_FOUND)
  message(STATUS "Using pcre2 found using find_package")
else()
  find_package(PkgConfig QUIET)

  if(PKG_CONFIG_FOUND)
    pkg_check_modules(PCRE2 IMPORTED_TARGET libpcre2-8)

    if(PCRE2_FOUND)
      message(STATUS "Using pcre2 found using pkg-config")

      # Guard against a second inclusion of this module: add_library fails
      # when the target name is already taken.
      if(NOT TARGET pcre2-8)
        add_library(pcre2-8 ALIAS PkgConfig::PCRE2)
      endif()
    endif()
  endif()
endif()

if(NOT PCRE2_FOUND)
  message(STATUS "Using pcre2 using FetchContent")

  # Do not build the test programs of pcre2 as part of this project
  set(PCRE2_BUILD_TESTS OFF)

  FetchContent_Declare(
    pcre2
    GIT_REPOSITORY https://github.com/PCRE2Project/pcre2
    GIT_TAG pcre2-10.45
    EXCLUDE_FROM_ALL)

  FetchContent_MakeAvailable(pcre2)
endif()

View File

@@ -1,18 +0,0 @@
// See: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=86164
#include <iostream>
#include <regex>
int main()
{
std::string s(100'000, '*');
std::smatch m;
std::regex r("^(.*?)$");
std::regex_search(s, m, r);
std::cout << s.substr(0, 10) << '\n';
std::cout << m.str(1).substr(0, 10) << '\n';
return 0;
}

View File

@@ -157,7 +157,7 @@ class category
emplace(std::forward<row_initializer>(rows));
}
category(const category &rhs); ///< Copy constructor
category(const category &rhs); ///< Copy constructor
category(category &&rhs) noexcept ///< Move constructor
{
@@ -332,8 +332,16 @@ class category
// --------------------------------------------------------------------
// A category can have a key, as defined by the validator/dictionary
/// @brief The type of an element of the key_type
///
/// Each element pairs the name of a key item with the value to look for.
/// When comparing a key against a row, an element with may_be_null set is
/// not compared if the corresponding item in that row is empty.
struct key_element_type
{
std::string name; ///< Name of the item
std::string value; ///< Value to be found
bool may_be_null = false; ///< If true, value should be same or empty
};
/// @brief The key type
using key_type = row_initializer;
using key_type = std::vector<key_element_type>;
/// @brief Return a row_handle for the row specified by \a key
/// @param key The value for the key, items specified in the dictionary should have a value
@@ -1249,7 +1257,7 @@ class category
{
}
// TODO: NEED TO FIX THIS!
// TODO: NEED TO FIX THIS!
category *linked;
const link_validator *v;
};

View File

@@ -180,7 +180,7 @@ class compound
friend class local_compound_factory_impl;
compound(cif::datablock &db);
std::string m_id;
std::string m_name;
std::string m_type;
@@ -290,6 +290,13 @@ class compound_factory
void report_missing_compound(std::string_view compound_id);
/// @brief Return the current value of the report-missing flag
///
/// Note that report_missing_compound resets this flag to false when it
/// tests it, so the value changes after the first report.
bool get_report_missing() const { return m_report_missing; }
/// @brief Set the report-missing flag
/// @param report The new value for the flag
void set_report_missing(bool report)
{
m_report_missing = report;
}
private:
compound_factory();
@@ -301,6 +308,7 @@ class compound_factory
static bool s_use_thread_local_instance;
std::shared_ptr<compound_factory_impl> m_impl;
bool m_report_missing = true;
};
// --------------------------------------------------------------------

View File

@@ -27,6 +27,7 @@
#pragma once
#include "cif++/row.hpp"
#include "cif++/format.hpp"
#include <cassert>
#include <concepts>
@@ -49,49 +50,49 @@
* @code {.cpp}
* cif::condition c = cif::key("id") == 1;
* @endcode
*
*
* That will find rows where the ID item contains the number 1. If
* using cif::key is a bit too much typing, you can also write:
*
*
* @code{.cpp}
* using namespace cif::literals;
*
*
* cif::condition c2 = "id"_key == 1;
* @endcode
*
*
* Now if you want both ID = 1 and ID = 2 in the result:
*
*
* @code{.cpp}
* auto c3 = "id"_key == 1 or "id"_key == 2;
* @endcode
*
*
* There are some special values you can use. To find rows with item that
* do not have a value:
*
*
* @code{.cpp}
* auto c4 = "type"_key == cif::null;
* @endcode
*
* @endcode
*
* Or if it should not be NULL:
*
*
* @code{.cpp}
* auto c5 = "type"_key != cif::null;
* @endcode
*
* @endcode
*
* There's even a way to find all records:
*
*
* @code{.cpp}
* auto c6 = cif::all;
* @endcode
*
*
* And when you want to search for any item containing the value 'foo':
*
*
* @code{.cpp}
* auto c7 = cif::any == "foo";
* @endcode
*
* @endcode
*
* All these conditions can be chained together again:
*
*
* @code{.cpp}
* auto c8 = std::move(c3) and std::move(c5);
* @endcode
@@ -106,7 +107,7 @@ namespace cif
/**
* @brief Get the items that can be used as key in conditions for a category
*
*
* @param cat The category whose items to return
* @return iset The set of key item names
*/
@@ -115,7 +116,7 @@ iset get_category_fields(const category &cat);
/**
* @brief Get the items that can be used as key in conditions for a category
*
*
* @param cat The category whose items to return
* @return iset The set of key field names
*/
@@ -123,7 +124,7 @@ iset get_category_items(const category &cat);
/**
* @brief Get the item index for item @a col in category @a cat
*
*
* @param cat The category
* @param col The name of the item
* @return uint16_t The index
@@ -132,7 +133,7 @@ uint16_t get_item_ix(const category &cat, std::string_view col);
/**
* @brief Return whether the item @a col in category @a cat has a primitive type of *uchar*
*
*
* @param cat The category
* @param col The item name
* @return true If the primitive type is of type *uchar*
@@ -175,14 +176,13 @@ namespace detail
class condition
{
public:
/** @cond */
using condition_impl = detail::condition_impl;
/** @endcond */
/**
* @brief Construct a new, empty condition object
*
*
*/
condition()
: m_impl(nullptr)
@@ -191,7 +191,7 @@ class condition
/**
* @brief Construct a new condition object with implementation @a impl
*
*
* @param impl The implementation to use
*/
explicit condition(condition_impl *impl)
@@ -230,15 +230,15 @@ class condition
/**
* @brief Prepare the condition to be used on category @a c. This will
* take care of setting the correct indices for items e.g.
*
*
* @param c The category this query should act upon
*/
void prepare(const category &c);
/**
* @brief This operator returns true if the row referenced by @a r is
* @brief This operator returns true if the row referenced by @a r is
* a match for this condition.
*
*
* @param r The reference to a row.
* @return true If there is a match
* @return false If there is no match
@@ -263,7 +263,7 @@ class condition
/**
* @brief If the prepare step found out there is only one hit
* this single hit can be returned by this method.
*
*
* @return std::optional<row_handle> The result will contain
* a row reference if there is a single hit, it will be empty otherwise
*/
@@ -292,7 +292,7 @@ class condition
/**
* @brief Operator to use to write out a condition to @a os, for debugging purposes
*
*
* @param os The std::ostream to write to
* @param cond The condition to write
* @return std::ostream& The same as @a os
@@ -752,28 +752,9 @@ namespace detail
delete sub;
}
condition_impl *prepare(const category &c) override
{
for (auto &sub : m_sub)
sub = sub->prepare(c);
return this;
}
condition_impl *prepare(const category &c) override;
bool test(row_handle r) const override
{
bool result = true;
for (auto sub : m_sub)
{
if (sub->test(r))
continue;
result = false;
break;
}
return result;
}
bool test(row_handle r) const override;
void str(std::ostream &os) const override
{
@@ -820,6 +801,7 @@ namespace detail
static condition_impl *combine_equal(std::vector<and_condition_impl *> &subs, or_condition_impl *oc);
std::vector<condition_impl *> m_sub;
std::optional<row_handle> m_single; // Potential result of index lookup
};
struct or_condition_impl : public condition_impl
@@ -977,9 +959,9 @@ inline condition operator or(condition &&a, condition &&b)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_or_empty_condition_impl(ci));
}
if (typeid(*b.m_impl) == typeid(detail::key_equals_condition_impl) and
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
{
auto ci = static_cast<detail::key_equals_condition_impl *>(b.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -997,9 +979,9 @@ inline condition operator or(condition &&a, condition &&b)
if (ci->m_item_name == ce->m_item_name)
return condition(new detail::key_equals_number_or_empty_condition_impl(ci));
}
if (typeid(*b.m_impl) == typeid(detail::key_equals_number_condition_impl) and
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
typeid(*a.m_impl) == typeid(detail::key_is_empty_condition_impl))
{
auto ci = static_cast<detail::key_equals_number_condition_impl *>(b.m_impl);
auto ce = static_cast<detail::key_is_empty_condition_impl *>(a.m_impl);
@@ -1019,7 +1001,7 @@ inline condition operator or(condition &&a, condition &&b)
/**
* @brief A helper class to make it possible to search for empty items (NULL)
*
*
* @code{.cpp}
* "id"_key == cif::empty_type();
* @endcode
@@ -1031,7 +1013,7 @@ struct empty_type
/**
* @brief A helper to make it possible to have conditions like
*
*
* @code{.cpp}
* "id"_key == cif::null;
* @endcode
@@ -1041,14 +1023,14 @@ inline constexpr empty_type null = empty_type();
/**
* @brief Class to use in creating conditions, creates a reference to an item
*
*
*/
struct key
{
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(const std::string &item_name)
: m_item_name(item_name)
@@ -1057,8 +1039,8 @@ struct key
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(const char *item_name)
: m_item_name(item_name)
@@ -1067,8 +1049,8 @@ struct key
/**
* @brief Construct a new key object using @a item_name as name
*
* @param item_name
*
* @param item_name
*/
explicit key(std::string_view item_name)
: m_item_name(item_name)
@@ -1137,13 +1119,10 @@ inline condition operator!=(const key &key, std::string_view value)
template <Numeric T>
condition operator>(const key &key, const T &v)
{
std::ostringstream s;
s << " > " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) > 0; },
s.str()));
std::format(" > {}", v)));
}
/**
@@ -1152,13 +1131,10 @@ condition operator>(const key &key, const T &v)
template <Numeric T>
condition operator>=(const key &key, const T &v)
{
std::ostringstream s;
s << " >= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) >= 0; },
s.str()));
std::format(" >= {}", v)));
}
/**
@@ -1167,13 +1143,10 @@ condition operator>=(const key &key, const T &v)
template <Numeric T>
condition operator<(const key &key, const T &v)
{
std::ostringstream s;
s << " < " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) < 0; },
s.str()));
std::format(" < {}", v)));
}
/**
@@ -1182,13 +1155,10 @@ condition operator<(const key &key, const T &v)
template <Numeric T>
condition operator<=(const key &key, const T &v)
{
std::ostringstream s;
s << " <= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v) <= 0; },
s.str()));
std::format(" <= {}", v)));
}
/**
@@ -1196,13 +1166,10 @@ condition operator<=(const key &key, const T &v)
*/
inline condition operator>(const key &key, std::string_view v)
{
std::ostringstream s;
s << " > " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) > 0; },
s.str()));
std::format(" > {}", v)));
}
/**
@@ -1210,13 +1177,10 @@ inline condition operator>(const key &key, std::string_view v)
*/
inline condition operator>=(const key &key, std::string_view v)
{
std::ostringstream s;
s << " >= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) >= 0; },
s.str()));
std::format(" >= {}", v)));
}
/**
@@ -1224,13 +1188,10 @@ inline condition operator>=(const key &key, std::string_view v)
*/
inline condition operator<(const key &key, std::string_view v)
{
std::ostringstream s;
s << " < " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) < 0; },
s.str()));
std::format(" < {}", v)));
}
/**
@@ -1238,13 +1199,10 @@ inline condition operator<(const key &key, std::string_view v)
*/
inline condition operator<=(const key &key, std::string_view v)
{
std::ostringstream s;
s << " <= " << v;
return condition(new detail::key_compare_condition_impl(
key.m_item_name, [item_name = key.m_item_name, v](row_handle r, bool icase)
{ return r[item_name].compare(v, icase) <= 0; },
s.str()));
std::format(" <= {}", v)));
}
/**
@@ -1345,7 +1303,7 @@ namespace literals
{
/**
* @brief Return a cif::key for the item name @a text
*
*
* @param text The name of the item
* @param length The length of @a text
* @return key The cif::key created

View File

@@ -26,139 +26,17 @@
#pragma once
#include <format>
#include <string>
/** \file format.hpp
*
* File containing a basic reimplementation of boost::format
* but then a bit more simplistic. Still this allowed me to move my code
* from using boost::format to something without external dependency easily.
* Now using std::format instead of a home grown rip off
*/
namespace cif
{
namespace detail
{
template <typename T>
struct to_varg
{
using type = T;
to_varg(const T &v)
: m_value(v)
{
}
type operator*() { return m_value; }
T m_value;
};
template <>
struct to_varg<const char *>
{
using type = const char *;
to_varg(const char *v)
: m_value(v)
{
}
type operator*() { return m_value.c_str(); }
std::string m_value;
};
template <>
struct to_varg<std::string>
{
using type = const char *;
to_varg(const std::string &v)
: m_value(v)
{
}
type operator*() { return m_value.c_str(); }
std::string m_value;
};
} // namespace
/** @cond */
template <typename... Args>
class format_plus_arg
{
public:
using args_vector_type = std::tuple<detail::to_varg<Args>...>;
using vargs_vector_type = std::tuple<typename detail::to_varg<Args>::type...>;
format_plus_arg(const format_plus_arg &) = delete;
format_plus_arg &operator=(const format_plus_arg &) = delete;
format_plus_arg(std::string_view fmt, Args... args)
: m_fmt(fmt)
, m_args(std::forward<Args>(args)...)
{
auto ix = std::make_index_sequence<sizeof...(Args)>();
copy_vargs(ix);
}
std::string str()
{
char buffer[1024];
std::string::size_type r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), m_fmt.c_str()), m_vargs));
return { buffer, r };
}
friend std::ostream &operator<<(std::ostream &os, const format_plus_arg &f)
{
char buffer[1024];
std::string::size_type r = std::apply(snprintf, std::tuple_cat(std::make_tuple(buffer, sizeof(buffer), f.m_fmt.c_str()), f.m_vargs));
os.write(buffer, r);
return os;
}
private:
template <std::size_t... I>
void copy_vargs(std::index_sequence<I...>)
{
((std::get<I>(m_vargs) = *std::get<I>(m_args)), ...);
}
std::string m_fmt;
args_vector_type m_args;
vargs_vector_type m_vargs;
};
/** @endcond */
/**
* @brief A simplistic reimplementation of boost::format, in fact it is
* actually a way to call the C function snprintf to format the arguments
* in @a args into the format string @a fmt
*
* The string in @a fmt should thus be a C style format string.
*
* TODO: Move to C++23 style of printing.
*
* @tparam Args The types of the arguments
* @param fmt The format string
* @param args The arguments
* @return An object that can be written out to a std::ostream using operator<<
*/
template <typename... Args>
constexpr auto format(std::string_view fmt, Args... args)
{
return format_plus_arg(fmt, std::forward<Args>(args)...);
}
// --------------------------------------------------------------------
/// A streambuf that fills out lines with spaces up until a specified width

View File

@@ -1,7 +1,33 @@
// Copyright Maarten L. Hekkelman, 2022
// Distributed under the Boost Software License, Version 1.0.
// (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*
Original code comes from libgxrio at https://github.com/mhekkel/gxrio
This is a stripped down version.
*/
#pragma once

View File

@@ -250,6 +250,8 @@ class item
return value();
}
auto operator<=>(const item &rhs) const = default;
private:
std::string_view m_name;
std::string m_value;

View File

@@ -92,7 +92,7 @@ class row_comparator
return d;
}
int operator()(const category &cat, const row_initializer &a, const row *b) const
int operator()(const category &cat, const category::key_type &a, const row *b) const
{
assert(b);
@@ -105,10 +105,11 @@ class row_comparator
{
assert(ai != a.end());
std::string_view ka = ai->value();
std::string_view ka = ai->value;
std::string_view kb = rhb[k].text();
d = f(ka, kb);
if (not (ai->may_be_null and rhb[k].empty()))
d = f(ka, kb);
if (d != 0)
break;
@@ -142,7 +143,7 @@ class category_index
}
row *find(const category &cat, row *k) const;
row *find_by_value(const category &cat, row_initializer k) const;
row *find_by_value(const category &cat, const category::key_type &k) const;
void insert(category &cat, row *r);
void erase(category &cat, row *r);
@@ -352,19 +353,19 @@ row *category_index::find(const category &cat, row *k) const
return r ? r->m_row : nullptr;
}
row *category_index::find_by_value(const category &cat, row_initializer k) const
row *category_index::find_by_value(const category &cat, const category::key_type &k) const
{
// sort the values in k first
row_initializer k2;
category::key_type k2;
for (auto &f : cat.key_item_indices())
{
auto fld = cat.get_item_name(f);
auto ki = find_if(k.begin(), k.end(), [&fld](auto &i)
{ return i.name() == fld; });
{ return i.name == fld; });
if (ki == k.end())
k2.emplace_back(fld, "");
k2.emplace_back(std::string{ fld }, "");
else
k2.emplace_back(*ki);
}

View File

@@ -757,8 +757,7 @@ bool compound_factory::is_monomer(std::string_view res_name) const
void compound_factory::report_missing_compound(std::string_view compound_id)
{
static bool s_reported = false;
if (std::exchange(s_reported, true) == false)
if (std::exchange(m_report_missing, false))
{
using namespace cif::colour;

View File

@@ -24,8 +24,8 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "cif++/category.hpp"
#include "cif++/condition.hpp"
#include "cif++/category.hpp"
#include "cif++/validate.hpp"
namespace cif
@@ -61,6 +61,52 @@ bool is_item_type_uchar(const category &cat, std::string_view col)
namespace detail
{
// // index lookup
// struct index_lookup_condition_impl : public condition_impl
// {
// index_lookup_condition_impl(row_initializer &&key_values)
// : m_key_values(std::move(key_values))
// {
// }
//
// condition_impl *prepare(const category &c) override
// {
// m_single_hit = c[m_key_values];
// return this;
// }
//
// bool test(row_handle r) const override
// {
// return m_single_hit == r;
// }
//
// void str(std::ostream &os) const override
// {
// os << "index scan";
// }
//
// virtual std::optional<row_handle> single() const override
// {
// return m_single_hit;
// }
//
// virtual bool equals(const condition_impl *rhs) const override
// {
// if (typeid(*rhs) == typeid(index_lookup_condition_impl))
// {
// auto ri = static_cast<const index_lookup_condition_impl *>(rhs);
// if (m_single_hit or ri->m_single_hit)
// return m_single_hit == ri->m_single_hit;
// else
// // watch out, both m_item_ix might be the same while item_names might be different (in case they both do not exist in the category)
// return m_key_values == ri->m_key_values;
// }
// return this == rhs;
// }
//
// row_initializer m_key_values;
// row_handle m_single_hit;
// };
condition_impl *key_equals_condition_impl::prepare(const category &c)
{
@@ -85,7 +131,8 @@ namespace detail
c.key_item_indices().contains(m_item_ix) and
c.key_item_indices().size() == 1)
{
m_single_hit = c[{ { m_item_name, m_value } }];
item v(m_item_name, m_value);
m_single_hit = c[{ { m_item_name, std::string{ v.value() }, false } }];
}
return this;
@@ -99,7 +146,8 @@ namespace detail
{
auto &cs = (*s)->m_sub;
if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i) { return i->equals(c); }) == cs.end())
if (find_if(cs.begin(), cs.end(), [c](const condition_impl *i)
{ return i->equals(c); }) == cs.end())
{
result = false;
break;
@@ -119,7 +167,8 @@ namespace detail
for (size_t fc_i = 0; fc_i < fc.size();)
{
auto c = fc[fc_i];
if (not found_in_range(c, subs.begin() + 1, subs.end())) {
if (not found_in_range(c, subs.begin() + 1, subs.end()))
{
++fc_i;
continue;
}
@@ -137,11 +186,12 @@ namespace detail
for (size_t ssub_i = 0; ssub_i < ssub.size();)
{
auto sc = ssub[ssub_i];
if (not sc->equals(c)) {
if (not sc->equals(c))
{
++ssub_i;
continue;
}
ssub.erase(ssub.begin() + ssub_i);
delete sc;
break;
@@ -158,6 +208,99 @@ namespace detail
return oc;
}
/// Prepare all sub-conditions for category @a c. When the equality tests
/// among them together cover every key item listed by the category
/// validator, those tests are replaced by one index lookup whose result
/// is stored in m_single.
///
/// @param c The category this condition will be applied to
/// @return Always this
condition_impl *and_condition_impl::prepare(const category &c)
{
	for (auto &sub : m_sub)
		sub = sub->prepare(c);

	auto cv = c.get_cat_validator();
	if (cv == nullptr)
		return this;

	// See if we can collapse a search part of this and_condition into a single index lookup
	cif::iset keys{ cv->m_keys.begin(), cv->m_keys.end() };

	category::key_type lookup;

	// The sub-conditions that are made redundant by the index lookup
	std::vector<condition_impl *> subs;

	for (auto &sub : m_sub)
	{
		if (auto s = dynamic_cast<const key_equals_condition_impl *>(sub); s != nullptr)
		{
			if (keys.contains(s->m_item_name))
			{
				lookup.emplace_back(s->m_item_name, s->m_value);
				subs.emplace_back(sub);
			}
			continue;
		}

		if (auto s = dynamic_cast<const key_equals_number_condition_impl *>(sub); s != nullptr)
		{
			if (keys.contains(s->m_item_name))
			{
				// Use an item to convert the number into its text representation
				item v{ s->m_item_name, s->m_value };
				lookup.emplace_back(s->m_item_name, std::string{ v.value() });
				subs.emplace_back(sub);
			}
			continue;
		}

		if (auto s = dynamic_cast<const key_equals_or_empty_condition_impl *>(sub); s != nullptr)
		{
			if (keys.contains(s->m_item_name))
			{
				lookup.emplace_back(s->m_item_name, s->m_value, true);
				subs.emplace_back(sub);
			}
			continue;
		}

		if (auto s = dynamic_cast<const key_equals_number_or_empty_condition_impl *>(sub); s != nullptr)
		{
			if (keys.contains(s->m_item_name))
			{
				item v{ s->m_item_name, s->m_value };
				lookup.emplace_back(s->m_item_name, std::string{ v.value() }, true);
				subs.emplace_back(sub);
			}
			continue;
		}
	}

	if (lookup.size() == keys.size())
	{
		m_single = c[lookup];

		// Sub-conditions are held as raw pointers (combine_equal likewise
		// deletes what it erases), so free the ones replaced by the lookup
		// instead of merely dropping them from m_sub.
		for (auto s : subs)
		{
			m_sub.erase(std::remove(m_sub.begin(), m_sub.end(), s), m_sub.end());
			delete s;
		}
	}

	return this;
}
/// Test whether row @a r satisfies this condition: it must be the row found
/// by the index lookup, if one was stored in m_single, and it must pass
/// every remaining sub-condition.
bool and_condition_impl::test(row_handle r) const
{
	// A stored lookup result that is not this row rules it out right away
	if (m_single.has_value() and *m_single != r)
		return false;

	for (auto cond : m_sub)
	{
		if (not cond->test(r))
			return false;
	}

	return true;
}
condition_impl *or_condition_impl::prepare(const category &c)
{
std::vector<and_condition_impl *> and_conditions;
@@ -181,7 +324,7 @@ void condition::prepare(const category &c)
{
if (m_impl)
m_impl = m_impl->prepare(c);
m_prepared = true;
}

View File

@@ -47,15 +47,10 @@ void atom::atom_impl::moveTo(const point &p)
auto r = row();
#if __cpp_lib_format
r.assign("Cartn_x", std::format("{:.3f}", p.m_x), false, false);
r.assign("Cartn_y", std::format("{:.3f}", p.m_y), false, false);
r.assign("Cartn_z", std::format("{:.3f}", p.m_z), false, false);
#else
r.assign("Cartn_x", cif::format("%.3f", p.m_x).str(), false, false);
r.assign("Cartn_y", cif::format("%.3f", p.m_y).str(), false, false);
r.assign("Cartn_z", cif::format("%.3f", p.m_z).str(), false, false);
#endif
m_location = p;
}

View File

@@ -33,7 +33,6 @@
#include <regex>
#include <set>
namespace cif::pdb
{
@@ -58,9 +57,9 @@ std::string cif2pdbDate(const std::string &d)
int month = std::stoi(m[2].str());
if (m[3].matched)
result = cif::format("%02.2d-%3.3s-%02.2d", stoi(m[3].str()), kMonths[month - 1], (year % 100)).str();
// std::format uses {} replacement fields; a printf-style pattern would be copied to the output verbatim
result = std::format("{:02d}-{:>3.3}-{:02d}", stoi(m[3].str()), kMonths[month - 1], (year % 100));
else
result = cif::format("%3.3s-%02.2d", kMonths[month - 1], (year % 100)).str();
// std::format uses {} replacement fields; a printf-style pattern would be copied to the output verbatim
result = std::format("{:>3.3}-{:02d}", kMonths[month - 1], (year % 100));
}
return result;
@@ -258,16 +257,14 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
{
to_upper(pubname);
const std::string kRefHeader = s1 + "REF %2.2s %-28.28s %2.2s%4.4s %5.5s %4.4s";
pdbFile << cif::format(kRefHeader, "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
// printf conversions translated to std::format fields: %N.Ns becomes {:>N.N} and %-N.Ns becomes {:<N.N}
pdbFile << s1 + std::format("REF {:>2.2} {:<28.28} {:>2.2}{:>4.4} {:>5.5} {:>4.4}", "" /* continuation */, pubname, (volume.empty() ? "" : "V."), volume, pageFirst, year)
<< '\n';
++result;
}
if (not issn.empty())
{
const std::string kRefHeader = s1 + "REFN ISSN %-25.25s";
pdbFile << cif::format(kRefHeader, issn) << '\n';
// {:<25.25} == printf's %-25.25s: left-aligned, padded to 25 and truncated at 25 characters.
pdbFile << s1 + std::format("REFN ISSN {:<25.25}", issn) << '\n';
++result;
}
@@ -278,25 +275,23 @@ std::size_t WriteCitation(std::ostream &pdbFile, const datablock &db, row_handle
// const char kRefHeader[] =
// "REMARK 1 REFN %4.4s %-6.6s %2.2s %-25.25s";
//
// pdbFile << (boost::cif::format(kRefHeader)
// pdbFile << (boost::format(kRefHeader)
// % (astm.empty() ? "" : "ASTN")
// % astm
// % country
// % issn).str()
// % issn)
// << '\n';
// }
if (not pmid.empty())
{
const std::string kPMID = s1 + "PMID %-60.60s ";
pdbFile << cif::format(kPMID, pmid) << '\n';
// {:<60.60} == printf's %-60.60s; std::format does not interpret %-specs.
pdbFile << s1 + std::format("PMID {:<60.60} ", pmid) << '\n';
++result;
}
if (not doi.empty())
{
const std::string kDOI = s1 + "DOI %-60.60s ";
pdbFile << cif::format(kDOI, doi) << '\n';
// {:<60.60} == printf's %-60.60s; std::format does not interpret %-specs.
pdbFile << s1 + std::format("DOI {:<60.60} ", doi) << '\n';
++result;
}
@@ -307,10 +302,10 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
{
// 0 1 2 3 4 5 6 7 8
// HEADER xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxDDDDDDDDD IIII
const char kHeader[] =
"HEADER %-40.40s"
"%-9.9s"
" %-4.4s";
// const char kHeader[] =
// "HEADER %-40.40s"
// "%-9.9s"
// " %-4.4s";
// HEADER
@@ -345,7 +340,12 @@ void write_header_lines(std::ostream &pdbFile, const datablock &db)
}
}
pdbFile << cif::format(kHeader, keywords, date, db.name()) << '\n';
pdbFile << std::format(/* kHeader */
"HEADER %-40.40s"
"%-9.9s"
" %-4.4s"
, keywords, date, db.name()) << '\n';
// TODO: implement
// OBSLTE (skip for now)
@@ -535,7 +535,6 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
write_header_lines(pdbFile, db);
// REVDAT
const char kRevDatFmt[] = "REVDAT %3d%2.2s %9.9s %4.4s %1d ";
auto &cat2 = db["database_PDB_rev"];
std::vector<row_handle> rev(cat2.begin(), cat2.end());
sort(rev.begin(), rev.end(), [](row_handle a, row_handle b) -> bool
@@ -559,9 +558,9 @@ void WriteTitle(std::ostream &pdbFile, const datablock &db)
{
std::string cs = ++continuation > 1 ? std::to_string(continuation) : std::string();
pdbFile << cif::format(kRevDatFmt, revNum, cs, date, db.name(), modType);
pdbFile << std::format("REVDAT %3d%2.2s %9.9s %4.4s %1d ", revNum, cs, date, db.name(), modType);
for (std::size_t i = 0; i < 4; ++i)
pdbFile << cif::format(" %-6.6s", (i < types.size() ? types[i] : std::string()));
// One fixed-width (6 characters, left-aligned) column per record type; empty string pads the column.
pdbFile << std::format(" {:<6.6}", (i < types.size() ? types[i] : std::string()));
pdbFile << '\n';
if (types.size() > 4)
@@ -614,7 +613,7 @@ void WriteRemark2(std::ostream &pdbFile, const datablock &db)
{
float resHigh = refine.front()["ls_d_res_high"].as<float>();
pdbFile << "REMARK 2\n"
<< cif::format("REMARK 2 RESOLUTION. %7.2f ANGSTROMS.", resHigh) << '\n';
// std::format needs a {} field here; "%7.2f" would be printed literally and resHigh dropped.
<< std::format("REMARK 2 RESOLUTION. {:7.2f} ANGSTROMS.", resHigh) << '\n';
}
catch (...)
{ /* skip it */
@@ -761,10 +760,7 @@ class Fs : public FBase
else
{
os << '\n';
std::stringstream ss;
ss << "REMARK " << std::setw(3) << std::right << mNr << ' ';
WriteOneContinuedLine(os, ss.str(), 0, s);
WriteOneContinuedLine(os, std::format("REMARK {:3} ", mNr), 0, s);
}
}
@@ -1617,7 +1613,7 @@ void WriteRemark3Phenix(std::ostream &pdbFile, const datablock &db)
percent_reflns_obs /= 100;
pdbFile << RM3(" ") << cif::format("%3d %7.4f - %7.4f %4.2f %8d %5d %6.4f %6.4f", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
pdbFile << RM3(" ") << std::format("%3d %7.4f - %7.4f %4.2f %8d %5d %6.4f %6.4f", bin++, d_res_low, d_res_high, percent_reflns_obs, number_reflns_R_work, number_reflns_R_free, R_factor_R_work, R_factor_R_free) << '\n';
}
pdbFile << RM3("") << '\n'
@@ -2585,7 +2581,7 @@ void WriteRemark465(std::ostream &pdbFile, const datablock &db)
cif::tie(modelNr, resName, chainID, iCode, seqNr) =
r.get("PDB_model_num", "auth_comp_id", "auth_asym_id", "PDB_ins_code", "auth_seq_id");
pdbFile << cif::format("REMARK 465 %3.3s %3.3s %1.1s %5d%1.1s", modelNr, resName, chainID, seqNr, iCode) << '\n';
pdbFile << std::format("REMARK 465 %3.3s %3.3s %1.1s %5d%1.1s", modelNr, resName, chainID, seqNr, iCode) << '\n';
}
}
@@ -2632,7 +2628,7 @@ void WriteRemark470(std::ostream &pdbFile, const datablock &db)
while (not a.second.empty())
{
pdbFile << cif::format("REMARK 470 %3.3s %3.3s %1.1s%4d%1.1s ", modelNr, resName, chainID, seqNr, iCode) << " ";
pdbFile << std::format("REMARK 470 %3.3s %3.3s %1.1s%4d%1.1s ", modelNr, resName, chainID, seqNr, iCode) << " ";
for (std::size_t i = 0; i < 6 and not a.second.empty(); ++i)
{
@@ -2729,16 +2725,16 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
"pdbx_seq_align_end_ins_code", "pdbx_db_accession", "db_align_beg", "pdbx_db_align_beg_ins_code", "db_align_end", "pdbx_db_align_end_ins_code");
if (dbAccession.length() > 8 or db_code.length() > 12 or atoi(dbseqEnd.c_str()) >= 100000)
pdbFile << cif::format(
pdbFile << std::format(
"DBREF1 %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-20.20s",
idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, db_code)
<< '\n'
<< cif::format(
<< std::format(
"DBREF2 %4.4s %1.1s %-22.22s %10.10s %10.10s",
idCode, chainID, dbAccession, dbseqBegin, dbseqEnd)
<< '\n';
else
pdbFile << cif::format(
pdbFile << std::format(
"DBREF %4.4s %1.1s %4.4s%1.1s %4.4s%1.1s %-6.6s %-8.8s %-12.12s %5.5s%1.1s %5.5s%1.1s",
idCode, chainID, seqBegin, insertBegin, seqEnd, insertEnd, db_name, dbAccession, db_code, dbseqBegin, dbinsBeg, dbseqEnd, dbinsEnd)
<< '\n';
@@ -2757,10 +2753,9 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
to_upper(conflict);
pdbFile << cif::format(
pdbFile << std::format(
"SEQADV %4.4s %3.3s %1.1s %4.4s%1.1s %-4.4s %-9.9s %3.3s %5.5s %-21.21s",
idCode, resName, chainID, seqNum, iCode, database, dbAccession, dbRes, dbSeq, conflict)
.str()
<< '\n';
}
@@ -2787,7 +2782,7 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
if (t > 13)
t = 13;
pdbFile << cif::format(
pdbFile << std::format(
"SEQRES %3d %1.1s %4d %-51.51s ",
n++, std::string{ chainID }, seqresl[chainID], join(seq.begin(), seq.begin() + t, " "))
<< '\n';
@@ -2807,10 +2802,9 @@ int WritePrimaryStructure(std::ostream &pdbFile, const datablock &db)
cif::tie(chainID, seqNum, resName, iCode, stdRes, comment) =
r.get("auth_asym_id", "auth_seq_id", "auth_comp_id", "PDB_ins_code", "parent_comp_id", "details");
pdbFile << cif::format(
pdbFile << std::format(
"MODRES %4.4s %3.3s %1.1s %4.4s%1.1s %3.3s %-41.41s",
db.name(), resName, chainID, seqNum, iCode, stdRes, comment)
.str()
<< '\n';
}
@@ -2925,7 +2919,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
{
if (h.water)
continue;
pdbFile << cif::format("HET %3.3s %c%4d%c %5d", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
pdbFile << std::format("HET %3.3s %c%4d%c %5d", h.hetID, h.chainID, h.seqNum, h.iCode, h.numHetAtoms) << '\n';
++numHet;
}
@@ -2940,7 +2934,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
for (;;)
{
pdbFile << cif::format("HETNAM %2.2s %3.3s ", (c > 1 ? std::to_string(c) : std::string()), id);
pdbFile << std::format("HETNAM %2.2s %3.3s ", (c > 1 ? std::to_string(c) : std::string()), id);
++c;
if (name.length() > 55)
@@ -3032,7 +3026,7 @@ int WriteHeterogen(std::ostream &pdbFile, const datablock &db)
{
std::stringstream fs;
fs << cif::format("FORMUL %2d %3.3s %2.2s%c", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
fs << std::format("FORMUL %2d %3.3s %2.2s%c", componentNr, hetID, (c > 1 ? std::to_string(c) : std::string()), (hetID == water_comp_id ? '*' : ' '));
++c;
if (formula.length() > 51)
@@ -3099,7 +3093,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
"pdbx_PDB_helix_class", "pdbx_PDB_helix_length", "beg_auth_seq_id", "end_auth_seq_id");
++numHelix;
pdbFile << cif::format("HELIX %3d %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s%2d%-30.30s %5d",
pdbFile << std::format("HELIX %3d %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s%2d%-30.30s %5d",
numHelix, pdbx_PDB_helix_id, beg_label_comp_id, beg_auth_asym_id, beg_auth_seq_id, pdbx_beg_PDB_ins_code, end_label_comp_id, end_auth_asym_id, end_auth_seq_id, pdbx_end_PDB_ins_code, pdbx_PDB_helix_class, details, pdbx_PDB_helix_length)
<< '\n';
}
@@ -3136,7 +3130,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
"pdbx_end_PDB_ins_code", "beg_auth_comp_id", "beg_auth_asym_id", "beg_auth_seq_id",
"end_auth_comp_id", "end_auth_asym_id", "end_auth_seq_id");
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';
pdbFile << std::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID1, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, 0) << '\n';
first = false;
}
@@ -3155,7 +3149,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
if (h.empty())
{
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
pdbFile << std::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d", rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense) << '\n';
}
else
{
@@ -3168,7 +3162,7 @@ std::tuple<int, int> WriteSecondaryStructure(std::ostream &pdbFile, const databl
curAtom = cif2pdbAtomName(curAtom, compID[0], db);
prevAtom = cif2pdbAtomName(prevAtom, compID[1], db);
pdbFile << cif::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d "
pdbFile << std::format("SHEET %3.3s %3.3s%2d %3.3s %1.1s%4d%1.1s %3.3s %1.1s%4d%1.1s%2d "
"%-4.4s%3.3s %1.1s%4d%1.1s %-4.4s%3.3s %1.1s%4d%1.1s",
rangeID2, sheetID, numStrands, initResName, initChainID, initSeqNum, initICode, endResName, endChainID, endSeqNum, endICode, sense, curAtom, curResName, curChainID, curResSeq, curICode, prevAtom, prevResName, prevChainID, prevResSeq, prevICode)
<< '\n';
@@ -3207,7 +3201,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << cif::format("SSBOND %3d CYS %1.1s %4d%1.1s CYS %1.1s %4d%1.1s %6.6s %6.6s %5.2f", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';
pdbFile << std::format("SSBOND %3d CYS %1.1s %4d%1.1s CYS %1.1s %4d%1.1s %6.6s %6.6s %5.2f", nr, chainID1, seqNum1, icode1, chainID2, seqNum2, icode2, sym1, sym2, Length) << '\n';
++nr;
}
@@ -3234,10 +3228,10 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
sym1 = cif2pdbSymmetry(sym1);
sym2 = cif2pdbSymmetry(sym2);
pdbFile << cif::format("LINK %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %6.6s %6.6s", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
pdbFile << std::format("LINK %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %6.6s %6.6s", name1, altLoc1, resName1, chainID1, resSeq1, iCode1, name2, altLoc2, resName2, chainID2, resSeq2, iCode2, sym1, sym2);
if (not Length.empty())
pdbFile << cif::format(" %5.2f", stod(Length));
// Bond length column: width 5, two decimals (printf's %5.2f expressed as a std::format field).
pdbFile << std::format(" {:5.2f}", stod(Length));
pdbFile << '\n';
}
@@ -3255,7 +3249,7 @@ void WriteConnectivity(std::ostream &pdbFile, const datablock &db)
"pdbx_label_comp_id_2", "pdbx_auth_asym_id_2", "pdbx_auth_seq_id_2", "pdbx_PDB_ins_code_2",
"pdbx_PDB_model_num", "pdbx_omega_angle");
pdbFile << cif::format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3.3s %6.2f",
pdbFile << std::format("CISPEP %3.3s %3.3s %1.1s %4d%1.1s %3.3s %1.1s %4d%1.1s %3.3s %6.2f",
serNum, pep1, chainID1, seqNum1, icode1, pep2, chainID2, seqNum2, icode2, modNum, measure) << '\n';
}
}
@@ -3276,7 +3270,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
cif::tie(siteID, resName, chainID, seq, iCode) =
r.get("site_id", "auth_comp_id", "auth_asym_id", "auth_seq_id", "pdbx_auth_ins_code");
sites[siteID].push_back(cif::format("%3.3s %1.1s%4d%1.1s ", resName, chainID, seq, iCode).str());
sites[siteID].push_back(std::format("%3.3s %1.1s%4d%1.1s ", resName, chainID, seq, iCode));
}
for (auto s : sites)
@@ -3289,7 +3283,7 @@ int WriteMiscellaneousFeatures(std::ostream &pdbFile, const datablock &db)
int nr = 1;
while (res.empty() == false)
{
pdbFile << cif::format("SITE %3d %3.3s %2d ", nr, siteID, numRes);
pdbFile << std::format("SITE %3d %3.3s %2d ", nr, siteID, numRes);
for (int i = 0; i < 4; ++i)
{
@@ -3318,7 +3312,7 @@ void WriteCrystallographic(std::ostream &pdbFile, const datablock &db)
r = db["cell"].find_first(key("entry_id") == db.name());
pdbFile << cif::format("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4d", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
pdbFile << std::format("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %-11.11s%4d", r["length_a"].as<double>(), r["length_b"].as<double>(), r["length_c"].as<double>(), r["angle_alpha"].as<double>(), r["angle_beta"].as<double>(), r["angle_gamma"].as<double>(), symmetry, r["Z_PDB"].as<int>()) << '\n';
}
int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
@@ -3327,18 +3321,18 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
for (auto r : db["database_PDB_matrix"])
{
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("ORIGX%1d %10.6f%10.6f%10.6f %10.5f", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
// ORIGXn records: std::format requires {} replacement fields, printf-style %-specs are not interpreted.
pdbFile << std::format("ORIGX{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 1, r["origx[1][1]"].as<float>(), r["origx[1][2]"].as<float>(), r["origx[1][3]"].as<float>(), r["origx_vector[1]"].as<float>()) << '\n';
pdbFile << std::format("ORIGX{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 2, r["origx[2][1]"].as<float>(), r["origx[2][2]"].as<float>(), r["origx[2][3]"].as<float>(), r["origx_vector[2]"].as<float>()) << '\n';
pdbFile << std::format("ORIGX{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 3, r["origx[3][1]"].as<float>(), r["origx[3][2]"].as<float>(), r["origx[3][3]"].as<float>(), r["origx_vector[3]"].as<float>()) << '\n';
result += 3;
break;
}
for (auto r : db["atom_sites"])
{
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
pdbFile << cif::format("SCALE%1d %10.6f%10.6f%10.6f %10.5f", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
// SCALEn records: std::format requires {} replacement fields, printf-style %-specs are not interpreted.
pdbFile << std::format("SCALE{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 1, r["fract_transf_matrix[1][1]"].as<float>(), r["fract_transf_matrix[1][2]"].as<float>(), r["fract_transf_matrix[1][3]"].as<float>(), r["fract_transf_vector[1]"].as<float>()) << '\n';
pdbFile << std::format("SCALE{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 2, r["fract_transf_matrix[2][1]"].as<float>(), r["fract_transf_matrix[2][2]"].as<float>(), r["fract_transf_matrix[2][3]"].as<float>(), r["fract_transf_vector[2]"].as<float>()) << '\n';
pdbFile << std::format("SCALE{:1d} {:10.6f}{:10.6f}{:10.6f} {:10.5f}", 3, r["fract_transf_matrix[3][1]"].as<float>(), r["fract_transf_matrix[3][2]"].as<float>(), r["fract_transf_matrix[3][3]"].as<float>(), r["fract_transf_vector[3]"].as<float>()) << '\n';
result += 3;
break;
}
@@ -3348,9 +3342,9 @@ int WriteCoordinateTransformation(std::ostream &pdbFile, const datablock &db)
{
std::string given = r["code"] == "given" ? "1" : "";
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
pdbFile << cif::format("MTRIX%1d %3d%10.6f%10.6f%10.6f %10.5f %1.1s", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';
// MTRIXn records: std::format requires {} replacement fields; the trailing {:>1.1} mirrors printf's %1.1s
// (at most one character, padded to one column).
pdbFile << std::format("MTRIX{:1d} {:3d}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:>1.1}", 1, nr, r["matrix[1][1]"].as<float>(), r["matrix[1][2]"].as<float>(), r["matrix[1][3]"].as<float>(), r["vector[1]"].as<float>(), given) << '\n';
pdbFile << std::format("MTRIX{:1d} {:3d}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:>1.1}", 2, nr, r["matrix[2][1]"].as<float>(), r["matrix[2][2]"].as<float>(), r["matrix[2][3]"].as<float>(), r["vector[2]"].as<float>(), given) << '\n';
pdbFile << std::format("MTRIX{:1d} {:3d}{:10.6f}{:10.6f}{:10.6f} {:10.5f} {:>1.1}", 3, nr, r["matrix[3][1]"].as<float>(), r["matrix[3][2]"].as<float>(), r["matrix[3][3]"].as<float>(), r["vector[3]"].as<float>(), given) << '\n';
++nr;
result += 3;
@@ -3417,7 +3411,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
if (terminate)
{
pdbFile << cif::format("TER %5d %3.3s %1.1s%4d%1.1s", serial, resName, chainID, resSeq, iCode) << '\n';
pdbFile << std::format("TER %5d %3.3s %1.1s%4d%1.1s", serial, resName, chainID, resSeq, iCode) << '\n';
++serial;
terminatedChains.insert(chainID);
@@ -3476,7 +3470,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
if (charge != 0)
sCharge = std::to_string(charge) + (charge > 0 ? '+' : '-');
pdbFile << cif::format("%-6.6s%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s", group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';
pdbFile << std::format("%-6.6s%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %8.3f%8.3f%8.3f%6.2f%6.2f %2.2s%2.2s", group, serial, name, altLoc, resName, chainID, resSeq, iCode, x, y, z, occupancy, tempFactor, element, sCharge) << '\n';
++numCoord;
@@ -3491,7 +3485,7 @@ std::tuple<int, int> WriteCoordinatesForModel(std::ostream &pdbFile, const datab
tie(u11, u22, u33, u12, u13, u23) =
ai.get("U[1][1]", "U[2][2]", "U[3][3]", "U[1][2]", "U[1][3]", "U[2][3]");
pdbFile << cif::format("ANISOU%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %7d%7d%7d%7d%7d%7d %2.2s%2.2s", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
pdbFile << std::format("ANISOU%5d %-4.4s%1.1s%3.3s %1.1s%4d%1.1s %7d%7d%7d%7d%7d%7d %2.2s%2.2s", serial, name, altLoc, resName, chainID, resSeq, iCode, std::lrintf(u11 * 10000), std::lrintf(u22 * 10000), std::lrintf(u33 * 10000), std::lrintf(u12 * 10000), std::lrintf(u13 * 10000), std::lrintf(u23 * 10000), element, sCharge) << '\n';
}
++serial;
@@ -3543,7 +3537,7 @@ std::tuple<int, int> WriteCoordinate(std::ostream &pdbFile, const datablock &db)
for (int model_nr : models)
{
if (models.size() > 1)
pdbFile << cif::format("MODEL %4d", model_nr) << '\n';
// Model serial number, right-aligned in 4 columns; "%4d" would be emitted literally by std::format.
pdbFile << std::format("MODEL {:4d}", model_nr) << '\n';
std::set<std::string> TERminatedChains;
auto n = WriteCoordinatesForModel(pdbFile, db, last_resseq_for_chain_map, TERminatedChains, model_nr);
@@ -3615,7 +3609,7 @@ std::string get_HEADER_line(const datablock &db, std::string::size_type truncate
}
}
return FixStringLength(cif::format("HEADER %-40.40s%-9.9s %-4.4s", keywords, date, db.name()).str(), truncate_at);
return FixStringLength(std::format("HEADER %-40.40s%-9.9s %-4.4s", keywords, date, db.name()), truncate_at);
}
std::string get_COMPND_line(const datablock &db, std::string::size_type truncate_at)
@@ -3788,7 +3782,7 @@ void write(std::ostream &os, const datablock &db)
numXform = WriteCoordinateTransformation(os, db);
std::tie(numCoord, numTer) = WriteCoordinate(os, db);
os << cif::format("MASTER %5d 0%5d%5d%5d%5d%5d%5d%5d%5d%5d%5d", numRemark, numHet, numHelix, numSheet, numTurn, numSite, numXform, numCoord, numTer, numConect, numSeq) << '\n'
// MASTER bookkeeping record: eleven right-aligned 5-column counters, written as std::format {} fields
// (printf-style %5d is not interpreted by std::format and the arguments would be silently ignored).
os << std::format("MASTER {:5d} 0{:5d}{:5d}{:5d}{:5d}{:5d}{:5d}{:5d}{:5d}{:5d}{:5d}", numRemark, numHet, numHelix, numSheet, numTurn, numSite, numXform, numCoord, numTer, numConect, numSeq) << '\n'
<< "END\n";
}

View File

@@ -895,12 +895,7 @@ class PDBFileParser
if (year < 1950)
year += 100;
std::stringstream ss;
ss << std::setw(4) << std::setfill('0') << year << '-'
<< std::setw(2) << std::setfill('0') << month << '-'
<< std::setw(2) << std::setfill('0') << day;
s = ss.str();
s = std::format("{:04}-{:02}-{:02}", year, month, day);
}
else if (regex_match(s, m, rx2))
{
@@ -912,7 +907,7 @@ class PDBFileParser
if (year < 1950)
year += 100;
s = cif::format("%04d-%02d", year, month).str();
s = std::format("{:04}-{:02}", year, month);
}
else
ec = error::make_error_code(error::pdbErrors::invalidDate);
@@ -3341,18 +3336,18 @@ void PDBFileParser::ParseRemark350()
{ "type", type },
// { "name", "" },
// { "symmetryOperation", "" },
{ "matrix[1][1]", cif::format("%12.10f", mat[0]).str() },
{ "matrix[1][2]", cif::format("%12.10f", mat[1]).str() },
{ "matrix[1][3]", cif::format("%12.10f", mat[2]).str() },
{ "vector[1]", cif::format("%12.10f", vec[0]).str() },
{ "matrix[2][1]", cif::format("%12.10f", mat[3]).str() },
{ "matrix[2][2]", cif::format("%12.10f", mat[4]).str() },
{ "matrix[2][3]", cif::format("%12.10f", mat[5]).str() },
{ "vector[2]", cif::format("%12.10f", vec[1]).str() },
{ "matrix[3][1]", cif::format("%12.10f", mat[6]).str() },
{ "matrix[3][2]", cif::format("%12.10f", mat[7]).str() },
{ "matrix[3][3]", cif::format("%12.10f", mat[8]).str() },
{ "vector[3]", cif::format("%12.10f", vec[2]).str() }
// Matrix/vector components as fixed-point text (width 12, 10 decimals); std::format needs {} fields,
// a printf-style "%12.10f" would be stored verbatim instead of the number.
{ "matrix[1][1]", std::format("{:12.10f}", mat[0]) },
{ "matrix[1][2]", std::format("{:12.10f}", mat[1]) },
{ "matrix[1][3]", std::format("{:12.10f}", mat[2]) },
{ "vector[1]", std::format("{:12.10f}", vec[0]) },
{ "matrix[2][1]", std::format("{:12.10f}", mat[3]) },
{ "matrix[2][2]", std::format("{:12.10f}", mat[4]) },
{ "matrix[2][3]", std::format("{:12.10f}", mat[5]) },
{ "vector[2]", std::format("{:12.10f}", vec[1]) },
{ "matrix[3][1]", std::format("{:12.10f}", mat[6]) },
{ "matrix[3][2]", std::format("{:12.10f}", mat[7]) },
{ "matrix[3][3]", std::format("{:12.10f}", mat[8]) },
{ "vector[3]", std::format("{:12.10f}", vec[2]) }
});
// clang-format on
@@ -5850,7 +5845,7 @@ void PDBFileParser::ParseCoordinate(int modelNr)
auto f = [](float f) -> std::string
{
return cif::format("%6.4f", f).str();
// Fixed-point, width 6, 4 decimals; "%6.4f" would be returned literally by std::format.
return std::format("{:6.4f}", f);
};
// clang-format off
@@ -6406,7 +6401,7 @@ file read(std::istream &is)
if (std::isalpha(ch) and std::toupper(ch) != 'D')
{
read_pdb_file(is, result);
reconstruct_pdbx(result);
fixup_pdbx(result);
}
else
{

View File

@@ -138,8 +138,8 @@ void checkEntities(datablock &db)
auto compound = cf.create(comp_id);
if (compound)
formula_weight += compound->formula_weight();
else if (cif::VERBOSE > 0)
std::clog << "missing information for compound " + comp_id << '\n';
// else if (cif::VERBOSE > 0)
// std::clog << "missing information for compound " + comp_id << '\n';
++n;
}
@@ -156,8 +156,8 @@ void checkEntities(datablock &db)
auto compound = cf.create(comp_id);
if (compound)
formula_weight += compound->formula_weight();
else if (cif::VERBOSE > 0)
std::clog << "missing information for compound " + comp_id << '\n';
// else if (cif::VERBOSE > 0)
// std::clog << "missing information for compound " + comp_id << '\n';
++n;
}
@@ -171,7 +171,7 @@ void checkEntities(datablock &db)
auto compound = cf.create(*comp_id);
if (not compound)
{
std::cerr << "missing information for compound " << *comp_id << "\n";
// std::cerr << "missing information for compound " << *comp_id << "\n";
continue;
}
formula_weight = compound->formula_weight();
@@ -484,7 +484,8 @@ void checkAtomRecords(datablock &db)
if (not compound)
{
missingCompounds.insert(comp_id);
std::cerr << "Missing compound information for " << comp_id << "\n";
// if (cif::VERBOSE > 0)
// std::cerr << "Missing compound information for " << comp_id << "\n";
continue;
}
@@ -628,13 +629,13 @@ void checkAtomAnisotropRecords(datablock &db)
if (row["pdbx_auth_alt_id"].empty() and not parent["pdbx_auth_alt_id"].empty())
row["pdbx_auth_alt_id"] = parent["pdbx_auth_alt_id"].text();
if (row["pdbx_label_seq_id"].empty() and not parent["pdbx_label_seq_id"].empty())
if (row["pdbx_label_seq_id"].empty() and not parent["label_seq_id"].empty())
row["pdbx_label_seq_id"] = parent["label_seq_id"].text();
if (row["pdbx_label_asym_id"].empty() and not parent["pdbx_label_asym_id"].empty())
if (row["pdbx_label_asym_id"].empty() and not parent["label_asym_id"].empty())
row["pdbx_label_asym_id"] = parent["label_asym_id"].text();
if (row["pdbx_label_atom_id"].empty() and not parent["pdbx_label_atom_id"].empty())
if (row["pdbx_label_atom_id"].empty() and not parent["label_atom_id"].empty())
row["pdbx_label_atom_id"] = parent["label_atom_id"].text();
if (row["pdbx_label_comp_id"].empty() and not parent["pdbx_label_comp_id"].empty())
if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
row["pdbx_label_comp_id"] = parent["label_comp_id"].text();
// if (row["pdbx_PDB_model_num"].empty() and not parent["pdbx_PDB_model_num"].empty())
// row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
@@ -1167,7 +1168,7 @@ void createPdbxNonpolyScheme(datablock &db)
for (int ndb_nr = 1; auto row : atom_site.find("label_entity_id"_key == entity_id and "label_comp_id"_key == comp_id))
{
// Skip existing records
auto linked = atom_site.get_linked(row, pdbx_nonpoly_scheme);
auto linked = atom_site.get_children(row, pdbx_nonpoly_scheme);
if (not linked.empty())
continue;
@@ -1242,6 +1243,101 @@ void createPdbxBranchScheme(datablock &db)
}
}
/// \brief Try to build the index for category \a cat, repairing its key items when that fails.
///
/// The function repeatedly calls cat.set_validator(&validator, db). When that call throws
/// a missing_key_error, every row whose value for the reported key is empty is given a
/// value obtained from cat.get_unique_value(). When it throws a duplicate_key_error, all
/// key items that do not occur as a child key in any link for this category are
/// overwritten with values from cat.get_unique_value(). Each kind of repair is not
/// repeated when the same kind of error is caught twice in a row; in that case the
/// exception is rethrown.
///
/// \param validator  The validator to attach to the category
/// \param cat        The category to attach the validator to, rows may be modified
/// \param db         The datablock passed along to category::set_validator
/// \throws missing_key_error, duplicate_key_error when a repair attempt did not help
/// \throws std::runtime_error when duplicate keys exist but none of the keys may be replaced
void reconstruct_index_for_category(const validator &validator, category &cat, datablock &db)
{
	auto cv = validator.get_validator_for_category(cat.name());

	// Remember which repair was tried last, to avoid looping forever on the same error
	enum class State
	{
		Start,
		MissingKeys,
		DuplicateKeys
	} state = State::Start;

	for (;;)
	{
		// See if we can build an index
		try
		{
			cat.set_validator(&validator, db);
		}
		catch (const missing_key_error &ex)
		{
			if (state == State::MissingKeys)
			{
				if (cif::VERBOSE > 0)
					std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
				throw;
			}

			state = State::MissingKeys;

			auto key = ex.get_key();

			if (cif::VERBOSE > 1)
				std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';

			// Only fill in rows that do not have a value for this key yet
			for (auto row : cat)
			{
				auto ord = row.get<std::string>(key.c_str());
				if (ord.empty())
					row.assign({ //
						{ key, cat.get_unique_value(key) } });
			}

			continue;
		}
		catch (const duplicate_key_error &ex)
		{
			if (state == State::DuplicateKeys)
			{
				if (cif::VERBOSE > 0)
					std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
				throw;
			}

			state = State::DuplicateKeys;

			if (cif::VERBOSE > 0)
				std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';

			// Without a category validator there is no list of keys to work with,
			// propagate the error instead of dereferencing a null validator.
			if (not cv)
				throw;

			// The links do not depend on the key being inspected, look them up only once
			const auto &links = validator.get_links_for_child(cat.name());

			// replace items that do not define a relation to a parent
			std::set<std::string> replaceableKeys;
			for (const auto &key : cv->m_keys)
			{
				bool replaceable = true;
				for (auto lv : links)
				{
					if (std::find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
					{
						replaceable = false;
						break;
					}
				}

				if (replaceable)
					replaceableKeys.insert(key);
			}

			if (replaceableKeys.empty())
				throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");

			for (const auto &key : replaceableKeys)
			{
				for (auto row : cat)
					row.assign(key, cat.get_unique_value(key), false, false);
			}

			continue;
		}

		// set_validator succeeded, the index is in place
		break;
	}
}
bool reconstruct_pdbx(file &file)
{
if (file.empty())
@@ -1408,95 +1504,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
}
}
enum class State
{
Start,
MissingKeys,
DuplicateKeys
} state = State::Start;
for (;;)
{
// See if we can build an index
try
{
cat.set_validator(&validator, db);
}
catch (const missing_key_error &ex)
{
if (state == State::MissingKeys)
{
if (cif::VERBOSE > 0)
std::clog << "Repairing failed for category " << cat.name() << ", missing keys remain: " << ex.what() << '\n';
throw;
}
state = State::MissingKeys;
auto key = ex.get_key();
if (cif::VERBOSE > 0)
std::clog << "Need to add key " << key << " to category " << cat.name() << '\n';
for (auto row : cat)
{
auto ord = row.get<std::string>(key.c_str());
if (ord.empty())
row.assign({ //
{ key, cat.get_unique_value(key) } });
}
continue;
}
catch (const duplicate_key_error &ex)
{
if (state == State::DuplicateKeys)
{
if (cif::VERBOSE > 0)
std::clog << "Repairing failed for category " << cat.name() << ", duplicate keys remain: " << ex.what() << '\n';
throw;
}
state = State::DuplicateKeys;
if (cif::VERBOSE > 0)
std::clog << "Attempt to fix " << cat.name() << " failed: " << ex.what() << '\n';
// replace items that do not define a relation to a parent
std::set<std::string> replaceableKeys;
for (auto key : cv->m_keys)
{
bool replaceable = true;
for (auto lv : validator.get_links_for_child(cat.name()))
{
if (find(lv->m_child_keys.begin(), lv->m_child_keys.end(), key) != lv->m_child_keys.end())
{
replaceable = false;
break;
}
}
if (replaceable)
replaceableKeys.insert(key);
}
if (replaceableKeys.empty())
throw std::runtime_error("Cannot repair category " + cat.name() + " since it contains duplicate keys that cannot be replaced");
for (auto key : replaceableKeys)
{
for (auto row : cat)
row.assign(key, cat.get_unique_value(key), false, false);
}
continue;
}
break;
}
reconstruct_index_for_category(validator, cat, db);
}
catch (const std::exception &ex)
{
@@ -1537,7 +1545,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
comparePolySeqSchemes(db);
createPdbxNonpolyScheme(db);
// Create a minimal set of branch records
@@ -1577,11 +1585,16 @@ void fixup_pdbx(file &file, const validator &validator)
// assuming the first datablock contains the entry ...
auto &db = file.front();
if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
// ... and any additional datablock will contain compound information
cif::compound_source cs(file);
if (auto cat = db.get("atom_site"); cat == nullptr or cat->empty())
throw std::runtime_error("Cannot reconstruct PDBx file, atom data missing");
// Be silent about missing compound info in fixup
auto &cf = compound_factory::instance();
bool save_report = cf.get_report_missing();
cf.set_report_missing(cif::VERBOSE > 1);
std::string entry_id;
@@ -1610,11 +1623,24 @@ void fixup_pdbx(file &file, const validator &validator)
if (not db["atom_site"].find_first(key("label_entity_id") != null))
createEntityIDs(db);
// Now see if atom records make sense at all
// Now see if atom records make sense at all, but in a silent way, this time
checkAtomRecords(db);
db["chem_comp"].reorder_by_index();
// See if we can easily reconstruct missing data fields in order to create an index
for (auto &cat : db)
{
try
{
cat.set_validator(&validator, db);
}
catch (const missing_key_error &)
{
reconstruct_index_for_category(validator, cat, db);
}
}
db.set_validator(&validator);
// Now create any missing categories
@@ -1630,7 +1656,7 @@ void fixup_pdbx(file &file, const validator &validator)
if (auto cat = db.get("ndb_poly_seq_scheme"); cat == nullptr or cat->empty())
comparePolySeqSchemes(db);
createPdbxNonpolyScheme(db);
// Create a minimal set of branch records
@@ -1640,6 +1666,7 @@ void fixup_pdbx(file &file, const validator &validator)
checkEntities(db);
// That's it
cf.set_report_missing(save_report);
}
} // namespace cif::pdb

View File

@@ -61,8 +61,6 @@ condition get_parents_condition(const validator &validator, row_handle rh, const
result = std::move(result) or std::move(cond);
}
}
else if (cif::VERBOSE > 0)
std::cerr << "warning: no child to parent links were found for child " << childName << " and parent " << parentName << '\n';
return result;
}
@@ -92,7 +90,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
{
using namespace cif::literals;
bool result = true;
bool result = true, warned_missing_parents = false;
try
{
@@ -129,10 +127,18 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not cf.is_monomer(comp_id))
continue;
auto p = pdbx_poly_seq_scheme.find(get_parents_condition(validator, r, pdbx_poly_seq_scheme));
auto cond = get_parents_condition(validator, r, pdbx_poly_seq_scheme);
if (not cond)
{
if (VERBOSE > 0 and std::exchange(warned_missing_parents, true) == false)
std::cerr << "warning: missing links for atom_site/pdbx_poly_seq_scheme\n";
continue;
}
auto p = pdbx_poly_seq_scheme.find(std::move(cond));
if (p.size() != 1)
{
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << "In atom_site record: " << r["id"].text() << '\n';
throw std::runtime_error("For each monomer in atom_site there should be exactly one pdbx_poly_seq_scheme record");
}
@@ -274,7 +280,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not seq.has_value())
{
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << "Warning: entity_poly has no sequence for entity_id " << entity_id << '\n';
}
else
@@ -287,7 +293,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
if (not seq_can.has_value())
{
if (cif::VERBOSE > 1)
if (VERBOSE > 1)
std::clog << "Warning: entity_poly has no canonical sequence for entity_id " << entity_id << '\n';
}
else
@@ -304,7 +310,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
catch (const std::exception &ex)
{
result = false;
if (cif::VERBOSE > 0)
if (VERBOSE > 0)
std::clog << ex.what() << '\n';
ec = make_error_code(validation_error::not_valid_pdbx);
}

View File

@@ -36,16 +36,11 @@
// The validator depends on regular expressions. Unfortunately,
// the implementation of std::regex in g++ is buggy and crashes
// on reading the pdbx dictionary. Therefore, in case g++ is used
// the code will use boost::regex instead.
// on reading the pdbx dictionary. We used to use boost regex
// instead but using pcre2 is even easier and faster.
#if USE_BOOST_REGEX
# include <boost/regex.hpp>
using boost::regex;
#else
# include <regex>
using std::regex;
#endif
#define PCRE2_CODE_UNIT_WIDTH 8
#include <pcre2.h>
namespace cif
{
@@ -67,14 +62,67 @@ validation_exception::validation_exception(std::error_code ec, std::string_view
// --------------------------------------------------------------------
struct regex_impl : public regex
// struct regex_impl : public regex
// {
// regex_impl(std::string_view rx)
// : regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
// {
// }
// };
struct regex_impl
{
regex_impl(std::string_view rx)
: regex(rx.begin(), rx.end(), regex::extended | regex::optimize)
{
}
regex_impl(std::string_view rx);
~regex_impl();
regex_impl(const regex_impl &) = delete;
regex_impl &operator=(const regex_impl &) = delete;
bool match(std::string_view v) const;
private:
pcre2_code *m_rx = nullptr;
pcre2_match_data *m_data = nullptr;
};
// Compile the pattern in rx with PCRE2 and allocate the match data block
// that match() reuses for every call.
//
// Throws std::runtime_error when the pattern does not compile or when the
// match data block cannot be allocated. No PCRE2 resources are leaked when
// an exception is thrown.
regex_impl::regex_impl(std::string_view rx)
{
	int err_code = 0;
	PCRE2_SIZE err_offset = 0;

	m_rx = pcre2_compile(reinterpret_cast<PCRE2_SPTR>(rx.data()), rx.length(), 0, &err_code, &err_offset, nullptr);
	if (m_rx == nullptr)
	{
		PCRE2_UCHAR buffer[256];
		std::string message("PCRE2 compilation failed: ");

		// pcre2_get_error_message returns the message length on success and a
		// negative error code on failure, a negative value must never be used
		// as a length. The buffer size is expressed in code units.
		const int n = pcre2_get_error_message(err_code, buffer, sizeof(buffer) / sizeof(buffer[0]));

		if (n >= 0)
			message.append(reinterpret_cast<const char *>(buffer), static_cast<std::size_t>(n));
		else if (n == PCRE2_ERROR_NOMEMORY)
			// Buffer was too small: the text is truncated but still zero terminated
			message.append(reinterpret_cast<const char *>(buffer));
		else
			message.append("unknown error code " + std::to_string(err_code));

		throw std::runtime_error(message);
	}

	m_data = pcre2_match_data_create_from_pattern(m_rx, nullptr);
	if (m_data == nullptr)
	{
		// The destructor is not called for a partially constructed object,
		// so release the compiled pattern here before throwing.
		pcre2_code_free(m_rx);
		m_rx = nullptr;

		throw std::runtime_error("PCRE2 failed to allocate match data");
	}
}
// Release the PCRE2 resources owned by this object: first the match data
// block, then the compiled pattern. Members that were never set are skipped.
regex_impl::~regex_impl()
{
	if (m_data != nullptr)
	{
		pcre2_match_data_free(m_data);
	}

	if (m_rx != nullptr)
	{
		pcre2_code_free(m_rx);
	}
}
bool regex_impl::match(std::string_view v) const
{
bool result = false;
if (int rc = pcre2_match(m_rx, (PCRE2_SPTR)v.data(), v.length(), 0, 0, m_data, nullptr); rc >= 0)
result = true;
else if (rc != PCRE2_ERROR_NOMATCH)
std::cerr << "Error matching with pcre\n";
return result;
}
// --------------------------------------------------------------------
DDL_PrimitiveType map_to_primitive_type(std::string_view s, std::error_code &ec) noexcept
@@ -233,7 +281,7 @@ bool item_validator::validate_value(std::string_view value, std::error_code &ec)
if (not value.empty() and value != "?" and value != ".")
{
if (m_type != nullptr and not regex_match(value.begin(), value.end(), *m_type->m_rx))
if (m_type != nullptr and not m_type->m_rx->match(value))
ec = make_error_code(validation_error::value_does_not_match_rx);
else if (not m_enums.empty() and m_enums.count(std::string{ value }) == 0)
ec = make_error_code(validation_error::value_is_not_in_enumeration_list);

View File

@@ -22,8 +22,8 @@ list(
CIFPP_tests
unit-v2
unit-3d
format
model
query
rename-compound
sugar
spinner

View File

@@ -1,17 +1,17 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause
*
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
*
*
* Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
@@ -24,38 +24,31 @@
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "test-main.hpp"
#include <stdexcept>
#include <cif++.hpp>
// --------------------------------------------------------------------
#include <iostream>
#include <fstream>
TEST_CASE("fmt_1")
TEST_CASE("q-1")
{
std::ostringstream os;
using namespace cif::literals;
std::string world("world");
os << cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI);
REQUIRE(os.str() == "Hello, world , the magic number is 42 and pi is 3.14159");
cif::compound_factory::instance().push_dictionary(gTestDir / "REA.cif");
cif::file a = cif::pdb::read(gTestDir / "pdb1cbs.ent.gz");
auto &pdbx_poly_seq_scheme = a.front()["pdbx_poly_seq_scheme"];
REQUIRE_FALSE(pdbx_poly_seq_scheme.empty());
SECTION("s-11")
{
CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A") == 137);
CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO") == 1);
CHECK(pdbx_poly_seq_scheme.count("asym_id"_key == "A" and "entity_id"_key == 1 and "seq_id"_key == 1 and "mon_id"_key == "PRO" and "hetero"_key == false) == 1);
}
REQUIRE(cif::format("Hello, %-10.10s, the magic number is %d and pi is %g", world, 42, cif::kPI).str() ==
"Hello, world , the magic number is 42 and pi is 3.14159");
}
// --------------------------------------------------------------------
TEST_CASE("clr_1")
{
using namespace cif::colour;
std::cout << "Hello, " << cif::coloured("world!", white, red, cif::colour::regular) << '\n'
<< "Hello, " << cif::coloured("world!", white, red, bold) << '\n'
<< "Hello, " << cif::coloured("world!", black, red) << '\n'
<< "Hello, " << cif::coloured("world!", white, green) << '\n'
<< "Hello, " << cif::coloured("world!", white, blue) << '\n'
<< "Hello, " << cif::coloured("world!", blue, white) << '\n'
<< "Hello, " << cif::coloured("world!", red, white, bold) << '\n';
}

View File

@@ -3355,10 +3355,10 @@ _cat_1.id_2
using test_tuple_type = std::tuple<key_type,bool>;
test_tuple_type TESTS[] = {
{ {{"id", 1}, {"id_2", 10}}, true },
{ {{"id_2", 10}, {"id", 1}}, true },
{ {{"id", 1}, {"id_2", 20}}, false },
{ {{"id", 3} }, true },
{ {{"id", "1"}, {"id_2", "10"}}, true },
{ {{"id_2", "10"}, {"id", "1"}}, true },
{ {{"id", "1"}, {"id_2", "20"}}, false },
{ {{"id", "3"} }, true },
};
for (const auto &[key, test] : TESTS)