mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 22:14:24 +08:00
Compare commits
34 Commits
v9.0.1
...
fast_float
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bc0222dc0e | ||
|
|
10a6b5649b | ||
|
|
743e2800f8 | ||
|
|
32ac884127 | ||
|
|
bec69f7d07 | ||
|
|
a99215ad6a | ||
|
|
e3d2cbd044 | ||
|
|
5fc965789d | ||
|
|
b4596902aa | ||
|
|
cbf8b52f62 | ||
|
|
4e0fa1c916 | ||
|
|
95b007d38f | ||
|
|
b66f7a30ce | ||
|
|
ec7287c503 | ||
|
|
a41c591f0c | ||
|
|
3a6527cdd5 | ||
|
|
5f21a094c0 | ||
|
|
2203a1855d | ||
|
|
7edd2ecc18 | ||
|
|
1d2953c850 | ||
|
|
dbf59ce622 | ||
|
|
1596db8499 | ||
|
|
bd1fb5c5cd | ||
|
|
da500025c3 | ||
|
|
60eeea9a93 | ||
|
|
1220f01f1e | ||
|
|
ad0a34fe98 | ||
|
|
a7425ff1a0 | ||
|
|
1ce25f86ae | ||
|
|
cd93f72b96 | ||
|
|
23500bd303 | ||
|
|
14b4753b4f | ||
|
|
4c37d5db5f | ||
|
|
fc2c4b4172 |
@@ -32,7 +32,7 @@ endif()
|
||||
# set the project name
|
||||
project(
|
||||
libcifpp
|
||||
VERSION 9.0.1
|
||||
VERSION 9.0.4
|
||||
LANGUAGES CXX C)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
@@ -174,10 +174,22 @@ find_file(FMT NAME format)
|
||||
if(FMT EQUAL "FMT-NOTFOUND")
|
||||
if(NOT (fmt_FOUND OR TARGET fmt))
|
||||
find_package(fmt REQUIRED)
|
||||
message(FATAL_ERROR "fmt not found, compiler too old, you're out of luck")
|
||||
message(FATAL_ERROR "cifpp: <format> not found and neither was libfmt, compiler too old, you're out of luck")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Using fast_float for float parsing, but only if needed
|
||||
# try_compile(STD_CHARCONV_COMPILING
|
||||
# SOURCES ${CMAKE_CURRENT_SOURCE_DIR}/cmake/test-charconv.cpp)
|
||||
|
||||
if(NOT STD_CHARCONV_COMPILING)
|
||||
message(NOTICE "libcifpp: Using fast_float for std::from_chars")
|
||||
FetchContent_Declare(fastfloat
|
||||
GIT_REPOSITORY "https://github.com/fastfloat/fast_float"
|
||||
GIT_TAG v8.0.2)
|
||||
FetchContent_MakeAvailable(fastfloat)
|
||||
endif()
|
||||
|
||||
find_package(Threads)
|
||||
find_package(ZLIB QUIET)
|
||||
|
||||
@@ -189,10 +201,6 @@ include(FindPkgConfig)
|
||||
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(PCRE2 IMPORTED_TARGET libpcre2-8)
|
||||
|
||||
if(PCRE2_FOUND)
|
||||
message(STATUS "Using pcre2 found using pkg-config")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(NOT PCRE2_FOUND)
|
||||
@@ -347,6 +355,10 @@ else()
|
||||
target_link_libraries(cifpp PRIVATE $<BUILD_INTERFACE:pcre2s>)
|
||||
endif()
|
||||
|
||||
if(NOT STD_CHARCONV_COMPILING)
|
||||
target_link_libraries(cifpp PUBLIC FastFloat::fast_float)
|
||||
endif()
|
||||
|
||||
if(fmt_FOUND)
|
||||
target_link_libraries(cifpp PUBLIC fmt)
|
||||
endif()
|
||||
|
||||
10
changelog
10
changelog
@@ -1,3 +1,13 @@
|
||||
Version 9.0.4
|
||||
- Fix various stopping and reconstruction errors
|
||||
|
||||
Version 9.0.3
|
||||
- Reconstruction fixed when some entity ids are missing
|
||||
|
||||
Version 9.0.2
|
||||
- Fix code that reconstructs sequences, could throw a map::at
|
||||
- Many optimisations in validation and reconstruction code.
|
||||
|
||||
Version 9.0.1
|
||||
- Use pcre2 from pkg-config if available, if not
|
||||
build a version from the original code.
|
||||
|
||||
17
cmake/test-charconv.cpp
Normal file
17
cmake/test-charconv.cpp
Normal file
@@ -0,0 +1,17 @@
|
||||
#include <charconv>
|
||||
#include <cassert>
|
||||
#include <cstring>
|
||||
|
||||
int main()
|
||||
{
|
||||
float v;
|
||||
char s[] = "1.0";
|
||||
|
||||
auto r = std::from_chars(s, s + strlen(s), v);
|
||||
|
||||
assert(r.ec == std::errc{});
|
||||
assert(r.ptr = s + strlen(s));
|
||||
assert(v == 1.0f);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -53,12 +53,12 @@ namespace cif
|
||||
// --------------------------------------------------------------------
|
||||
/** @brief item is a transient class that is used to pass data into rows
|
||||
* but it also takes care of formatting data.
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
*
|
||||
* The class cif::item is often used implicitly when creating a row in a category
|
||||
* using the emplace function.
|
||||
*
|
||||
*
|
||||
* @code{.cpp}
|
||||
* cif::category cat("my-cat");
|
||||
* cat.emplace({
|
||||
@@ -68,12 +68,12 @@ namespace cif
|
||||
* { "item-4", std::make_optional<int>(42) }, // <- stores an item with value 42
|
||||
* { "item-5" } // <- stores an item with value .
|
||||
* });
|
||||
*
|
||||
*
|
||||
* std::cout << cat << '\n';
|
||||
* @endcode
|
||||
*
|
||||
*
|
||||
* Will result in:
|
||||
*
|
||||
*
|
||||
* @code{.txt}
|
||||
* _my-cat.item-1 1
|
||||
* _my-cat.item-2 1.00
|
||||
@@ -176,7 +176,7 @@ class item
|
||||
|
||||
/// \brief constructor for an item with name \a name and as
|
||||
/// content value \a value
|
||||
template<typename T, std::enable_if_t<std::is_same_v<T, std::string>, int> = 0>
|
||||
template <typename T, std::enable_if_t<std::is_same_v<T, std::string>, int> = 0>
|
||||
item(const std::string_view name, T &&value)
|
||||
: m_name(name)
|
||||
, m_value(std::move(value))
|
||||
@@ -221,8 +221,8 @@ class item
|
||||
item &operator=(item &&rhs) noexcept = default;
|
||||
/** @endcond */
|
||||
|
||||
std::string_view name() const { return m_name; } ///< Return the name of the item
|
||||
std::string_view value() const & { return m_value; } ///< Return the value of the item
|
||||
std::string_view name() const { return m_name; } ///< Return the name of the item
|
||||
std::string_view value() const & { return m_value; } ///< Return the value of the item
|
||||
std::string value() const && { return std::move(m_value); } ///< Return the value of the item
|
||||
|
||||
/// \brief replace the content of the stored value with \a v
|
||||
@@ -560,7 +560,9 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
|
||||
auto b = txt.data();
|
||||
auto e = txt.data() + txt.size();
|
||||
|
||||
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) ? selected_charconv<value_type>::from_chars(b + 1, e, result) : selected_charconv<value_type>::from_chars(b, e, result);
|
||||
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) //
|
||||
? from_chars(b + 1, e, result)
|
||||
: from_chars(b, e, result);
|
||||
|
||||
if ((bool)r.ec or r.ptr != e)
|
||||
{
|
||||
@@ -595,7 +597,9 @@ struct item_handle::item_value_as<T, std::enable_if_t<std::is_arithmetic_v<T> an
|
||||
auto b = txt.data();
|
||||
auto e = txt.data() + txt.size();
|
||||
|
||||
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1])) ? selected_charconv<value_type>::from_chars(b + 1, e, v) : selected_charconv<value_type>::from_chars(b, e, v);
|
||||
std::from_chars_result r = (b + 1 < e and *b == '+' and std::isdigit(b[1]))
|
||||
? from_chars(b + 1, e, v)
|
||||
: from_chars(b, e, v);
|
||||
|
||||
if ((bool)r.ec or r.ptr != e)
|
||||
{
|
||||
|
||||
@@ -1065,6 +1065,16 @@ class structure
|
||||
/// \param atom The set of item data containing the data for the atoms.
|
||||
void create_water(row_initializer atom);
|
||||
|
||||
/// \brief Create a link, a struct_conn record for two atoms.
|
||||
///
|
||||
/// \param a1 Atom 1
|
||||
/// \param a2 Atom 2
|
||||
/// \param link_type The struct_conn_type ID for the link
|
||||
/// \param role The pdbx_role field value
|
||||
/// \return The ID of the struct_conn record created
|
||||
|
||||
std::string create_link(atom a1, atom a2, const std::string &link_type, const std::string &role);
|
||||
|
||||
/// \brief Create a new and empty (sugar) branch
|
||||
branch &create_branch();
|
||||
|
||||
|
||||
@@ -355,279 +355,35 @@ std::string cif_id_for_number(int number);
|
||||
std::vector<std::string> word_wrap(const std::string &text, std::size_t width);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
/// \brief std::from_chars for floating point types.
|
||||
///
|
||||
/// These are optional, there's a selected_charconv class below that selects
|
||||
/// the best option to use based on support by the stl library.
|
||||
///
|
||||
/// I.e. that in case of GNU < 12 (or something) the cif implementation will
|
||||
/// be used, all other cases will use the stl version.
|
||||
|
||||
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
|
||||
std::from_chars_result from_chars(const char *first, const char *last, FloatType &value)
|
||||
{
|
||||
std::from_chars_result result{ first, {} };
|
||||
|
||||
enum State
|
||||
{
|
||||
IntegerSign,
|
||||
Integer,
|
||||
Fraction,
|
||||
ExponentSign,
|
||||
Exponent
|
||||
} state = IntegerSign;
|
||||
int sign = 1;
|
||||
unsigned long long vi = 0;
|
||||
int fl = 0, tz = 0;
|
||||
int exponent_sign = 1;
|
||||
int exponent = 0;
|
||||
bool done = false;
|
||||
|
||||
while (not done and not (bool)result.ec)
|
||||
{
|
||||
char ch = result.ptr != last ? *result.ptr : 0;
|
||||
++result.ptr;
|
||||
|
||||
switch (state)
|
||||
{
|
||||
case IntegerSign:
|
||||
if (ch == '-')
|
||||
{
|
||||
sign = -1;
|
||||
state = Integer;
|
||||
}
|
||||
else if (ch == '+')
|
||||
state = Integer;
|
||||
else if (ch >= '0' and ch <= '9')
|
||||
{
|
||||
vi = ch - '0';
|
||||
state = Integer;
|
||||
}
|
||||
else if (ch == '.')
|
||||
state = Fraction;
|
||||
else
|
||||
result.ec = std::errc::invalid_argument;
|
||||
break;
|
||||
|
||||
case Integer:
|
||||
if (ch >= '0' and ch <= '9')
|
||||
vi = 10 * vi + (ch - '0');
|
||||
else if (ch == 'e' or ch == 'E')
|
||||
state = ExponentSign;
|
||||
else if (ch == '.')
|
||||
state = Fraction;
|
||||
else
|
||||
{
|
||||
done = true;
|
||||
--result.ptr;
|
||||
}
|
||||
break;
|
||||
|
||||
case Fraction:
|
||||
if (ch >= '0' and ch <= '9')
|
||||
{
|
||||
vi = 10 * vi + (ch - '0');
|
||||
|
||||
if (ch == '0')
|
||||
tz += 1;
|
||||
else
|
||||
{
|
||||
fl += tz + 1;
|
||||
tz = 0;
|
||||
}
|
||||
}
|
||||
else if (ch == 'e' or ch == 'E')
|
||||
state = ExponentSign;
|
||||
else
|
||||
{
|
||||
done = true;
|
||||
--result.ptr;
|
||||
}
|
||||
break;
|
||||
|
||||
case ExponentSign:
|
||||
if (ch == '-')
|
||||
{
|
||||
exponent_sign = -1;
|
||||
state = Exponent;
|
||||
}
|
||||
else if (ch == '+')
|
||||
state = Exponent;
|
||||
else if (ch >= '0' and ch <= '9')
|
||||
{
|
||||
exponent = ch - '0';
|
||||
state = Exponent;
|
||||
}
|
||||
else
|
||||
result.ec = std::errc::invalid_argument;
|
||||
break;
|
||||
|
||||
case Exponent:
|
||||
if (ch >= '0' and ch <= '9')
|
||||
exponent = 10 * exponent + (ch - '0');
|
||||
else
|
||||
{
|
||||
done = true;
|
||||
--result.ptr;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (not (bool)result.ec)
|
||||
{
|
||||
while (tz-- > 0)
|
||||
vi /= 10;
|
||||
|
||||
long double v = std::pow(10, -fl) * vi * sign;
|
||||
if (exponent != 0)
|
||||
v *= std::pow(10, exponent * exponent_sign);
|
||||
|
||||
if (std::isnan(v))
|
||||
result.ec = std::errc::invalid_argument;
|
||||
else if (std::abs(v) > std::numeric_limits<FloatType>::max())
|
||||
result.ec = std::errc::result_out_of_range;
|
||||
|
||||
value = static_cast<FloatType>(v);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \brief duplication of std::chars_format for deficient STL implementations
|
||||
enum class chars_format
|
||||
{
|
||||
scientific = 1,
|
||||
fixed = 2,
|
||||
// hex,
|
||||
general = fixed | scientific
|
||||
};
|
||||
|
||||
/// \brief a simplistic implementation of std::to_chars for old STL implementations
|
||||
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
|
||||
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt)
|
||||
{
|
||||
int size = static_cast<int>(last - first);
|
||||
int r = 0;
|
||||
|
||||
switch (fmt)
|
||||
{
|
||||
case chars_format::scientific:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%le", value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%e", value);
|
||||
break;
|
||||
|
||||
case chars_format::fixed:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%lf", value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%f", value);
|
||||
break;
|
||||
|
||||
case chars_format::general:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%lg", value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%g", value);
|
||||
break;
|
||||
}
|
||||
|
||||
std::to_chars_result result;
|
||||
if (r < 0 or r >= size)
|
||||
result = { first, std::errc::value_too_large };
|
||||
else
|
||||
result = { first + r, std::errc() };
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \brief a simplistic implementation of std::to_chars for old STL implementations
|
||||
template <typename FloatType, std::enable_if_t<std::is_floating_point_v<FloatType>, int> = 0>
|
||||
std::to_chars_result to_chars(char *first, char *last, FloatType &value, chars_format fmt, int precision)
|
||||
{
|
||||
int size = static_cast<int>(last - first);
|
||||
int r = 0;
|
||||
|
||||
switch (fmt)
|
||||
{
|
||||
case chars_format::scientific:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%.*le", precision, value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%.*e", precision, value);
|
||||
break;
|
||||
|
||||
case chars_format::fixed:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%.*lf", precision, value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%.*f", precision, value);
|
||||
break;
|
||||
|
||||
case chars_format::general:
|
||||
if constexpr (std::is_same_v<FloatType, long double>)
|
||||
r = snprintf(first, last - first, "%.*lg", precision, value);
|
||||
else
|
||||
r = snprintf(first, last - first, "%.*g", precision, value);
|
||||
break;
|
||||
}
|
||||
|
||||
std::to_chars_result result;
|
||||
if (r < 0 or r >= size)
|
||||
result = { first, std::errc::value_too_large };
|
||||
else
|
||||
result = { first + r, std::errc() };
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/// \brief class that uses our implementation of std::from_chars and std::to_chars
|
||||
template <typename T>
|
||||
struct my_charconv
|
||||
{
|
||||
/// @brief Simply call our version of std::from_chars
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return cif::from_chars(a, b, d);
|
||||
}
|
||||
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
|
||||
|
||||
/// @brief Simply call our version of std::to_chars
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return cif::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief class that uses the STL implementation of std::from_chars and std::to_chars
|
||||
template <typename T>
|
||||
struct std_charconv
|
||||
{
|
||||
/// @brief Simply call std::from_chars
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &d)
|
||||
{
|
||||
return std::from_chars(a, b, d);
|
||||
}
|
||||
|
||||
/// @brief Simply call std::to_chars
|
||||
static std::to_chars_result to_chars(char *first, char *last, T &value, chars_format fmt)
|
||||
{
|
||||
return std::to_chars(first, last, value, fmt);
|
||||
}
|
||||
};
|
||||
|
||||
/// \brief helper to find a from_chars function
|
||||
template <typename T>
|
||||
using from_chars_function = decltype(std::from_chars(std::declval<const char *>(), std::declval<const char *>(), std::declval<T &>()));
|
||||
template <typename T, typename = void>
|
||||
struct ff_charconv;
|
||||
|
||||
/**
|
||||
* @brief Helper to select the best implementation of charconv based on availability of the
|
||||
* function in the std:: namespace
|
||||
*
|
||||
* @tparam T The type for which we want to find a from_chars/to_chars function
|
||||
*/
|
||||
template <typename T>
|
||||
using selected_charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, my_charconv<T>>;
|
||||
struct ff_charconv<T, typename std::enable_if_t<std::is_floating_point_v<T>>>
|
||||
{
|
||||
static std::from_chars_result from_chars(const char *a, const char *b, T &v);
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
using charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, ff_charconv<T>>;
|
||||
|
||||
template <typename T>
|
||||
constexpr auto from_chars(const char *s, const char *e, T &v)
|
||||
{
|
||||
return charconv<T>::from_chars(s, e, v);
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -296,6 +296,11 @@ class progress_bar
|
||||
*/
|
||||
void message(const std::string &inMessage);
|
||||
|
||||
/**
|
||||
* @brief Flush the progress bar to the output stream
|
||||
*/
|
||||
void flush();
|
||||
|
||||
private:
|
||||
progress_bar(const progress_bar &) = delete;
|
||||
progress_bar &operator=(const progress_bar &) = delete;
|
||||
|
||||
@@ -784,7 +784,7 @@ void compound_factory::report_missing_compound(std::string_view compound_id)
|
||||
<< "in /var/cache/libcifpp using the following commands:\n\n"
|
||||
<< "curl -o " << CACHE_DIR << "/components.cif https://files.wwpdb.org/pub/pdb/data/monomers/components.cif\n"
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_pdbx.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic\n"
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic\n\n";
|
||||
<< "curl -o " << CACHE_DIR << "/mmcif_ma.dic https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic\n\n";
|
||||
#endif
|
||||
|
||||
if (m_impl)
|
||||
|
||||
@@ -25,8 +25,11 @@
|
||||
*/
|
||||
|
||||
#include "cif++/datablock.hpp"
|
||||
|
||||
#include "cif++/validate.hpp"
|
||||
|
||||
#include <exception>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
@@ -42,7 +45,16 @@ datablock::datablock(const datablock &db)
|
||||
void datablock::load_dictionary()
|
||||
{
|
||||
if (auto *audit_conform = get("audit_conform"); audit_conform and not audit_conform->empty())
|
||||
set_validator(&validator_factory::instance().get(*audit_conform));
|
||||
{
|
||||
try
|
||||
{
|
||||
set_validator(&validator_factory::instance().get(*audit_conform));
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
std::clog << ex.what() << '\n';
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void datablock::set_validator(const validator *v)
|
||||
@@ -96,7 +108,8 @@ bool datablock::strip()
|
||||
bool result = true;
|
||||
|
||||
// remove all categories that have no validator
|
||||
erase(std::remove_if(begin(), end(), [](category &c) {
|
||||
erase(std::remove_if(begin(), end(), [](category &c)
|
||||
{
|
||||
bool result = false;
|
||||
if (c.get_cat_validator() == nullptr)
|
||||
{
|
||||
@@ -104,8 +117,8 @@ bool datablock::strip()
|
||||
std::clog << "Dropping category " << c.name() << '\n';
|
||||
result = true;
|
||||
}
|
||||
return result;
|
||||
}), end());
|
||||
return result; }),
|
||||
end());
|
||||
|
||||
// then strip the remaining categories
|
||||
for (auto &cat : *this)
|
||||
|
||||
@@ -28,6 +28,9 @@
|
||||
#include "cif++/dictionary_parser.hpp"
|
||||
#include "cif++/file.hpp"
|
||||
#include "cif++/parser.hpp"
|
||||
#include <exception>
|
||||
#include <iomanip>
|
||||
#include <stdexcept>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -46,7 +49,7 @@ class dictionary_parser : public parser
|
||||
void load_dictionary()
|
||||
{
|
||||
std::unique_ptr<datablock> dict;
|
||||
auto savedDatablock = m_datablock;
|
||||
auto savedDatablock = std::exchange(m_datablock, nullptr);
|
||||
|
||||
try
|
||||
{
|
||||
@@ -75,6 +78,9 @@ class dictionary_parser : public parser
|
||||
error(ex.what());
|
||||
}
|
||||
|
||||
if (m_datablock == nullptr)
|
||||
throw std::runtime_error("Dictionary file is empty?");
|
||||
|
||||
// store all validators
|
||||
for (auto &ic : mCategoryValidators)
|
||||
m_validator.add_category_validator(std::move(ic));
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include <filesystem>
|
||||
#include <fstream>
|
||||
#include <initializer_list>
|
||||
#include <iomanip>
|
||||
#include <numeric>
|
||||
#include <stack>
|
||||
@@ -1275,28 +1276,28 @@ void structure::load_atoms_for_model(structure_open_options options)
|
||||
else
|
||||
{
|
||||
std::vector<cif::mm::atom> atoms;
|
||||
std::map<std::tuple<std::string,int>, std::map<std::string, float>> alts;
|
||||
|
||||
std::map<std::tuple<std::string, int>, std::map<std::string, float>> alts;
|
||||
|
||||
for (auto id : atom_site.find<std::string>(std::move(c), "id"))
|
||||
{
|
||||
auto a = atoms.emplace_back(std::make_shared<atom::atom_impl>(m_db, id));
|
||||
|
||||
|
||||
if (a.is_alternate())
|
||||
{
|
||||
auto key = std::make_tuple(a.get_label_asym_id(), a.get_label_seq_id());
|
||||
auto alt_id = a.get_label_alt_id();
|
||||
|
||||
|
||||
if (auto i = alts.find(key); i != alts.end())
|
||||
i->second[alt_id] += a.get_occupancy();
|
||||
else
|
||||
alts[key][alt_id] = a.get_occupancy();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
for (auto &&[key, value] : alts)
|
||||
{
|
||||
// const auto &[asym_id, seq_id] = key;
|
||||
|
||||
|
||||
// select highest occupancy for this residue's alternates
|
||||
std::string alt_id;
|
||||
float occupancy = options.occupancy_mode == occupancy_policy::MAX ? 0.f : std::numeric_limits<float>::max();
|
||||
@@ -1319,11 +1320,11 @@ void structure::load_atoms_for_model(structure_open_options options)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
value.clear();
|
||||
value.emplace(alt_id, occupancy);
|
||||
}
|
||||
|
||||
|
||||
for (auto a : atoms)
|
||||
{
|
||||
if (a.is_alternate())
|
||||
@@ -1335,10 +1336,8 @@ void structure::load_atoms_for_model(structure_open_options options)
|
||||
}
|
||||
else
|
||||
emplace_atom(a);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void structure::load_data()
|
||||
@@ -1898,13 +1897,12 @@ void structure::swap_atoms(atom a1, atom a2)
|
||||
auto r1 = atomSites.find1(key("id") == a1.id());
|
||||
auto r2 = atomSites.find1(key("id") == a2.id());
|
||||
|
||||
auto l1 = r1["label_atom_id"];
|
||||
auto l2 = r2["label_atom_id"];
|
||||
l1.swap(l2);
|
||||
|
||||
auto l3 = r1["auth_atom_id"];
|
||||
auto l4 = r2["auth_atom_id"];
|
||||
l3.swap(l4);
|
||||
for (std::string fld : std::initializer_list<std::string>{ "label_atom_id", "auth_atom_id", "type_symbol" })
|
||||
{
|
||||
auto l1 = r1[fld];
|
||||
auto l2 = r2[fld];
|
||||
l1.swap(l2);
|
||||
}
|
||||
}
|
||||
catch (const std::exception &ex)
|
||||
{
|
||||
@@ -2412,6 +2410,61 @@ void structure::create_water(row_initializer atom)
|
||||
});
|
||||
}
|
||||
|
||||
std::string structure::create_link(atom a1, atom a2, const std::string &link_type, const std::string &role)
|
||||
{
|
||||
using namespace literals;
|
||||
|
||||
auto &struct_conn = m_db["struct_conn"];
|
||||
auto &struct_conn_type = m_db["struct_conn_type"];
|
||||
|
||||
// This will validate link_type :-)
|
||||
if (not struct_conn_type.contains("id"_key == link_type))
|
||||
struct_conn_type.emplace({ { "id", link_type } });
|
||||
|
||||
std::string link_id = struct_conn.get_unique_id(link_type + '_');
|
||||
|
||||
item label_seq_id_1("ptnr1_label_seq_id");
|
||||
if (int nr = a1.get_label_seq_id(); nr != 0)
|
||||
label_seq_id_1.value(std::to_string(nr));
|
||||
|
||||
item label_seq_id_2("ptnr2_label_seq_id");
|
||||
if (int nr = a2.get_label_seq_id(); nr != 0)
|
||||
label_seq_id_2.value(std::to_string(nr));
|
||||
|
||||
struct_conn.emplace(
|
||||
{ //
|
||||
{ "id", link_id },
|
||||
{ "conn_type_id", link_type },
|
||||
{ "pdbx_leaving_atom_flag", "one" },
|
||||
|
||||
{ "ptnr1_label_asym_id", a1.get_label_asym_id() },
|
||||
{ "ptnr1_label_comp_id", a1.get_label_comp_id() },
|
||||
label_seq_id_1,
|
||||
{ "ptnr1_label_atom_id", a1.get_label_atom_id() },
|
||||
{ "pdbx_ptnr1_label_alt_id", a1.get_label_alt_id() },
|
||||
{ "pdbx_ptnr1_PDB_ins_code", a1.get_pdb_ins_code() },
|
||||
{ "ptnr1_auth_asym_id", a1.get_auth_asym_id() },
|
||||
{ "ptnr1_auth_comp_id", a1.get_auth_comp_id() },
|
||||
{ "ptnr1_auth_seq_id", a1.get_auth_seq_id() },
|
||||
{ "ptnr1_symmetry", a1.symmetry() },
|
||||
|
||||
{ "ptnr2_label_asym_id", a2.get_label_asym_id() },
|
||||
{ "ptnr2_label_comp_id", a2.get_label_comp_id() },
|
||||
label_seq_id_2,
|
||||
{ "ptnr2_label_atom_id", a2.get_label_atom_id() },
|
||||
{ "pdbx_ptnr2_label_alt_id", a2.get_label_alt_id() },
|
||||
{ "pdbx_ptnr2_PDB_ins_code", a2.get_pdb_ins_code() },
|
||||
{ "ptnr2_auth_asym_id", a2.get_auth_asym_id() },
|
||||
{ "ptnr2_auth_comp_id", a2.get_auth_comp_id() },
|
||||
{ "ptnr2_auth_seq_id", a2.get_auth_seq_id() },
|
||||
{ "ptnr2_symmetry", a2.symmetry() },
|
||||
|
||||
{ "pdbx_dist_value", distance(a1.get_location(), a2.get_location()), 3 },
|
||||
{ "pdbx_role", role } });
|
||||
|
||||
return link_id;
|
||||
}
|
||||
|
||||
branch &structure::create_branch()
|
||||
{
|
||||
auto &entity = m_db["entity"];
|
||||
@@ -2845,8 +2898,8 @@ static int compare_numbers(std::string_view a, std::string_view b)
|
||||
|
||||
std::from_chars_result ra, rb;
|
||||
|
||||
ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
|
||||
rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
|
||||
ra = from_chars(a.data(), a.data() + a.length(), da);
|
||||
rb = from_chars(b.data(), b.data() + b.length(), db);
|
||||
|
||||
if (not(bool) ra.ec and not(bool) rb.ec)
|
||||
{
|
||||
|
||||
@@ -25,8 +25,12 @@
|
||||
*/
|
||||
|
||||
#include "cif++.hpp"
|
||||
#include "cif++/compound.hpp"
|
||||
#include "cif++/row.hpp"
|
||||
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
namespace cif::pdb
|
||||
@@ -186,6 +190,8 @@ void checkEntities(datablock &db)
|
||||
|
||||
void createEntityIDs(datablock &db)
|
||||
{
|
||||
using namespace literals;
|
||||
|
||||
// Suppose the file does not have entity ID's. We have to make up some
|
||||
|
||||
// walk the atoms. For each auth_asym_id we have a new struct_asym.
|
||||
@@ -197,28 +203,44 @@ void createEntityIDs(datablock &db)
|
||||
// that should cover it
|
||||
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto &entity = db["entity"];
|
||||
auto &cf = compound_factory::instance();
|
||||
|
||||
std::vector<std::vector<residue_key_type>> entities;
|
||||
std::vector<std::vector<row_handle>> entities;
|
||||
|
||||
std::string lastAsymID;
|
||||
int lastSeqID = -1;
|
||||
std::vector<residue_key_type> waters;
|
||||
std::vector<row_handle> waters;
|
||||
|
||||
for (residue_key_type k : atom_site.rows<std::optional<std::string>,
|
||||
std::optional<int>,
|
||||
std::optional<std::string>,
|
||||
std::optional<std::string>,
|
||||
std::optional<int>,
|
||||
std::optional<std::string>>(
|
||||
"auth_asym_id", "auth_seq_id", "auth_comp_id",
|
||||
"label_asym_id", "label_seq_id", "label_comp_id"))
|
||||
int nextEntityID;
|
||||
try
|
||||
{
|
||||
if (entity.empty())
|
||||
nextEntityID = 1;
|
||||
else
|
||||
nextEntityID = entity.find_max<int>("id") + 1;
|
||||
}
|
||||
catch (...)
|
||||
{
|
||||
nextEntityID = 1;
|
||||
}
|
||||
|
||||
for (auto rh : atom_site.find("label_entity_id"_key == cif::null))
|
||||
{
|
||||
residue_key_type k = rh.get<std::optional<std::string>,
|
||||
std::optional<int>,
|
||||
std::optional<std::string>,
|
||||
std::optional<std::string>,
|
||||
std::optional<int>,
|
||||
std::optional<std::string>>(
|
||||
"auth_asym_id", "auth_seq_id", "auth_comp_id",
|
||||
"label_asym_id", "label_seq_id", "label_comp_id");
|
||||
|
||||
std::string comp_id = get_comp_id(k);
|
||||
|
||||
if (cf.is_water(comp_id))
|
||||
{
|
||||
waters.emplace_back(k);
|
||||
waters.emplace_back(rh);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -227,19 +249,20 @@ void createEntityIDs(datablock &db)
|
||||
|
||||
bool is_monomer = cf.is_monomer(comp_id);
|
||||
|
||||
if (lastAsymID == asym_id and lastSeqID == seq_id and not is_monomer)
|
||||
continue;
|
||||
// if (lastAsymID == asym_id and lastSeqID == seq_id and not is_monomer)
|
||||
// continue;
|
||||
|
||||
if (asym_id != lastAsymID or (not is_monomer and lastSeqID != seq_id))
|
||||
entities.push_back({});
|
||||
|
||||
entities.back().emplace_back(k);
|
||||
entities.back().emplace_back(rh);
|
||||
|
||||
lastAsymID = asym_id;
|
||||
lastSeqID = seq_id;
|
||||
}
|
||||
|
||||
std::map<std::size_t, std::string> entity_ids;
|
||||
std::map<std::string, std::string> newEntitiesForCompound;
|
||||
|
||||
atom_site.add_item("label_entity_id");
|
||||
|
||||
@@ -248,7 +271,39 @@ void createEntityIDs(datablock &db)
|
||||
if (entity_ids.contains(i))
|
||||
continue;
|
||||
|
||||
auto entity_id = std::to_string(i + 1);
|
||||
residue_key_type k = entities[i].front().get<std::optional<std::string>, std::optional<int>, std::optional<std::string>, std::optional<std::string>, std::optional<int>, std::optional<std::string>>(
|
||||
"auth_asym_id", "auth_seq_id", "auth_comp_id",
|
||||
"label_asym_id", "label_seq_id", "label_comp_id");
|
||||
|
||||
std::string comp_id = get_comp_id(k);
|
||||
|
||||
std::string entity_id;
|
||||
if (auto v = db["pdbx_entity_nonpoly"].find_first("comp_id"_key == comp_id); v)
|
||||
entity_id = v.get<std::string>("entity_id");
|
||||
else if (auto i2 = newEntitiesForCompound.find(comp_id); i2 != newEntitiesForCompound.end())
|
||||
entity_id = i2->second;
|
||||
else
|
||||
{
|
||||
entity_id = std::to_string(nextEntityID++);
|
||||
|
||||
if (cf.is_monomer(comp_id))
|
||||
entity.emplace({ //
|
||||
{ "id", entity_id },
|
||||
{ "type", "polymer" } });
|
||||
else if (cf.is_water(comp_id))
|
||||
entity.emplace({ //
|
||||
{ "id", entity_id },
|
||||
{ "type", "water" } });
|
||||
else
|
||||
{
|
||||
entity.emplace({ //
|
||||
{ "id", entity_id },
|
||||
{ "type", "non-polymer" } });
|
||||
|
||||
newEntitiesForCompound[comp_id] = entity_id;
|
||||
}
|
||||
}
|
||||
|
||||
entity_ids[i] = entity_id;
|
||||
|
||||
for (std::size_t j = i + 1; j < entities.size(); ++j)
|
||||
@@ -260,20 +315,17 @@ void createEntityIDs(datablock &db)
|
||||
|
||||
for (std::size_t ix = 0; auto &e : entities)
|
||||
{
|
||||
auto k = e.front();
|
||||
const auto &entity_id = entity_ids[ix++];
|
||||
|
||||
std::string comp_id = get_comp_id(k);
|
||||
|
||||
for (auto &k2 : e)
|
||||
atom_site.update_value(get_condition(k2), "label_entity_id", entity_id);
|
||||
for (auto rh : e)
|
||||
rh["label_entity_id"] = entity_id;
|
||||
}
|
||||
|
||||
if (not waters.empty())
|
||||
{
|
||||
std::string waterEntityID = std::to_string(entities.size() + 1);
|
||||
for (auto &k : waters)
|
||||
atom_site.update_value(get_condition(k), "label_entity_id", waterEntityID);
|
||||
for (auto rh : waters)
|
||||
rh["label_entity_id"] = waterEntityID;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,9 +492,38 @@ void checkAtomRecords(datablock &db)
|
||||
if (atom_site.contains(key("label_seq_id") < 0))
|
||||
fixNegativeSeqID(atom_site);
|
||||
|
||||
std::set<int> polymer_entities;
|
||||
for (int id : db["entity"].find<int>("type"_key == "polymer", "id"))
|
||||
polymer_entities.insert(id);
|
||||
std::set<std::string> polymer_entities;
|
||||
if (db["entity"].empty())
|
||||
{
|
||||
// No entity, so we have to guess the types based on the content of atom_site
|
||||
|
||||
std::string last_entity_id;
|
||||
std::optional<int> last_label_seq_id, last_auth_seq_id;
|
||||
|
||||
std::set<std::string> entityIDs;
|
||||
for (auto &[entity_id, label_comp_id, label_seq_id, auth_comp_id, auth_seq_id] :
|
||||
atom_site.rows<std::string, std::string, std::optional<int>, std::string, std::optional<int>>(
|
||||
"label_entity_id", "label_comp_id", "label_seq_id", "auth_comp_id", "auth_seq_id"))
|
||||
{
|
||||
if (cf.is_water(label_comp_id) or cf.is_water(auth_comp_id))
|
||||
continue;
|
||||
|
||||
if (polymer_entities.contains(entity_id))
|
||||
continue;
|
||||
|
||||
if (last_entity_id == entity_id and (last_label_seq_id != label_seq_id or last_auth_seq_id != auth_seq_id))
|
||||
polymer_entities.emplace(entity_id);
|
||||
|
||||
last_entity_id = entity_id;
|
||||
last_label_seq_id = label_seq_id;
|
||||
last_auth_seq_id = auth_seq_id;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (std::string id : db["entity"].find<std::string>("type"_key == "polymer", "id"))
|
||||
polymer_entities.insert(id);
|
||||
}
|
||||
|
||||
std::set<std::string> missingCompounds;
|
||||
|
||||
@@ -479,7 +560,7 @@ void checkAtomRecords(datablock &db)
|
||||
if (missingCompounds.contains(comp_id))
|
||||
continue;
|
||||
|
||||
bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<int>());
|
||||
bool is_polymer = polymer_entities.contains(row["label_entity_id"].as<std::string>());
|
||||
auto compound = cf.create(comp_id);
|
||||
|
||||
if (not compound)
|
||||
@@ -533,14 +614,20 @@ void checkAtomRecords(datablock &db)
|
||||
if (is_polymer and row["label_seq_id"].empty() and cf.is_monomer(comp_id))
|
||||
row["label_seq_id"] = std::to_string(seq_id);
|
||||
|
||||
if (row["label_atom_id"].empty())
|
||||
row["label_atom_id"] = row["auth_atom_id"].text();
|
||||
if (row["label_asym_id"].empty())
|
||||
row["label_asym_id"] = row["auth_asym_id"].text();
|
||||
else if (row["auth_asym_id"].empty())
|
||||
row["auth_asym_id"] = row["label_asym_id"].text();
|
||||
|
||||
if (row["label_comp_id"].empty())
|
||||
row["label_comp_id"] = row["auth_comp_id"].text();
|
||||
else if (row["auth_comp_id"].empty())
|
||||
row["auth_comp_id"] = row["label_comp_id"].text();
|
||||
|
||||
if (row["label_atom_id"].empty())
|
||||
row["label_atom_id"] = row["auth_atom_id"].text();
|
||||
else if (row["auth_atom_id"].empty())
|
||||
row["auth_atom_id"] = row["label_atom_id"].text();
|
||||
|
||||
// Rewrite the coordinates and other items that look better in a fixed format
|
||||
// Be careful not to nuke invalidly formatted data here
|
||||
@@ -563,7 +650,7 @@ void checkAtomRecords(datablock &db)
|
||||
{
|
||||
char b[12];
|
||||
|
||||
if (auto [ptr, ec] = cif::to_chars(b, b + sizeof(b), v, cif::chars_format::fixed, prec); ec == std::errc{})
|
||||
if (auto [ptr, ec] = std::to_chars(b, b + sizeof(b), v, std::chars_format::fixed, prec); ec == std::errc{})
|
||||
row.assign(item_name, { b, static_cast<std::string::size_type>(ptr - b) }, false, false);
|
||||
}
|
||||
}
|
||||
@@ -605,19 +692,24 @@ void checkAtomAnisotropRecords(datablock &db)
|
||||
|
||||
std::vector<row_handle> to_be_deleted;
|
||||
|
||||
std::map<int, row_handle> atoms;
|
||||
for (auto rh : atom_site)
|
||||
atoms[rh.get<int>("id")] = rh;
|
||||
|
||||
bool warnReplaceTypeSymbol = true;
|
||||
for (auto row : atom_site_anisotrop)
|
||||
{
|
||||
auto parents = atom_site_anisotrop.get_parents(row, atom_site);
|
||||
if (parents.size() != 1)
|
||||
auto ai = atoms.find(row.get<int>("id"));
|
||||
|
||||
if (ai == atoms.end())
|
||||
{
|
||||
to_be_deleted.emplace_back(row);
|
||||
continue;
|
||||
}
|
||||
|
||||
// this happens sometimes (Phenix):
|
||||
auto parent = ai->second;
|
||||
|
||||
auto parent = parents.front();
|
||||
// this happens sometimes (Phenix):
|
||||
|
||||
if (row["type_symbol"].empty())
|
||||
row["type_symbol"] = parent["type_symbol"].text();
|
||||
@@ -638,8 +730,6 @@ void checkAtomAnisotropRecords(datablock &db)
|
||||
row["pdbx_label_atom_id"] = parent["label_atom_id"].text();
|
||||
if (row["pdbx_label_comp_id"].empty() and not parent["label_comp_id"].empty())
|
||||
row["pdbx_label_comp_id"] = parent["label_comp_id"].text();
|
||||
// if (row["pdbx_PDB_model_num"].empty() and not parent["pdbx_PDB_model_num"].empty())
|
||||
// row["pdbx_PDB_model_num"] = parent["pdbx_PDB_model_num"].text();
|
||||
}
|
||||
|
||||
if (not to_be_deleted.empty())
|
||||
@@ -652,23 +742,53 @@ void checkAtomAnisotropRecords(datablock &db)
|
||||
}
|
||||
}
|
||||
|
||||
void createStructAsym(datablock &db)
|
||||
void checkStructAsym(datablock &db)
|
||||
{
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto &struct_asym = db["struct_asym"];
|
||||
|
||||
for (const auto &[label_asym_id, entity_id] : atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
|
||||
if (struct_asym.empty())
|
||||
{
|
||||
if (label_asym_id.empty())
|
||||
throw std::runtime_error("File contains atom_site records without a label_asym_id");
|
||||
if (struct_asym.count(key("id") == label_asym_id) == 0)
|
||||
for (const auto &[label_asym_id, entity_id] : atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
|
||||
{
|
||||
struct_asym.emplace({
|
||||
// clang-format off
|
||||
{ "id", label_asym_id },
|
||||
{ "entity_id", entity_id }
|
||||
//clang-format on
|
||||
});
|
||||
if (label_asym_id.empty())
|
||||
throw std::runtime_error("File contains atom_site records without a label_asym_id");
|
||||
if (struct_asym.count(key("id") == label_asym_id) == 0)
|
||||
{
|
||||
struct_asym.emplace({
|
||||
// clang-format off
|
||||
{ "id", label_asym_id },
|
||||
{ "entity_id", entity_id }
|
||||
//clang-format on
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (const auto &[label_asym_id, entity_id] :
|
||||
atom_site.rows<std::string, std::string>("label_asym_id", "label_entity_id"))
|
||||
{
|
||||
if (label_asym_id.empty())
|
||||
throw std::runtime_error("File contains atom_site records without a label_asym_id");
|
||||
|
||||
auto sa = struct_asym.find_first(key("id") == label_asym_id);
|
||||
if (sa)
|
||||
{
|
||||
if (sa["entity_id"].empty())
|
||||
sa.assign("entity_id", entity_id, false, true);
|
||||
else if (sa.get<std::string>("entity_id") != entity_id)
|
||||
throw std::runtime_error("Inconsistent entity ID's in struct_asym");
|
||||
}
|
||||
else
|
||||
{
|
||||
struct_asym.emplace({
|
||||
// clang-format off
|
||||
{ "id", label_asym_id },
|
||||
{ "entity_id", entity_id }
|
||||
//clang-format on
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -739,8 +859,11 @@ void createEntity(datablock &db)
|
||||
auto c = cf.create(first_comp_id);
|
||||
|
||||
type = "non-polymer";
|
||||
desc = c->name();
|
||||
weight = c->formula_weight();
|
||||
if (c)
|
||||
{
|
||||
desc = c->name();
|
||||
weight = c->formula_weight();
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -811,28 +934,28 @@ void createEntityPoly(datablock &db)
|
||||
if (type != "other")
|
||||
{
|
||||
std::string c_type;
|
||||
if (cf.is_base(comp_id))
|
||||
if (auto i = compound_factory::kBaseMap.find(comp_id); i != compound_factory::kBaseMap.end())
|
||||
{
|
||||
c_type = "polydeoxyribonucleotide";
|
||||
letter_can = compound_factory::kBaseMap.at(comp_id);
|
||||
|
||||
letter_can = i->second;
|
||||
|
||||
if (comp_id.length() == 1)
|
||||
letter = letter_can;
|
||||
else
|
||||
letter = '(' + letter_can + ')';
|
||||
letter = '(' + comp_id + ')';
|
||||
}
|
||||
else if (cf.is_peptide(comp_id))
|
||||
else if (auto i2 = compound_factory::kAAMap.find(comp_id); i2 != compound_factory::kAAMap.end())
|
||||
{
|
||||
c_type = "polypeptide(L)";
|
||||
letter = letter_can = compound_factory::kAAMap.at(comp_id);
|
||||
|
||||
letter = letter_can = i2->second;
|
||||
}
|
||||
else if (iequals(c->type(), "D-PEPTIDE LINKING"))
|
||||
{
|
||||
c_type = "polypeptide(D)";
|
||||
|
||||
letter_can = c->one_letter_code();
|
||||
if (letter_can == 0)
|
||||
letter_can = 'X';
|
||||
|
||||
letter = '(' + comp_id + ')';
|
||||
|
||||
non_std_linkage = true;
|
||||
@@ -843,9 +966,6 @@ void createEntityPoly(datablock &db)
|
||||
c_type = "polypeptide(L)";
|
||||
|
||||
letter_can = c->one_letter_code();
|
||||
if (letter_can == 0)
|
||||
letter_can = 'X';
|
||||
|
||||
letter = '(' + comp_id + ')';
|
||||
|
||||
non_std_monomer = true;
|
||||
@@ -855,9 +975,6 @@ void createEntityPoly(datablock &db)
|
||||
// c_type = "other";
|
||||
|
||||
letter_can = c->one_letter_code();
|
||||
if (letter_can == 0)
|
||||
letter_can = 'X';
|
||||
|
||||
letter = '(' + comp_id + ')';
|
||||
|
||||
non_std_monomer = true;
|
||||
@@ -870,7 +987,7 @@ void createEntityPoly(datablock &db)
|
||||
}
|
||||
|
||||
seq[auth_asym_id] += letter;
|
||||
seq_can[auth_asym_id] += letter_can;
|
||||
seq_can[auth_asym_id] += letter_can ? letter_can : 'X';
|
||||
|
||||
if (find(pdb_strand_ids.begin(), pdb_strand_ids.end(), auth_asym_id) == pdb_strand_ids.end())
|
||||
pdb_strand_ids.emplace_back(auth_asym_id);
|
||||
@@ -1390,7 +1507,7 @@ bool reconstruct_pdbx(file &file, const validator &validator)
|
||||
checkChemCompRecords(db);
|
||||
|
||||
// If the data is really horrible, it might not contain entities
|
||||
if (not db["atom_site"].find_first(key("label_entity_id") != null))
|
||||
if (db["atom_site"].find_first(key("label_entity_id") == null))
|
||||
createEntityIDs(db);
|
||||
|
||||
// Now see if atom records make sense at all
|
||||
@@ -1534,9 +1651,8 @@ bool reconstruct_pdbx(file &file, const validator &validator)
|
||||
checkAtomAnisotropRecords(db);
|
||||
|
||||
// Now create any missing categories
|
||||
// Next make sure we have struct_asym records
|
||||
if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
|
||||
createStructAsym(db);
|
||||
// Next make sure we have good struct_asym records
|
||||
checkStructAsym(db);
|
||||
|
||||
if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
|
||||
createEntity(db);
|
||||
@@ -1645,9 +1761,8 @@ void fixup_pdbx(file &file, const validator &validator)
|
||||
db.set_validator(&validator);
|
||||
|
||||
// Now create any missing categories
|
||||
// Next make sure we have struct_asym records
|
||||
if (auto cat = db.get("struct_asym"); cat == nullptr or cat->empty())
|
||||
createStructAsym(db);
|
||||
// Next make sure we have good struct_asym records
|
||||
checkStructAsym(db);
|
||||
|
||||
if (auto cat = db.get("entity"); cat == nullptr or cat->empty())
|
||||
createEntity(db);
|
||||
|
||||
@@ -69,7 +69,7 @@ bool is_valid_pdbx_file(const file &file, const validator &v)
|
||||
{
|
||||
std::error_code ec;
|
||||
bool result = is_valid_pdbx_file(file, v, ec);
|
||||
return result and not (bool)ec;
|
||||
return result and not(bool) ec;
|
||||
}
|
||||
|
||||
bool is_valid_pdbx_file(const file &file, std::error_code &ec)
|
||||
@@ -82,7 +82,7 @@ bool is_valid_pdbx_file(const file &file, std::error_code &ec)
|
||||
result = is_valid_pdbx_file(file, validator_factory::instance().get(*ac), ec);
|
||||
else
|
||||
result = is_valid_pdbx_file(file, validator_factory::instance().get("mmcif_pdbx.dic"), ec);
|
||||
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
@@ -167,7 +167,7 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
|
||||
const auto entity_poly_type = entity_poly.find1<std::string>("entity_id"_key == entity_id, "type");
|
||||
|
||||
std::map<int,std::set<std::string>> mon_per_seq_id;
|
||||
std::map<int, std::set<std::string>> mon_per_seq_id;
|
||||
|
||||
for (const auto &[num, mon_id, hetero] : entity_poly_seq.find<int, std::string, bool>("entity_id"_key == entity_id, "num", "mon_id", "hetero"))
|
||||
{
|
||||
@@ -202,28 +202,37 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
throw std::runtime_error("Mismatch between the hetero flag in the poly seq schemes and the number residues per seq_id");
|
||||
}
|
||||
|
||||
for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
|
||||
{
|
||||
for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
|
||||
{
|
||||
condition cond;
|
||||
|
||||
for (auto mon_id : mon_ids)
|
||||
cond = std::move(cond) or "label_comp_id"_key == mon_id;
|
||||
// This code proved to take too much time ...
|
||||
|
||||
cond = "label_entity_id"_key == entity_id and
|
||||
"label_asym_id"_key == asym_id and
|
||||
"label_seq_id"_key == seq_id and not std::move(cond);
|
||||
|
||||
if (atom_site.contains(std::move(cond)))
|
||||
throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
|
||||
}
|
||||
// for (const auto &[seq_id, mon_ids] : mon_per_seq_id)
|
||||
// {
|
||||
// for (auto asym_id : struct_asym.find<std::string>("entity_id"_key == entity_id, "id"))
|
||||
// {
|
||||
// condition cond;
|
||||
|
||||
// for (auto mon_id : mon_ids)
|
||||
// cond = std::move(cond) or "label_comp_id"_key == mon_id;
|
||||
|
||||
// cond = "label_entity_id"_key == entity_id and
|
||||
// "label_asym_id"_key == asym_id and
|
||||
// "label_seq_id"_key == seq_id and not std::move(cond);
|
||||
|
||||
// if (atom_site.contains(std::move(cond)))
|
||||
// throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
|
||||
// }
|
||||
// }
|
||||
|
||||
// ... so we're using this instead, should be almost the same...
|
||||
|
||||
for (const auto &[comp_id, seq_id] :
|
||||
atom_site.find<std::string, int>("label_entity_id"_key == entity_id, "label_comp_id", "label_seq_id"))
|
||||
{
|
||||
if (not mon_per_seq_id[seq_id].contains(comp_id))
|
||||
throw std::runtime_error("An atom_site record exists that has no parent in the poly seq scheme categories");
|
||||
}
|
||||
|
||||
auto &&[seq, seq_can] = entity_poly.find1<std::optional<std::string>, std::optional<std::string>>("entity_id"_key == entity_id,
|
||||
"pdbx_seq_one_letter_code", "pdbx_seq_one_letter_code_can");
|
||||
|
||||
std::string::const_iterator si, sci, se, sce;
|
||||
|
||||
auto seq_match = [&](bool can, std::string::const_iterator si, std::string::const_iterator se)
|
||||
{
|
||||
@@ -260,8 +269,8 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
else
|
||||
letter = '(' + comp_id + ')';
|
||||
}
|
||||
|
||||
if (iequals(std::string{si, si + letter.length()}, letter))
|
||||
|
||||
if (iequals(std::string{ si, si + letter.length() }, letter))
|
||||
{
|
||||
match = true;
|
||||
si += letter.length();
|
||||
@@ -285,7 +294,9 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
}
|
||||
else
|
||||
{
|
||||
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch) { return std::isspace(ch); }), seq->end());
|
||||
seq->erase(std::remove_if(seq->begin(), seq->end(), [](char ch)
|
||||
{ return std::isspace(ch); }),
|
||||
seq->end());
|
||||
|
||||
if (not seq_match(false, seq->begin(), seq->end()))
|
||||
throw std::runtime_error("Sequences do not match for entity " + entity_id);
|
||||
@@ -298,7 +309,9 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
}
|
||||
else
|
||||
{
|
||||
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch) { return std::isspace(ch); }), seq_can->end());
|
||||
seq_can->erase(std::remove_if(seq_can->begin(), seq_can->end(), [](char ch)
|
||||
{ return std::isspace(ch); }),
|
||||
seq_can->end());
|
||||
|
||||
if (not seq_match(true, seq_can->begin(), seq_can->end()))
|
||||
throw std::runtime_error("Canonical sequences do not match for entity " + entity_id);
|
||||
@@ -315,11 +328,10 @@ bool is_valid_pdbx_file(const file &file, const validator &validator, std::error
|
||||
ec = make_error_code(validation_error::not_valid_pdbx);
|
||||
}
|
||||
|
||||
if (not result and (bool)ec)
|
||||
if (not result and (bool) ec)
|
||||
ec = make_error_code(validation_error::not_valid_pdbx);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cif::pdb
|
||||
|
||||
|
||||
23
src/text.cpp
23
src/text.cpp
@@ -29,6 +29,11 @@
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
|
||||
#if __has_include("fast_float/fast_float.h")
|
||||
#include "fast_float/fast_float.h"
|
||||
#endif
|
||||
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
@@ -512,4 +517,22 @@ std::vector<std::string> word_wrap(const std::string &text, std::size_t width)
|
||||
return result;
|
||||
}
|
||||
|
||||
#if __has_include("fast_float/fast_float.h")
|
||||
|
||||
template<>
|
||||
std::from_chars_result ff_charconv<float>::from_chars(const char *a, const char *b, float &v)
|
||||
{
|
||||
auto r = fast_float::from_chars(a, b, v);
|
||||
return { r.ptr, r.ec };
|
||||
}
|
||||
|
||||
template<>
|
||||
std::from_chars_result ff_charconv<double>::from_chars(const char *a, const char *b, double &v)
|
||||
{
|
||||
auto r = fast_float::from_chars(a, b, v);
|
||||
return { r.ptr, r.ec };
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
} // namespace cif
|
||||
@@ -34,14 +34,20 @@
|
||||
#include <condition_variable>
|
||||
#include <cstring>
|
||||
#include <deque>
|
||||
#include <format>
|
||||
#include <fstream>
|
||||
#include <functional>
|
||||
#include <iomanip>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <sstream>
|
||||
#include <string>
|
||||
#include <thread>
|
||||
#include <utility>
|
||||
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
#include <stop_token>
|
||||
#endif
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
@@ -65,27 +71,50 @@ std::string get_version_nr()
|
||||
|
||||
#if defined(_WIN32) or defined(__MINGW32__)
|
||||
}
|
||||
#include <windows.h>
|
||||
#include <libloaderapi.h>
|
||||
#include <wincon.h>
|
||||
|
||||
#include <codecvt>
|
||||
// clang-format off
|
||||
# include <windows.h>
|
||||
# include <libloaderapi.h>
|
||||
# include <wincon.h>
|
||||
// clang-format on
|
||||
|
||||
namespace cif
|
||||
{
|
||||
|
||||
uint32_t get_terminal_width()
|
||||
{
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi);
|
||||
return csbi.srWindow.Right - csbi.srWindow.Left + 1;
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
return ::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi)
|
||||
? csbi.srWindow.Right - csbi.srWindow.Left + 1
|
||||
: 80;
|
||||
}
|
||||
|
||||
void write_to_console(const std::string &s)
|
||||
{
|
||||
auto h = ::GetStdHandle(STD_OUTPUT_HANDLE);
|
||||
CONSOLE_SCREEN_BUFFER_INFO csbi;
|
||||
|
||||
if (auto l = ::MultiByteToWideChar(CP_UTF8, 0, s.data(), s.length(), nullptr, 0);
|
||||
l > 0 and ::GetConsoleScreenBufferInfo(::GetStdHandle(STD_OUTPUT_HANDLE), &csbi))
|
||||
{
|
||||
std::u16string ws(l, 0);
|
||||
|
||||
::MultiByteToWideChar(CP_UTF8, 0, s.data(), s.length(), (LPWSTR)ws.data(), l);
|
||||
|
||||
DWORD w;
|
||||
::WriteConsoleW(h, ws.data(), ws.length(), &w, nullptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout.write(s.data(), s.length());
|
||||
std::cout.flush();
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#include <sys/ioctl.h>
|
||||
#include <termios.h>
|
||||
#include <limits.h>
|
||||
# include <limits.h>
|
||||
# include <sys/ioctl.h>
|
||||
# include <termios.h>
|
||||
|
||||
uint32_t get_terminal_width()
|
||||
{
|
||||
@@ -100,59 +129,220 @@ uint32_t get_terminal_width()
|
||||
return result;
|
||||
}
|
||||
|
||||
inline void write_to_console(const std::string &s)
|
||||
{
|
||||
std::cout << s << std::flush;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct progress_bar_impl
|
||||
{
|
||||
progress_bar_impl(int64_t inMax, const std::string &inAction)
|
||||
: m_max_value(inMax)
|
||||
progress_bar_impl(uint64_t max_value, const std::string &message)
|
||||
: m_max_value(max_value)
|
||||
, m_consumed(0)
|
||||
, m_action(inAction)
|
||||
, m_message(inAction)
|
||||
, m_thread(std::bind(&progress_bar_impl::run, this))
|
||||
, m_action(message)
|
||||
, m_message(message)
|
||||
{
|
||||
}
|
||||
|
||||
progress_bar_impl(const progress_bar_impl&) = delete;
|
||||
progress_bar_impl &operator=(const progress_bar_impl &) = delete;
|
||||
virtual ~progress_bar_impl() {}
|
||||
|
||||
~progress_bar_impl();
|
||||
|
||||
void run();
|
||||
|
||||
void consumed(int64_t n);
|
||||
void progress(int64_t p);
|
||||
void message(const std::string &msg);
|
||||
|
||||
void print_progress();
|
||||
void print_done();
|
||||
virtual void consumed(uint64_t n);
|
||||
virtual void progress(uint64_t p);
|
||||
virtual void message(const std::string &msg);
|
||||
virtual void print_done();
|
||||
|
||||
using time_point = std::chrono::time_point<std::chrono::system_clock>;
|
||||
|
||||
int64_t m_max_value;
|
||||
std::atomic<int64_t> m_consumed;
|
||||
int64_t m_last_consumed = 0;
|
||||
int m_spinner_index = 0;
|
||||
uint64_t m_max_value;
|
||||
std::atomic<uint64_t> m_consumed;
|
||||
std::string m_action, m_message;
|
||||
std::mutex m_mutex;
|
||||
std::thread m_thread;
|
||||
time_point m_start = std::chrono::system_clock::now();
|
||||
time_point m_last = std::chrono::system_clock::now();
|
||||
bool m_stop = false;
|
||||
};
|
||||
|
||||
progress_bar_impl::~progress_bar_impl()
|
||||
void progress_bar_impl::consumed(uint64_t n)
|
||||
{
|
||||
m_consumed += n;
|
||||
}
|
||||
|
||||
void progress_bar_impl::progress(uint64_t p)
|
||||
{
|
||||
m_consumed = p;
|
||||
}
|
||||
|
||||
void progress_bar_impl::message(const std::string &msg)
|
||||
{
|
||||
m_message = msg;
|
||||
}
|
||||
|
||||
void progress_bar_impl::print_done()
|
||||
{
|
||||
std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - m_start;
|
||||
std::string days, hours, minutes, seconds;
|
||||
|
||||
uint64_t s = static_cast<uint64_t>(std::trunc(elapsed.count()));
|
||||
if (s > 24 * 60 * 60)
|
||||
{
|
||||
days = std::format("{:d}d ", s / (24 * 60 * 60));
|
||||
s %= 24 * 60 * 60;
|
||||
}
|
||||
|
||||
if (s > 60 * 60)
|
||||
{
|
||||
hours = std::format("{:2d}h ", s / (60 * 60));
|
||||
s %= 60 * 60;
|
||||
}
|
||||
|
||||
if (s > 60)
|
||||
{
|
||||
minutes = std::format("{:2d}m ", s / 60);
|
||||
s %= 60;
|
||||
}
|
||||
|
||||
std::string msg = std::format("{} done in {}{}{}{:.1f}s", m_action, days, hours, minutes, s + 1e-6 * (elapsed.count() - s));
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
if (msg.length() < width)
|
||||
msg += std::string(width - msg.length(), ' ');
|
||||
|
||||
write_to_console(msg += '\n');
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct simple_progress_bar_impl : public progress_bar_impl
|
||||
{
|
||||
simple_progress_bar_impl(uint64_t max_value, const std::string &message)
|
||||
: progress_bar_impl(max_value, message)
|
||||
{
|
||||
}
|
||||
|
||||
~simple_progress_bar_impl()
|
||||
{
|
||||
if (m_printed_any)
|
||||
print_done();
|
||||
}
|
||||
|
||||
void consumed(uint64_t n) override
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
progress_bar_impl::consumed(n);
|
||||
|
||||
// print at most 10 steps, but only if it took long enough
|
||||
|
||||
int percentile = static_cast<int>(std::floor(10.f * m_consumed / m_max_value));
|
||||
if (percentile > m_last_percentile and (m_printed_any or std::chrono::system_clock::now() - m_start >= 1s))
|
||||
{
|
||||
if (not std::exchange(m_printed_any, true))
|
||||
write_to_console(m_action + ": ");
|
||||
|
||||
write_to_console(std::format("...{:d}0%", percentile));
|
||||
m_last_percentile = percentile;
|
||||
}
|
||||
}
|
||||
|
||||
void progress(uint64_t p) override
|
||||
{
|
||||
consumed(p - m_consumed);
|
||||
}
|
||||
|
||||
void print_done() override
|
||||
{
|
||||
if (m_printed_any)
|
||||
{
|
||||
write_to_console("\n");
|
||||
progress_bar_impl::print_done();
|
||||
}
|
||||
}
|
||||
|
||||
bool m_printed_any = false;
|
||||
int m_last_percentile = 0;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
struct fancy_progress_bar_impl : public progress_bar_impl
|
||||
{
|
||||
fancy_progress_bar_impl(uint64_t max_value, const std::string &message)
|
||||
: progress_bar_impl(max_value, message)
|
||||
, m_thread(
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
[this](std::stop_token stoken)
|
||||
{ this->run(stoken); }
|
||||
#else
|
||||
[this]()
|
||||
{ this->run(); }
|
||||
#endif
|
||||
)
|
||||
{
|
||||
}
|
||||
|
||||
~fancy_progress_bar_impl();
|
||||
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
void run(std::stop_token stoken);
|
||||
#else
|
||||
void run();
|
||||
#endif
|
||||
|
||||
void consumed(uint64_t n) override;
|
||||
void progress(uint64_t p) override;
|
||||
void message(const std::string &msg) override;
|
||||
|
||||
void print_progress();
|
||||
|
||||
std::mutex m_mutex;
|
||||
std::condition_variable m_cv;
|
||||
|
||||
float m_progress;
|
||||
uint32_t m_width, m_bar_width;
|
||||
uint32_t m_steps, m_last_steps = 0;
|
||||
uint64_t m_last_consumed = 0;
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
std::jthread m_thread;
|
||||
#else
|
||||
std::thread m_thread;
|
||||
bool m_stop = false;
|
||||
#endif
|
||||
};
|
||||
|
||||
const char *kBlocks[] = {
|
||||
" ",
|
||||
"▏",
|
||||
"▎",
|
||||
"▍",
|
||||
"▌",
|
||||
"▋",
|
||||
"▊",
|
||||
"▉",
|
||||
"█",
|
||||
};
|
||||
|
||||
const size_t kBlockCount = sizeof(kBlocks) / sizeof(void *) - 1;
|
||||
|
||||
fancy_progress_bar_impl::~fancy_progress_bar_impl()
|
||||
{
|
||||
using namespace std::literals;
|
||||
assert(m_thread.joinable());
|
||||
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
m_thread.request_stop();
|
||||
#else
|
||||
m_stop = true;
|
||||
#endif
|
||||
m_thread.join();
|
||||
}
|
||||
|
||||
void progress_bar_impl::run()
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
void fancy_progress_bar_impl::run(std::stop_token stoken)
|
||||
#else
|
||||
void fancy_progress_bar_impl::run()
|
||||
#endif
|
||||
{
|
||||
using namespace std::literals;
|
||||
|
||||
@@ -160,25 +350,44 @@ void progress_bar_impl::run()
|
||||
|
||||
try
|
||||
{
|
||||
while (not m_stop)
|
||||
for (;;)
|
||||
{
|
||||
std::unique_lock lock(m_mutex);
|
||||
|
||||
m_cv.wait_for(lock, 25ms);
|
||||
|
||||
#if __cpp_lib_jthread >= 201911L
|
||||
if (stoken.stop_requested())
|
||||
break;
|
||||
#else
|
||||
if (m_stop)
|
||||
break;
|
||||
#endif
|
||||
|
||||
auto now = std::chrono::system_clock::now();
|
||||
|
||||
if (now - m_start < 2s or now - m_last < 100ms)
|
||||
{
|
||||
std::this_thread::sleep_for(10ms);
|
||||
if (m_consumed == m_last_consumed or now - m_start < 1s)
|
||||
continue;
|
||||
}
|
||||
|
||||
std::lock_guard lock(m_mutex);
|
||||
m_last_consumed = m_consumed;
|
||||
|
||||
if (not printedAny and isatty(STDOUT_FILENO))
|
||||
std::cout << "\x1b[?25l";
|
||||
// See if we need to do work
|
||||
m_width = get_terminal_width();
|
||||
m_progress = static_cast<float>(m_consumed) / m_max_value;
|
||||
m_bar_width = 7 * m_width / 10; // 70% of the width of the terminal
|
||||
m_steps = static_cast<uint32_t>(std::ceil(m_progress * m_bar_width * kBlockCount));
|
||||
|
||||
if (m_steps == m_last_steps)
|
||||
continue;
|
||||
|
||||
m_last_steps = m_steps;
|
||||
|
||||
if (not printedAny)
|
||||
write_to_console("\x1b[?25l");
|
||||
|
||||
print_progress();
|
||||
|
||||
printedAny = true;
|
||||
m_last = std::chrono::system_clock::now();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
@@ -187,161 +396,94 @@ void progress_bar_impl::run()
|
||||
|
||||
if (printedAny)
|
||||
{
|
||||
write_to_console("\r\x1b[?25h");
|
||||
print_done();
|
||||
if (isatty(STDOUT_FILENO))
|
||||
std::cout << "\x1b[?25h";
|
||||
}
|
||||
}
|
||||
|
||||
void progress_bar_impl::consumed(int64_t n)
|
||||
void fancy_progress_bar_impl::consumed(uint64_t n)
|
||||
{
|
||||
m_consumed += n;
|
||||
progress_bar_impl::consumed(n);
|
||||
// m_cv.notify_one();
|
||||
}
|
||||
|
||||
void progress_bar_impl::progress(int64_t p)
|
||||
void fancy_progress_bar_impl::progress(uint64_t p)
|
||||
{
|
||||
m_consumed = p;
|
||||
progress_bar_impl::progress(p);
|
||||
// m_cv.notify_one();
|
||||
}
|
||||
|
||||
void progress_bar_impl::message(const std::string &msg)
|
||||
void fancy_progress_bar_impl::message(const std::string &msg)
|
||||
{
|
||||
std::unique_lock lock(m_mutex);
|
||||
m_message = msg;
|
||||
progress_bar_impl::message(msg);
|
||||
// m_cv.notify_one();
|
||||
}
|
||||
|
||||
const char* kSpinner[] = {
|
||||
// ".", "o", "O", "0", "O", "o", ".", " "
|
||||
// "⢄", "⢂", "⢁", "⡁", "⡈", "⡐", "⡠"
|
||||
".", "o", "O", "0", "@", "*", " "
|
||||
};
|
||||
|
||||
const std::size_t kSpinnerCount = sizeof(kSpinner) / sizeof(char*);
|
||||
|
||||
const int kSpinnerTimeInterval = 100;
|
||||
|
||||
const uint32_t kMinBarWidth = 40, kMinMsgWidth = 12;
|
||||
|
||||
void progress_bar_impl::print_progress()
|
||||
void fancy_progress_bar_impl::print_progress()
|
||||
{
|
||||
const char *kBlocks[] = {
|
||||
// "▯", // 0
|
||||
// "▮", // 1
|
||||
"=",
|
||||
"-"
|
||||
};
|
||||
const uint32_t pct_width = 5;
|
||||
uint32_t msg_width = m_width - m_bar_width - pct_width - 1;
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
float progress = static_cast<float>(m_consumed) / m_max_value;
|
||||
|
||||
if (width < kMinBarWidth)
|
||||
std::cout << (100 * progress) << "%\n";
|
||||
else
|
||||
if (msg_width < kMinMsgWidth)
|
||||
{
|
||||
uint32_t bar_width = 7 * width / 10;
|
||||
uint32_t pct_width = 7;
|
||||
uint32_t msg_width = width - bar_width - pct_width - 1;
|
||||
m_bar_width += kMinMsgWidth - msg_width;
|
||||
msg_width = kMinMsgWidth;
|
||||
}
|
||||
|
||||
if (msg_width < kMinMsgWidth)
|
||||
{
|
||||
bar_width += kMinMsgWidth - msg_width;
|
||||
msg_width = kMinMsgWidth;
|
||||
}
|
||||
std::string bar;
|
||||
bar.reserve(m_bar_width * 4);
|
||||
|
||||
std::ostringstream msg;
|
||||
|
||||
if (m_message.length() <= msg_width)
|
||||
{
|
||||
msg << m_message;
|
||||
if (m_message.length() < msg_width)
|
||||
msg << std::string(msg_width - m_message.length(), ' ');
|
||||
}
|
||||
for (uint32_t i = 0; i < m_bar_width; ++i)
|
||||
{
|
||||
if (i * kBlockCount <= m_steps)
|
||||
bar += kBlocks[kBlockCount];
|
||||
else if (i * kBlockCount > m_steps + kBlockCount)
|
||||
bar += kBlocks[0];
|
||||
else
|
||||
msg << m_message.substr(0, msg_width - 3) << "...";
|
||||
|
||||
msg << ' ';
|
||||
|
||||
uint32_t pi = static_cast<uint32_t>(std::ceil(progress * bar_width));
|
||||
|
||||
for (uint32_t i = 0; i < bar_width; ++i)
|
||||
msg << kBlocks[i > pi ? 1 : 0];
|
||||
|
||||
msg << ' ';
|
||||
|
||||
msg << std::setw(3) << static_cast<int>(std::ceil(progress * 100)) << "% ";
|
||||
|
||||
auto now = std::chrono::system_clock::now();
|
||||
m_spinner_index = (std::chrono::duration_cast<std::chrono::milliseconds>(now - m_start).count() / kSpinnerTimeInterval) % kSpinnerCount;
|
||||
|
||||
msg << kSpinner[m_spinner_index];
|
||||
|
||||
std::cout << '\r' << msg.str();
|
||||
std::cout.flush();
|
||||
bar += kBlocks[1 + m_steps % kBlockCount];
|
||||
}
|
||||
}
|
||||
|
||||
namespace
|
||||
{
|
||||
|
||||
std::ostream &operator<<(std::ostream &os, const std::chrono::duration<double> &t)
|
||||
// make the bar more colorfull
|
||||
struct color_type
|
||||
{
|
||||
uint64_t s = static_cast<uint64_t>(std::trunc(t.count()));
|
||||
if (s > 24 * 60 * 60)
|
||||
{
|
||||
auto days = s / (24 * 60 * 60);
|
||||
os << days << "d ";
|
||||
s %= 24 * 60 * 60;
|
||||
}
|
||||
uint8_t r, g, b;
|
||||
} fg{ 0, 3, 5 }, bg{ 0, 1, 2 };
|
||||
|
||||
if (s > 60 * 60)
|
||||
{
|
||||
auto hours = s / (60 * 60);
|
||||
os << hours << "h ";
|
||||
s %= 60 * 60;
|
||||
}
|
||||
auto esc_1 = std::format("\x1b[38;5;{}m\x1b[48;5;{}m",
|
||||
16 + (fg.r * 36) + (fg.g * 6) + fg.b,
|
||||
16 + (bg.r * 36) + (bg.g * 6) + bg.b);
|
||||
std::string esc_2("\x1b[0m");
|
||||
|
||||
if (s > 60)
|
||||
{
|
||||
auto minutes = s / 60;
|
||||
os << minutes << "m ";
|
||||
s %= 60;
|
||||
}
|
||||
bar = esc_1 + bar + esc_2;
|
||||
|
||||
double ss = s + 1e-6 * (t.count() - s);
|
||||
std::string msg = m_message.length() <= msg_width
|
||||
? m_message
|
||||
: m_message.substr(0, msg_width - 3) + "...";
|
||||
|
||||
os << std::fixed << std::setprecision(1) << ss << 's';
|
||||
|
||||
return os;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
void progress_bar_impl::print_done()
|
||||
{
|
||||
std::chrono::duration<double> elapsed = std::chrono::system_clock::now() - m_start;
|
||||
|
||||
std::ostringstream msgstr;
|
||||
msgstr << m_action << " done in " << elapsed << " seconds";
|
||||
auto msg = msgstr.str();
|
||||
|
||||
uint32_t width = get_terminal_width();
|
||||
|
||||
if (msg.length() < width)
|
||||
msg += std::string(width - msg.length(), ' ');
|
||||
|
||||
std::cout << '\r' << msg << '\n';
|
||||
write_to_console(std::format("{:{}} {} {:3d}%\r", msg, msg_width, bar,
|
||||
static_cast<int>(std::ceil(m_progress * 100))));
|
||||
}
|
||||
|
||||
progress_bar::progress_bar(int64_t inMax, const std::string &inAction)
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
progress_bar::progress_bar(int64_t max_value, const std::string &message)
|
||||
: m_impl(nullptr)
|
||||
{
|
||||
if (isatty(STDOUT_FILENO) and VERBOSE >= 0)
|
||||
m_impl = new progress_bar_impl(inMax, inAction);
|
||||
if (VERBOSE >= 0)
|
||||
{
|
||||
if (isatty(STDOUT_FILENO) and get_terminal_width() > kMinBarWidth)
|
||||
m_impl = new fancy_progress_bar_impl(max_value, message);
|
||||
else
|
||||
m_impl = new simple_progress_bar_impl(max_value, message);
|
||||
}
|
||||
}
|
||||
|
||||
progress_bar::~progress_bar()
|
||||
{
|
||||
delete m_impl;
|
||||
flush();
|
||||
}
|
||||
|
||||
void progress_bar::consumed(int64_t inConsumed)
|
||||
@@ -350,16 +492,25 @@ void progress_bar::consumed(int64_t inConsumed)
|
||||
m_impl->consumed(inConsumed);
|
||||
}
|
||||
|
||||
void progress_bar::progress(int64_t inProgress)
|
||||
void progress_bar::progress(int64_t value)
|
||||
{
|
||||
if (m_impl != nullptr)
|
||||
m_impl->progress(inProgress);
|
||||
m_impl->progress(value);
|
||||
}
|
||||
|
||||
void progress_bar::message(const std::string &inMessage)
|
||||
void progress_bar::message(const std::string &message)
|
||||
{
|
||||
if (m_impl != nullptr)
|
||||
m_impl->message(inMessage);
|
||||
m_impl->message(message);
|
||||
}
|
||||
|
||||
void progress_bar::flush()
|
||||
{
|
||||
if (m_impl)
|
||||
{
|
||||
delete m_impl;
|
||||
m_impl = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
@@ -387,13 +538,13 @@ struct rsrc_imp
|
||||
|
||||
#if _WIN32
|
||||
|
||||
#if __MINGW32__
|
||||
# if __MINGW32__
|
||||
|
||||
extern "C" __attribute__((weak, alias("gResourceIndexDefault"))) const mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern "C" __attribute__((weak, alias("gResourceDataDefault"))) const char gResourceData[];
|
||||
extern "C" __attribute__((weak, alias("gResourceNameDefault"))) const char gResourceName[];
|
||||
|
||||
#else
|
||||
# else
|
||||
|
||||
extern "C" const mrsrc::rsrc_imp *gResourceIndexDefault[1] = {};
|
||||
extern "C" const char *gResourceDataDefault[1] = {};
|
||||
@@ -403,11 +554,11 @@ extern "C" const mrsrc::rsrc_imp gResourceIndex[];
|
||||
extern "C" const char gResourceData[];
|
||||
extern "C" const char gResourceName[];
|
||||
|
||||
#pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
|
||||
#pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
|
||||
# pragma comment(linker, "/alternatename:gResourceIndex=gResourceIndexDefault")
|
||||
# pragma comment(linker, "/alternatename:gResourceData=gResourceDataDefault")
|
||||
# pragma comment(linker, "/alternatename:gResourceName=gResourceNameDefault")
|
||||
|
||||
#endif
|
||||
# endif
|
||||
|
||||
#else
|
||||
extern const __attribute__((weak)) mrsrc::rsrc_imp gResourceIndex[];
|
||||
|
||||
@@ -181,8 +181,8 @@ int type_validator::compare(std::string_view a, std::string_view b) const
|
||||
|
||||
std::from_chars_result ra, rb;
|
||||
|
||||
ra = selected_charconv<double>::from_chars(a.data(), a.data() + a.length(), da);
|
||||
rb = selected_charconv<double>::from_chars(b.data(), b.data() + b.length(), db);
|
||||
ra = from_chars(a.data(), a.data() + a.length(), da);
|
||||
rb = from_chars(b.data(), b.data() + b.length(), db);
|
||||
|
||||
if (not(bool) ra.ec and not(bool) rb.ec)
|
||||
{
|
||||
@@ -561,8 +561,10 @@ const validator &validator_factory::get(const category &audit_conform)
|
||||
// If the audit conform contains only one record, this is easy
|
||||
if (audit_conform.size() == 1)
|
||||
{
|
||||
const auto &[name, version] = audit_conform.front().get<std::string, std::optional<std::string>>("dict_name", "dict_version");
|
||||
return m_validators.emplace_back(construct_validator(name, version));
|
||||
const auto &[name, version] =
|
||||
audit_conform.front().get<std::string, std::optional<std::string>>("dict_name", "dict_version");
|
||||
if (not name.empty())
|
||||
return m_validators.emplace_back(construct_validator(name, version));
|
||||
}
|
||||
|
||||
// A new, merged dictionary
|
||||
@@ -570,6 +572,9 @@ const validator &validator_factory::get(const category &audit_conform)
|
||||
std::optional<validator> v;
|
||||
for (const auto &[name, version] : audit_conform.rows<std::string, std::optional<std::string>>("dict_name", "dict_version"))
|
||||
{
|
||||
if (name.empty())
|
||||
continue;
|
||||
|
||||
if (not v)
|
||||
v = construct_validator(name, version);
|
||||
else
|
||||
@@ -604,7 +609,7 @@ validator validator_factory::construct_validator(std::string_view name, std::opt
|
||||
not v.matches_audit_conform(category{ "audit_conform", //
|
||||
{ { "dict_name", name }, { "dict_version", version } } }))
|
||||
{
|
||||
std::clog << "Invalid dictionary?\n";
|
||||
std::clog << "Loaded dictionary does not match name=" << name << " and version=" << version.value_or("''") << "\n";
|
||||
}
|
||||
|
||||
return v;
|
||||
|
||||
@@ -1,19 +1,39 @@
|
||||
# We're using the older version 2 of Catch2
|
||||
# SPDX-License-Identifier: BSD-2-Clause
|
||||
#
|
||||
# Copyright (c) 2025 NKI/AVL, Netherlands Cancer Institute
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright notice, this
|
||||
# list of conditions and the following disclaimer
|
||||
# 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
# this list of conditions and the following disclaimer in the documentation
|
||||
# and/or other materials provided with the distribution.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
||||
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
if(NOT(Catch2_FOUND OR TARGET Catch2))
|
||||
find_package(Catch2 QUIET)
|
||||
if(NOT (Catch2_FOUND OR TARGET Catch2))
|
||||
find_package(Catch2 3 QUIET)
|
||||
|
||||
if(NOT Catch2_FOUND)
|
||||
include(FetchContent)
|
||||
|
||||
FetchContent_Declare(
|
||||
Catch2
|
||||
GIT_REPOSITORY https://github.com/catchorg/Catch2.git
|
||||
GIT_TAG v2.13.9)
|
||||
GIT_TAG v3.4.0)
|
||||
|
||||
FetchContent_MakeAvailable(Catch2)
|
||||
|
||||
set(Catch2_VERSION "2.13.9")
|
||||
target_compile_features(Catch2 PRIVATE cxx_std_20)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
@@ -27,20 +47,14 @@ list(
|
||||
rename-compound
|
||||
sugar
|
||||
spinner
|
||||
# reconstruction
|
||||
reconstruction
|
||||
validate-pdbx
|
||||
)
|
||||
)
|
||||
|
||||
add_library(test-main OBJECT "${CMAKE_CURRENT_SOURCE_DIR}/test-main.cpp")
|
||||
|
||||
target_link_libraries(test-main cifpp::cifpp Catch2::Catch2)
|
||||
|
||||
if("${Catch2_VERSION}" VERSION_LESS 3.0.0)
|
||||
target_compile_definitions(test-main PUBLIC CATCH22=1)
|
||||
else()
|
||||
target_compile_definitions(test-main PUBLIC CATCH22=0)
|
||||
endif()
|
||||
|
||||
foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
set(CIFPP_TEST "${CIFPP_TEST}-test")
|
||||
set(CIFPP_TEST_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/${CIFPP_TEST}.cpp")
|
||||
@@ -48,12 +62,6 @@ foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
add_executable(
|
||||
${CIFPP_TEST} ${CIFPP_TEST_SOURCE} $<TARGET_OBJECTS:test-main>)
|
||||
|
||||
if(${Catch2_VERSION} VERSION_GREATER_EQUAL 3.0.0)
|
||||
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=0)
|
||||
else()
|
||||
target_compile_definitions(${CIFPP_TEST} PUBLIC CATCH22=1)
|
||||
endif()
|
||||
|
||||
target_link_libraries(${CIFPP_TEST} PRIVATE cifpp::cifpp Catch2::Catch2)
|
||||
target_include_directories(${CIFPP_TEST} PRIVATE "${EIGEN_INCLUDE_DIR}")
|
||||
|
||||
@@ -62,15 +70,6 @@ foreach(CIFPP_TEST IN LISTS CIFPP_tests)
|
||||
target_compile_options(${CIFPP_TEST} PRIVATE /EHsc)
|
||||
endif()
|
||||
|
||||
add_custom_target(
|
||||
"run-${CIFPP_TEST}"
|
||||
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch ${CIFPP_TEST})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/Run${CIFPP_TEST}.touch
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
add_test(NAME ${CIFPP_TEST} COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir
|
||||
${CMAKE_CURRENT_SOURCE_DIR})
|
||||
endforeach()
|
||||
add_test(NAME ${CIFPP_TEST}
|
||||
COMMAND $<TARGET_FILE:${CIFPP_TEST}> --data-dir ${CMAKE_CURRENT_SOURCE_DIR})
|
||||
endforeach()
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
|
||||
|
||||
@@ -11,12 +11,7 @@ int main(int argc, char *argv[])
|
||||
Catch::Session session; // There must be exactly one instance
|
||||
|
||||
// Build a new parser on top of Catch2's
|
||||
#if CATCH22
|
||||
using namespace Catch::clara;
|
||||
#else
|
||||
// Build a new parser on top of Catch2's
|
||||
using namespace Catch::Clara;
|
||||
#endif
|
||||
|
||||
auto cli = session.cli() // Get Catch2's command line parser
|
||||
| Opt(gTestDir, "data-dir") // bind variable to a new option, with a hint string
|
||||
|
||||
@@ -26,11 +26,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#if CATCH22
|
||||
#include <catch2/catch.hpp>
|
||||
#else
|
||||
#include <catch2/catch_all.hpp>
|
||||
#endif
|
||||
|
||||
#include <filesystem>
|
||||
|
||||
|
||||
@@ -155,8 +155,8 @@ TEST_CASE("cc_2")
|
||||
|
||||
for (const auto &[val, prec, test] : tests)
|
||||
{
|
||||
char buffer[64];
|
||||
const auto &[ptr, ec] = cif::to_chars(buffer, buffer + sizeof(buffer), val, cif::chars_format::fixed, prec);
|
||||
char buffer[64] = {};
|
||||
const auto &[ptr, ec] = std::to_chars(buffer, buffer + sizeof(buffer), val, std::chars_format::fixed, prec);
|
||||
|
||||
CHECK_FALSE((bool)ec);
|
||||
|
||||
|
||||
@@ -63,7 +63,7 @@ update_dictionary() {
|
||||
|
||||
update_dictionary "@CIFPP_CACHE_DIR@/components.cif" "https://files.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
|
||||
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_pdbx.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
|
||||
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://github.com/ihmwg/ModelCIF/raw/master/dist/mmcif_ma.dic"
|
||||
update_dictionary "@CIFPP_CACHE_DIR@/mmcif_ma.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_ma.dic"
|
||||
|
||||
# notify subscribers, using find instead of run-parts to make it work on FreeBSD as well
|
||||
|
||||
|
||||
Reference in New Issue
Block a user