mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-05 14:34:21 +08:00
389 lines
11 KiB
C++
389 lines
11 KiB
C++
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause
|
|
*
|
|
* Copyright (c) 2020 NKI/AVL, Netherlands Cancer Institute
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions are met:
|
|
*
|
|
* 1. Redistributions of source code must retain the above copyright notice, this
|
|
* list of conditions and the following disclaimer
|
|
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
|
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "cif++/exports.hpp"
|
|
|
|
#include <charconv>
|
|
#include <cmath>
|
|
#include <cstdint>
|
|
#include <limits>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <tuple>
|
|
#include <vector>
|
|
|
|
#if __has_include(<experimental/type_traits>)
|
|
|
|
#include <experimental/type_traits>
|
|
namespace std_experimental = std::experimental;
|
|
|
|
#else
|
|
|
|
// Fallback implementation of the detection idiom (is_detected) for standard
// libraries that do not ship <experimental/type_traits>, e.g. MSVC.
namespace std_experimental
{

namespace detail
{
	/// \brief primary template, selected when Op<Args...> is not a valid type
	template <class AlwaysVoid, template <class...> class Op, class... Args>
	struct detector
	{
		using value_t = std::false_type;
	};

	/// \brief specialisation that only takes part when Op<Args...> is well-formed
	template <template <class...> class Op, class... Args>
	struct detector<std::void_t<Op<Args...>>, Op, Args...>
	{
		using value_t = std::true_type;
	};
} // namespace detail

/// \brief std::true_type if Op<Args...> is a valid type, std::false_type otherwise
template <template <class...> class Op, class... Args>
using is_detected = typename detail::detector<void, Op, Args...>::value_t;

/// \brief the boolean value of is_detected<Op, Args...>
///
/// Declared inline constexpr so that it is an explicit constant expression
/// and has a single definition when this header is included in several
/// translation units.
template <template <class...> class Op, class... Args>
inline constexpr bool is_detected_v = is_detected<Op, Args...>::value;

} // namespace std_experimental
|
|
|
|
#endif
|
|
|
|
/**
|
|
* \file text.hpp
|
|
*
|
|
* Various text manipulating routines
|
|
*/
|
|
|
|
namespace cif
|
|
{
|
|
|
|
// --------------------------------------------------------------------
|
|
|
|
// some basic utilities: Since we're using ASCII input only, we define for optimisation
|
|
// our own case conversion routines.
|
|
|
|
/// \brief return whether string @a a is equal to string @a b, ignoring differences in character case
|
|
bool iequals(std::string_view a, std::string_view b);
|
|
|
|
/// \brief compare string @a a to string @a b, ignoring differences in character case
|
|
int icompare(std::string_view a, std::string_view b);
|
|
|
|
/// \brief return whether string @a a is equal to string @a b, ignoring differences in character case
|
|
bool iequals(const char *a, const char *b);
|
|
|
|
/// \brief compare string @a a to string @a b, ignoring differences in character case
|
|
int icompare(const char *a, const char *b);
|
|
|
|
/// \brief convert the string @a s to lower case in situ
|
|
void to_lower(std::string &s);
|
|
|
|
/// \brief return a lower case copy of string @a s
|
|
std::string to_lower_copy(std::string_view s);
|
|
|
|
/// \brief convert the string @a s to upper case in situ
|
|
void to_upper(std::string &s);
|
|
|
|
/**
 * @brief Concatenate the elements in the range [ @a b, @a e ), writing
 * @a sep between each pair of consecutive elements
 *
 * The elements are written with the stream output operator, an empty
 * range results in an empty string.
 *
 * Example usage:
 *
 * @code {.cpp}
 * std::vector<std::string> v{ "aap", "noot", "mies" };
 *
 * assert(cif::join(v.begin(), v.end(), ", ") == "aap, noot, mies");
 * @endcode
 *
 */
template <typename IterType>
std::string join(IterType b, IterType e, std::string_view sep)
{
	std::ostringstream joined;
	bool first = true;

	for (auto it = b; it != e; ++it)
	{
		// every element except the first one is preceded by the separator
		if (not first)
			joined << sep;
		first = false;

		joined << *it;
	}

	return joined.str();
}
|
|
|
|
/**
 * @brief Join all elements of the container @a arr using @a sep as separator
 *
 * Convenience wrapper that passes the range [ arr.begin(), arr.end() )
 * to the iterator based version of join.
 *
 * Example usage:
 *
 * @code {.cpp}
 * std::list<std::string> v{ "aap", "noot", "mies" };
 *
 * assert(cif::join(v, ", ") == "aap, noot, mies");
 * @endcode
 *
 */
template <typename V>
std::string join(const V &arr, std::string_view sep)
{
	const auto first = arr.begin();
	const auto last = arr.end();

	return join(first, last, sep);
}
|
|
|
|
/**
 * @brief Split the string in @a s at each occurrence of one of the
 * characters in @a separators
 *
 * Every single separator character induces a split, so two adjacent
 * separators enclose an empty string.
 *
 * When @a suppress_empty is true, empty strings are not added to the
 * resulting array.
 *
 * The elements of the result are constructed from a pointer into @a s and
 * a length. With the default @a StringType of std::string_view the result
 * therefore refers to the characters of @a s and does not own them.
 *
 * Example:
 *
 * @code {.cpp}
 * auto v = cif::split("aap:noot,,mies", ":,", true);
 *
 * assert(v == std::vector<std::string_view>{ "aap", "noot", "mies" });
 * @endcode
 *
 */
template <typename StringType = std::string_view>
std::vector<StringType> split(std::string_view s, std::string_view separators, bool suppress_empty = false)
{
	std::vector<StringType> result;

	// offset in s of the first character of the part currently being collected
	std::string_view::size_type start = 0;

	for (;;)
	{
		const auto pos = s.find_first_of(separators, start);
		if (pos == std::string_view::npos)
			break;

		if (pos > start or not suppress_empty)
			result.emplace_back(s.data() + start, pos - start);

		start = pos + 1;
	}

	// whatever follows the last separator (or the whole string if there was none)
	if (s.length() > start or not suppress_empty)
		result.emplace_back(s.data() + start, s.length() - start);

	return result;
}
|
|
|
|
/**
|
|
* @brief Replace all occurrences of @a what in string @a s with the string @a with
|
|
*
|
|
* The string @a with may be empty in which case each occurrence of @a what is simply
|
|
* deleted.
|
|
*/
|
|
void replace_all(std::string &s, std::string_view what, std::string_view with = {});
|
|
|
|
#if defined(__cpp_lib_starts_ends_with)

/// \brief return whether string @a s starts with @a with
///
/// Both arguments are passed as std::string_view, no copy of @a s is made.
inline bool starts_with(std::string_view s, std::string_view with)
{
	return s.starts_with(with);
}

/// \brief return whether string @a s ends with @a with
inline bool ends_with(std::string_view s, std::string_view with)
{
	return s.ends_with(with);
}

#else

/// \brief return whether string @a s starts with @a with
///
/// Both arguments are passed as std::string_view, no copy of @a s is made.
inline bool starts_with(std::string_view s, std::string_view with)
{
	// compare clamps the requested length to the length of s, so an s that
	// is shorter than with simply compares unequal
	return s.compare(0, with.length(), with) == 0;
}

/// \brief return whether string @a s ends with @a with
inline bool ends_with(std::string_view s, std::string_view with)
{
	// the length check guards the subtraction used as start position
	return s.length() >= with.length() and s.compare(s.length() - with.length(), with.length(), with) == 0;
}

#endif
|
|
|
|
/// \brief return whether string @a s contains @a q
///
/// Uses std::string_view::contains when the standard library provides it
/// and falls back to a search using find otherwise.
inline bool contains(std::string_view s, std::string_view q)
{
#if defined(__cpp_lib_string_contains)
	return s.contains(q);
#else
	const auto pos = s.find(q);
	return pos != std::string_view::npos;
#endif
}
|
|
|
|
/// \brief return whether string @a s contains @a q ignoring character case
|
|
bool icontains(std::string_view s, std::string_view q);
|
|
|
|
/// \brief trim white space at the start of string @a s in situ
|
|
void trim_left(std::string &s);
|
|
|
|
/// \brief trim white space at the end of string @a s in situ
|
|
void trim_right(std::string &s);
|
|
|
|
/// \brief trim white space at both the start and the end of string @a s in situ
|
|
void trim(std::string &s);
|
|
|
|
/// \brief return a string trimmed of white space at the start of string @a s
|
|
std::string trim_left_copy(std::string_view s);
|
|
|
|
/// \brief return a string trimmed of white space at the end of string @a s
|
|
std::string trim_right_copy(std::string_view s);
|
|
|
|
/// \brief return a string trimmed of white space at both the start and the end of string @a s
|
|
std::string trim_copy(std::string_view s);
|
|
|
|
// To make life easier, we also define iless and iset using icompare
|
|
|
|
/// \brief an operator object you can use to compare strings ignoring their character case
|
|
struct iless
|
|
{
|
|
/// \brief return the result of icompare for @a a and @a b
|
|
bool operator()(const std::string &a, const std::string &b) const
|
|
{
|
|
return icompare(a, b) < 0;
|
|
}
|
|
};
|
|
|
|
/// iset is a std::set of std::string but with a comparator that
|
|
/// ignores character case.
|
|
using iset = std::set<std::string, iless>;
|
|
|
|
// --------------------------------------------------------------------
|
|
// This really makes a difference, having our own tolower routines
|
|
|
|
/// \brief global list containing the lower case version of each ASCII character
|
|
extern CIFPP_EXPORT const uint8_t kCharToLowerMap[256];
|
|
|
|
/// \brief a very fast tolower implementation
|
|
inline char tolower(int ch)
|
|
{
|
|
return static_cast<char>(kCharToLowerMap[static_cast<uint8_t>(ch)]);
|
|
}
|
|
|
|
// --------------------------------------------------------------------
|
|
|
|
/** \brief return a tuple consisting of the category and item name for @a item_name
|
|
*
|
|
* The category name is stripped of its leading underscore character.
|
|
*
|
|
* If no dot character was found, the category name is empty. That's for
|
|
* cif 1.0 formatted data.
|
|
*/
|
|
|
|
[[deprecated("use split_item_name instead")]]
|
|
std::tuple<std::string, std::string> split_tag_name(std::string_view item_name);
|
|
|
|
|
|
/** \brief return a tuple consisting of the category and item name for @a item_name
|
|
*
|
|
* The category name is stripped of its leading underscore character.
|
|
*
|
|
* If no dot character was found, the category name is empty. That's for
|
|
* cif 1.0 formatted data.
|
|
*/
|
|
|
|
std::tuple<std::string, std::string> split_item_name(std::string_view item_name);
|
|
|
|
// --------------------------------------------------------------------
|
|
|
|
/// \brief generate a cif name, used e.g. to generate asym_id's
|
|
std::string cif_id_for_number(int number);
|
|
|
|
// --------------------------------------------------------------------
|
|
|
|
/** \brief custom word wrapping routine.
|
|
*
|
|
* Wrap the text in @a text based on a maximum line width @a width using
|
|
* a dynamic programming approach to get the most efficient filling of
|
|
* the space.
|
|
*/
|
|
std::vector<std::string> word_wrap(const std::string &text, std::size_t width);
|
|
|
|
// --------------------------------------------------------------------
|
|
|
|
/// \brief the type returned by std::from_chars when called for a value of type @a T
///
/// The alias is only well-formed when a matching overload of std::from_chars
/// exists, which is what makes it usable as operation for is_detected.
template <typename T>
using from_chars_function = decltype(std::from_chars(
	static_cast<const char *>(nullptr),
	static_cast<const char *>(nullptr),
	std::declval<T &>()));
|
|
|
|
/// \brief from_chars implementation that simply forwards to std::from_chars
template <typename T>
struct std_charconv
{
	/// \brief parse the characters in [ @a a, @a b ) into @a d and return
	/// the result of std::from_chars unchanged
	static std::from_chars_result from_chars(const char *a, const char *b, T &d)
	{
		const std::from_chars_result result = std::from_chars(a, b, d);
		return result;
	}
};
|
|
|
|
/// \brief alternative from_chars implementation, only declared here and
/// only specialised for floating point types
template <typename T, typename = void>
struct ff_charconv;

/// \brief the specialisation of ff_charconv for floating point types
template <typename T>
struct ff_charconv<T, std::enable_if_t<std::is_floating_point_v<T>>>
{
	/// \brief parse the characters in [ @a a, @a b ) into @a v; the definition is not part of this header section
	static std::from_chars_result from_chars(const char *a, const char *b, T &v);
};
|
|
|
|
template <typename T>
|
|
using charconv = typename std::conditional_t<std_experimental::is_detected_v<from_chars_function, T>, std_charconv<T>, ff_charconv<T>>;
|
|
|
|
template <typename T>
|
|
constexpr auto from_chars(const char *s, const char *e, T &v)
|
|
{
|
|
return charconv<T>::from_chars(s, e, v);
|
|
}
|
|
|
|
} // namespace cif
|