mirror of
https://github.com/schrodinger/pymol-open-source.git
synced 2026-06-04 20:04:21 +08:00
411 lines
11 KiB
C++
411 lines
11 KiB
C++
/*
|
|
* CIF tokenizer
|
|
*
|
|
* (c) 2014 Schrodinger, Inc.
|
|
*/
|
|
|
|
#ifndef _H_CIFFILE
|
|
#define _H_CIFFILE
|
|
|
|
#include <cstddef>
|
|
#include <cstdint>
|
|
#include <cstring>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <variant>
|
|
|
|
// for pymol::default_free
|
|
#include "MemoryDebug.h"
|
|
|
|
template<class... Ts>
|
|
struct overloaded : Ts... { using Ts::operator()...; };
|
|
template<class... Ts>
|
|
overloaded(Ts...) -> overloaded<Ts...>;
|
|
|
|
namespace pymol {
|
|
namespace _cif_detail {
|
|
|
|
/**
|
|
* Null-terminated string view.
|
|
*/
|
|
class zstring_view {
|
|
const char* m_data;
|
|
|
|
public:
|
|
zstring_view(const char* s) : m_data(s) {}
|
|
|
|
bool operator<(zstring_view rhs) const {
|
|
return std::strcmp(m_data, rhs.m_data) < 0;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* Convert a raw cif data value to a typed value
|
|
*/
|
|
template <typename T> T raw_to_typed(const char*);
|
|
|
|
} // namespace _cif_detail
|
|
|
|
// cif data types
|
|
class cif_data;
|
|
class cif_loop;
|
|
class cif_array;
|
|
namespace cif_detail {
|
|
struct cif_str_data;
|
|
struct bcif_data;
|
|
};
|
|
using CIFData = std::variant<cif_detail::cif_str_data, cif_detail::bcif_data>;
|
|
|
|
/**
|
|
* Class for reading CIF files.
|
|
* Parses the entire file and exposes its data blocks.
|
|
*
|
|
* Read CIF file:
|
|
* @verbatim auto cf = cif_file("file.cif"); @endverbatim
|
|
*
|
|
* Read CIF string:
|
|
* @verbatim auto cf = cif_file(nullptr, cifstring); @endverbatim
|
|
*
|
|
* Iterate over data blocks:
|
|
* @verbatim
|
|
for (auto& [code, block] : cf.datablocks()) {
|
|
// data_<code>
|
|
const char* code = block->code();
|
|
|
|
// get data item pointer, or nullptr if name not found
|
|
auto* dataitem1 = block->get_arr("_some.name1");
|
|
auto* dataitem2 = block->get_arr("_some.name2", "_alternate.name");
|
|
|
|
// get data item pointer, or default item if name not found
|
|
auto* dataitem3 = block->get_opt("_some.name3");
|
|
|
|
// value of data item
|
|
const char* stringvalue = dataitem3->as_s();
|
|
int intvalue = dataitem3->as_i();
|
|
|
|
// values of looped data item
|
|
for (unsigned i = 0, i_end = dataitem3->size(); i != i_end; ++i) {
|
|
const char* stringvalue = dataitem3->as_s(i);
|
|
}
|
|
}
|
|
@endverbatim
|
|
*/
|
|
class cif_file {
|
|
std::vector<char*> m_tokens;
|
|
std::map<std::string, cif_data> m_datablocks;
|
|
std::unique_ptr<char, pymol::default_free> m_contents;
|
|
|
|
/**
|
|
* Parse CIF string
|
|
* @param p CIF string (takes ownership)
|
|
* @post datablocks() is valid
|
|
*/
|
|
bool parse(char*&&);
|
|
|
|
public:
|
|
/// Parse CIF file
|
|
bool parse_file(const char*);
|
|
|
|
/// Parse CIF string
|
|
bool parse_string(const char*);
|
|
|
|
/**
|
|
* Parse BinaryCIF blob
|
|
* @param bytes BinaryCIF blob
|
|
* @param size Blob size
|
|
* @post datablocks() is valid
|
|
*/
|
|
bool parse_bcif(const char* bytes, std::size_t size);
|
|
|
|
protected:
|
|
/// Report a parsing error
|
|
virtual void error(const char*);
|
|
|
|
public:
|
|
cif_file();
|
|
cif_file(cif_file&&);
|
|
cif_file(const cif_file&) = delete;
|
|
cif_file& operator=(cif_file&&);
|
|
cif_file& operator=(const cif_file&) = delete;
|
|
virtual ~cif_file();
|
|
|
|
/// Construct from file name or buffer
|
|
cif_file(const char* filename, const char* contents = nullptr);
|
|
|
|
/// Data blocks
|
|
const std::map<std::string, cif_data>& datablocks() const { return m_datablocks; }
|
|
};
|
|
|
|
|
|
using CifArrayElement = std::variant<std::int8_t, std::int16_t, std::int32_t,
|
|
std::uint8_t, std::uint16_t, std::uint32_t, float, double, std::string>;
|
|
|
|
namespace cif_detail {
|
|
struct cif_str_array {
|
|
enum { NOT_IN_LOOP = -1 };
|
|
|
|
// column index, -1 if not in loop
|
|
short col;
|
|
|
|
// pointer to either loop or single value
|
|
union {
|
|
const cif_loop * loop;
|
|
const char * value;
|
|
} pointer;
|
|
|
|
// Raw data value or NULL for unknown/inapplicable and `pos >= size()`
|
|
const char* get_value_raw(unsigned pos = 0) const;
|
|
|
|
// point this array to a loop (only for parsing)
|
|
void set_loop(const cif_loop * loop, short col_) {
|
|
col = col_;
|
|
pointer.loop = loop;
|
|
};
|
|
|
|
// point this array to a single value (only for parsing)
|
|
void set_value(const char * value) {
|
|
col = NOT_IN_LOOP;
|
|
pointer.value = value;
|
|
};
|
|
};
|
|
struct bcif_array {
|
|
std::vector<CifArrayElement> m_arr{};
|
|
};
|
|
|
|
/**
|
|
* Returns a typed value from a CIF data element.
|
|
* If the element is missing or inapplicable, return `d`.
|
|
* @param var CIF data element
|
|
* @param d default value
|
|
* @return typed value
|
|
*/
|
|
template <typename T> T var_to_typed(const CifArrayElement& var, const T& d)
|
|
{
|
|
if constexpr (std::is_same_v<T, const char*>) {
|
|
auto& str = std::get<std::string>(var);
|
|
return !str.empty() ? str.c_str() : d;
|
|
} else {
|
|
if (auto ptr = std::get_if<std::string>(&var); ptr && ptr->empty()) {
|
|
return d;
|
|
}
|
|
if constexpr (!std::is_same_v<T, std::string>) {
|
|
return std::visit(overloaded{[](const std::string& s) -> T {
|
|
return _cif_detail::raw_to_typed<T>(
|
|
s.c_str());
|
|
},
|
|
[](const auto& v) -> T { return v; }},
|
|
var);
|
|
}
|
|
}
|
|
return d;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* View on a CIF data array. The viewed data is owned by the cif_file
|
|
*/
|
|
class cif_array {
|
|
friend class cif_file;
|
|
|
|
private:
|
|
mutable std::string m_internal_str_cache;
|
|
std::variant<cif_detail::cif_str_array, cif_detail::bcif_array> m_array;
|
|
|
|
public:
|
|
// constructor
|
|
cif_array() = default;
|
|
|
|
// constructor (only needed for EMPTY_ARRAY)
|
|
cif_array(std::nullptr_t) {
|
|
if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
|
|
arr->set_value(nullptr);
|
|
} else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
|
|
arr->m_arr.clear();
|
|
}
|
|
}
|
|
|
|
cif_array(std::vector<CifArrayElement>&& arr) {
|
|
m_array = cif_detail::bcif_array{std::move(arr)};
|
|
}
|
|
|
|
/// Number of elements in this array (= number of rows in loop)
|
|
unsigned size() const;
|
|
|
|
/// True if value in ['.', '?']
|
|
bool is_missing(unsigned pos = 0) const {
|
|
if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
|
|
return !arr->get_value_raw(pos);
|
|
} else {
|
|
return false;
|
|
}
|
|
}
|
|
|
|
/// True if all values in ['.', '?']
|
|
bool is_missing_all() const;
|
|
|
|
/**
|
|
* Get element as type T. If `pos >= size()` then return `d`.
|
|
* @param pos element index (= row index in loop)
|
|
* @param d default value for unknown/inapplicable elements
|
|
*/
|
|
template <typename T> T as(unsigned pos = 0, T d = T()) const {
|
|
if (auto arr = std::get_if<cif_detail::cif_str_array>(&m_array)) {
|
|
const char* s = arr->get_value_raw(pos);
|
|
return s ? _cif_detail::raw_to_typed<T>(s) : d;
|
|
} else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
|
|
if (pos >= arr->m_arr.size())
|
|
return d;
|
|
auto& var = arr->m_arr[pos];
|
|
return cif_detail::var_to_typed<T>(var, d);
|
|
}
|
|
return d;
|
|
}
|
|
|
|
/**
|
|
* Get element as null-terminated string. The default value is the empty
|
|
* string, unlike as<const char*>() which returns nullptr as the default value.
|
|
* If `pos >= size()` then return `d`.
|
|
* @param pos element index (= row index in loop)
|
|
* @param d default value for unknown/inapplicable elements
|
|
*/
|
|
const char* as_s(unsigned pos = 0, const char* d = "") const {
|
|
if (std::get_if<cif_detail::cif_str_array>(&m_array)) {
|
|
return as(pos, d);
|
|
} else if (auto arr = std::get_if<cif_detail::bcif_array>(&m_array)) {
|
|
if (pos >= arr->m_arr.size())
|
|
return d;
|
|
if (auto str_ptr = std::get_if<std::string>(&arr->m_arr[pos])) {
|
|
return str_ptr->c_str();
|
|
}
|
|
m_internal_str_cache = std::visit([](auto&& arg) -> std::string {
|
|
if constexpr (std::is_same_v<std::decay_t<decltype(arg)>,
|
|
std::string>) {
|
|
return arg;
|
|
} else {
|
|
return std::to_string(arg);
|
|
}
|
|
}, arr->m_arr[pos]);
|
|
return m_internal_str_cache.c_str();
|
|
}
|
|
return d;
|
|
}
|
|
|
|
/// Alias for as<int>()
|
|
int as_i(unsigned pos = 0, int d = 0) const { return as(pos, d); }
|
|
|
|
/// Alias for as<double>()
|
|
double as_d(unsigned pos = 0, double d = 0.) const { return as(pos, d); }
|
|
|
|
/// Alias for as<float>()
|
|
float as_f(unsigned pos = 0, float f = 0.0f) const { return static_cast<float>(as_d(pos, f)); }
|
|
|
|
/**
|
|
* Get a copy of the array.
|
|
* @param d default value for unknown/inapplicable elements
|
|
*/
|
|
template <typename T> std::vector<T> to_vector(T d = T()) const {
|
|
auto n = size();
|
|
std::vector<T> v;
|
|
v.reserve(n);
|
|
for (unsigned i = 0; i < n; ++i)
|
|
v.push_back(as<T>(i, d));
|
|
return v;
|
|
}
|
|
};
|
|
|
|
/**
|
|
* CIF data block. The viewed data is owned by the cif_file.
|
|
*/
|
|
|
|
namespace cif_detail {
|
|
struct cif_str_data {
|
|
// data_<code>
|
|
const char* m_code = nullptr;
|
|
|
|
std::map<_cif_detail::zstring_view, cif_array> m_dict;
|
|
std::map<std::string, cif_array> m_dict_str;
|
|
std::map<_cif_detail::zstring_view, cif_detail::cif_str_data> m_saveframes;
|
|
|
|
// only needed for freeing
|
|
std::vector<std::unique_ptr<cif_loop>> m_loops;
|
|
};
|
|
|
|
using ColumnMap = std::map<std::string, std::vector<CifArrayElement>>;
|
|
using CategoryMap = std::map<std::string, ColumnMap>;
|
|
using DataBlockMap = std::map<std::string, CategoryMap>;
|
|
struct bcif_data {
|
|
std::string m_code;
|
|
std::map<std::string, std::map<std::string, cif_array>> m_dict;
|
|
};
|
|
}
|
|
|
|
class cif_data {
|
|
friend class cif_file;
|
|
|
|
CIFData m_data;
|
|
|
|
// generic default value
|
|
static const cif_array* empty_array();
|
|
|
|
public:
|
|
|
|
cif_data() = default;
|
|
cif_data(const cif_data&) = delete;
|
|
cif_data(cif_data&&) = default;
|
|
cif_data& operator=(const cif_data&) = delete;
|
|
cif_data& operator=(cif_data&&) = default;
|
|
|
|
/// Block code (never nullptr)
|
|
const char* code() const;
|
|
|
|
// Get a pointer to array or nullptr if not found
|
|
const cif_array* get_arr(const char* key) const;
|
|
template <typename... Args>
|
|
const cif_array* get_arr(const char* key, Args... aliases) const
|
|
{
|
|
auto arr = get_arr(key);
|
|
return arr ? arr : get_arr(aliases...);
|
|
}
|
|
|
|
/// Like get_arr() but return a default value instead of nullptr if not found
|
|
template <typename... Args> const cif_array* get_opt(Args... keys) const
|
|
{
|
|
auto arr = get_arr(keys...);
|
|
return arr ? arr : empty_array();
|
|
}
|
|
|
|
/// Get a pointer to a save frame or nullptr if not found
|
|
const cif_detail::cif_str_data* get_saveframe(const char* code) const;
|
|
};
|
|
|
|
namespace cif
|
|
{
|
|
enum class DataTypes {
|
|
Int8 = 1,
|
|
Int16 = 2,
|
|
Int32 = 3,
|
|
UInt8 = 4,
|
|
UInt16 = 5,
|
|
UInt32 = 6,
|
|
Float32 = 32,
|
|
Float64 = 33,
|
|
};
|
|
} // namespace cif
|
|
|
|
} // namespace pymol
|
|
|
|
/**
|
|
* CIF parser which captures the last error message.
|
|
*/
|
|
class cif_file_with_error_capture : public pymol::cif_file
|
|
{
|
|
public:
|
|
std::string m_error_msg;
|
|
void error(const char* msg) override { m_error_msg = msg; }
|
|
};
|
|
|
|
#endif
|
|
// vi:sw=2:ts=2
|