mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 22:14:24 +08:00
Compare commits
18 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
836aed6ea9 | ||
|
|
50df250415 | ||
|
|
2409fc5b7b | ||
|
|
8a1184a24c | ||
|
|
d2fbc54765 | ||
|
|
1bcb26ba75 | ||
|
|
32f4749d84 | ||
|
|
da12be879a | ||
|
|
94a38ad4e8 | ||
|
|
20ef79a172 | ||
|
|
92bf25476e | ||
|
|
b55e074dd7 | ||
|
|
7b654a837d | ||
|
|
ae9d247d22 | ||
|
|
16b7deafe8 | ||
|
|
f2cfe28458 | ||
|
|
2e8a52949e | ||
|
|
441e142767 |
@@ -25,7 +25,7 @@
|
||||
cmake_minimum_required(VERSION 3.16)
|
||||
|
||||
# set the project name
|
||||
project(cifpp VERSION 5.0.9 LANGUAGES CXX)
|
||||
project(cifpp VERSION 5.1.0.1 LANGUAGES CXX)
|
||||
|
||||
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
|
||||
|
||||
@@ -382,6 +382,16 @@ install(FILES
|
||||
DESTINATION ${CIFPP_DATA_DIR}
|
||||
)
|
||||
|
||||
if(${CIFPP_CACHE_DIR})
|
||||
install(FILES
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ddl.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_pdbx.dic
|
||||
${PROJECT_SOURCE_DIR}/rsrc/mmcif_ma.dic
|
||||
${COMPONENTS_CIF}
|
||||
DESTINATION ${CIFPP_CACHE_DIR}
|
||||
)
|
||||
endif()
|
||||
|
||||
set(CONFIG_TEMPLATE_FILE ${PROJECT_SOURCE_DIR}/cmake/cifppConfig.cmake.in)
|
||||
|
||||
configure_package_config_file(
|
||||
|
||||
66
README.md
66
README.md
@@ -3,18 +3,78 @@ libcifpp
|
||||
|
||||
This library contains code to work with mmCIF and PDB files.
|
||||
|
||||
Synopsis
|
||||
--------
|
||||
|
||||
```c++
|
||||
// A simple program counting residues with an OXT atom
|
||||
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc != 2)
|
||||
exit(1);
|
||||
|
||||
// Read file, can be PDB or mmCIF and can even be compressed with gzip.
|
||||
cif::file file = cif::pdb::read(argv[1]);
|
||||
|
||||
if (file.empty())
|
||||
{
|
||||
std::cerr << "Empty file" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
auto &db = file.front();
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
|
||||
|
||||
std::cout << "File contains " << atom_site.size() << " atoms of which "
|
||||
<< n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
<< "residues with an OXT are:" << std::endl;
|
||||
|
||||
for (const auto &[asym, comp, seqnr] :
|
||||
atom_site.find<std::string, std::string, int>(
|
||||
cif::key("label_atom_id") == "OXT",
|
||||
"label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
{
|
||||
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
```
|
||||
|
||||
Requirements
|
||||
------------
|
||||
|
||||
The code for this library was written in C++17. You therefore need a
|
||||
recent compiler to build it. For the development gcc 9.3 and clang 9.0
|
||||
recent compiler to build it. For the development gcc 9.4 and clang 9.0
|
||||
have been used as well as MSVC version 2019.
|
||||
|
||||
Other requirements are:
|
||||
|
||||
- [mrc](https://github.com/mhekkel/mrc), a resource compiler that
|
||||
allows including data files into the executable making them easier to
|
||||
install. Strictly this is optional, but at the expense of functionality.
|
||||
install. Strictly speaking this is optional, but at the expense of
|
||||
functionality.
|
||||
- [libeigen](https://eigen.tuxfamily.org/index.php?title=Main_Page), a
|
||||
library for, among other things, matrix calculations. This can usually be
|
||||
installed using your package manager, in Debian/Ubuntu it is called
|
||||
`libeigen3-dev`
|
||||
- zlib, the development version of this library. On Debian/Ubuntu this
|
||||
is the package `zlib1g-dev`.
|
||||
- [boost](https://www.boost.org). The boost libraries are only needed if
|
||||
you want to build the testing code.
|
||||
|
||||
When building using MS Visual Studio, you will also need [libzeep](https://github.com/mhekkel/libzeep)
|
||||
since MSVC does not yet provide a C++ template required by libcifpp.
|
||||
|
||||
Building
|
||||
--------
|
||||
@@ -26,7 +86,7 @@ On linux e.g. you would issue the following commands to build and install
|
||||
libcifpp in your `$HOME/.local` folder:
|
||||
|
||||
```bash
|
||||
git clone https://github.com/PDB-REDO/libcifpp.git
|
||||
git clone https://github.com/PDB-REDO/libcifpp.git --recurse-submodules
|
||||
cd libcifpp
|
||||
cmake -S . -B build -DCMAKE_INSTALL_PREFIX=$HOME/.local -DCMAKE_BUILD_TYPE=Release
|
||||
cmake --build build
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
Version 5.1
|
||||
- New parser, optimised for speed
|
||||
- Fix in unique ID generator
|
||||
|
||||
Version 5.0.10
|
||||
- Fix in progress_bar, was using too much CPU
|
||||
- Optimised mmCIF parser
|
||||
|
||||
Version 5.0.9
|
||||
- Fix in dihedral angle calculations
|
||||
- Added create_water to model
|
||||
|
||||
@@ -1,24 +1,32 @@
|
||||
#include <iostream>
|
||||
#include <filesystem>
|
||||
#include <iostream>
|
||||
|
||||
#include <cif++.hpp>
|
||||
|
||||
namespace fs = std::filesystem;
|
||||
|
||||
int main()
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
cif::file file;
|
||||
file.load("1cbs.cif.gz");
|
||||
if (argc != 2)
|
||||
exit(1);
|
||||
|
||||
auto& db = file.front();
|
||||
cif::file file = cif::pdb::read(argv[1]);
|
||||
|
||||
if (file.empty())
|
||||
{
|
||||
std::cerr << "Empty file" << std::endl;
|
||||
exit(1);
|
||||
}
|
||||
|
||||
auto &db = file.front();
|
||||
auto &atom_site = db["atom_site"];
|
||||
auto n = atom_site.find(cif::key("label_atom_id") == "OXT").size();
|
||||
|
||||
std::cout << "File contains " << atom_site.size() << " atoms of which " << n << (n == 1 ? " is" : " are") << " OXT" << std::endl
|
||||
<< "residues with an OXT are:" << std::endl;
|
||||
|
||||
for (const auto& [asym, comp, seqnr]: atom_site.find<std::string,std::string,int>(
|
||||
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
<< "residues with an OXT are:" << std::endl;
|
||||
|
||||
for (const auto &[asym, comp, seqnr] : atom_site.find<std::string, std::string, int>(
|
||||
cif::key("label_atom_id") == "OXT", "label_asym_id", "label_comp_id", "label_seq_id"))
|
||||
{
|
||||
std::cout << asym << ' ' << comp << ' ' << seqnr << std::endl;
|
||||
}
|
||||
|
||||
@@ -32,5 +32,6 @@ namespace cif
|
||||
{
|
||||
|
||||
validator parse_dictionary(std::string_view name, std::istream &is);
|
||||
void extend_dictionary(validator &v, std::istream &is);
|
||||
|
||||
} // namespace cif
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
#include "cif++/row.hpp"
|
||||
|
||||
#include <map>
|
||||
#include <regex>
|
||||
|
||||
namespace cif
|
||||
{
|
||||
@@ -54,8 +53,6 @@ class sac_parser
|
||||
public:
|
||||
using datablock_index = std::map<std::string, std::size_t>;
|
||||
|
||||
sac_parser(std::istream &is, bool init = true);
|
||||
|
||||
virtual ~sac_parser() = default;
|
||||
|
||||
enum CharTraitsMask : uint8_t
|
||||
@@ -66,9 +63,14 @@ class sac_parser
|
||||
kAnyPrintMask = 1 << 3
|
||||
};
|
||||
|
||||
static bool is_white(int ch)
|
||||
static constexpr bool is_space(int ch)
|
||||
{
|
||||
return std::isspace(ch) or ch == '#';
|
||||
return ch == ' ' or ch == '\t' or ch == '\r' or ch == '\n';
|
||||
}
|
||||
|
||||
static constexpr bool is_white(int ch)
|
||||
{
|
||||
return is_space(ch) or ch == '#';
|
||||
}
|
||||
|
||||
static constexpr bool is_ordinary(int ch)
|
||||
@@ -92,26 +94,7 @@ class sac_parser
|
||||
(ch >= 0x20 and ch <= 0x7f and (kCharTraitsTable[ch - 0x20] & kAnyPrintMask) != 0);
|
||||
}
|
||||
|
||||
static bool is_unquoted_string(std::string_view text)
|
||||
{
|
||||
bool result = text.empty() or is_ordinary(text.front());
|
||||
|
||||
if (result)
|
||||
{
|
||||
for (auto ch : text)
|
||||
{
|
||||
if (is_non_blank(ch))
|
||||
continue;
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static const std::regex kReservedRx(R"(loop_|stop_|global_|data_\S+|save_\S+)", std::regex_constants::icase);
|
||||
|
||||
// but be careful it does not contain e.g. stop_
|
||||
return result and not std::regex_match(text.begin(), text.end(), kReservedRx);
|
||||
}
|
||||
static bool is_unquoted_string(std::string_view text);
|
||||
|
||||
protected:
|
||||
static constexpr uint8_t kCharTraitsTable[128] = {
|
||||
@@ -133,7 +116,8 @@ class sac_parser
|
||||
DATA,
|
||||
LOOP,
|
||||
GLOBAL,
|
||||
SAVE,
|
||||
SAVE_,
|
||||
SAVE_NAME,
|
||||
STOP,
|
||||
Tag,
|
||||
Value
|
||||
@@ -148,7 +132,8 @@ class sac_parser
|
||||
case CIFToken::DATA: return "DATA";
|
||||
case CIFToken::LOOP: return "LOOP";
|
||||
case CIFToken::GLOBAL: return "GLOBAL";
|
||||
case CIFToken::SAVE: return "SAVE";
|
||||
case CIFToken::SAVE_: return "SAVE";
|
||||
case CIFToken::SAVE_NAME: return "SAVE+name";
|
||||
case CIFToken::STOP: return "STOP";
|
||||
case CIFToken::Tag: return "Tag";
|
||||
case CIFToken::Value: return "Value";
|
||||
@@ -156,41 +141,13 @@ class sac_parser
|
||||
}
|
||||
}
|
||||
|
||||
enum class CIFValue
|
||||
{
|
||||
Int,
|
||||
Float,
|
||||
Numeric,
|
||||
String,
|
||||
TextField,
|
||||
Inapplicable,
|
||||
Unknown
|
||||
};
|
||||
|
||||
static constexpr const char *get_value_name(CIFValue type)
|
||||
{
|
||||
switch (type)
|
||||
{
|
||||
case CIFValue::Int: return "Int";
|
||||
case CIFValue::Float: return "Float";
|
||||
case CIFValue::Numeric: return "Numeric";
|
||||
case CIFValue::String: return "String";
|
||||
case CIFValue::TextField: return "TextField";
|
||||
case CIFValue::Inapplicable: return "Inapplicable";
|
||||
case CIFValue::Unknown: return "Unknown";
|
||||
default: return "Invalid type parameter";
|
||||
}
|
||||
}
|
||||
|
||||
// get_next_char takes a char from the buffer, or if it is empty
|
||||
// from the istream. This function also does carriage/linefeed
|
||||
// translation.
|
||||
// get_next_char takes the next character from the istream.
|
||||
// This function also does carriage/linefeed translation.
|
||||
int get_next_char();
|
||||
|
||||
// Put the last read character back into the istream
|
||||
void retract();
|
||||
|
||||
int restart(int start);
|
||||
|
||||
CIFToken get_next_token();
|
||||
|
||||
void match(CIFToken token);
|
||||
@@ -205,6 +162,9 @@ class sac_parser
|
||||
void parse_file();
|
||||
|
||||
protected:
|
||||
|
||||
sac_parser(std::istream &is, bool init = true);
|
||||
|
||||
void parse_global();
|
||||
|
||||
void parse_datablock();
|
||||
@@ -227,13 +187,14 @@ class sac_parser
|
||||
|
||||
// production methods, these are pure virtual here
|
||||
|
||||
virtual void produce_datablock(const std::string &name) = 0;
|
||||
virtual void produce_category(const std::string &name) = 0;
|
||||
virtual void produce_datablock(std::string_view name) = 0;
|
||||
virtual void produce_category(std::string_view name) = 0;
|
||||
virtual void produce_row() = 0;
|
||||
virtual void produce_item(const std::string &category, const std::string &item, const std::string &value) = 0;
|
||||
virtual void produce_item(std::string_view category, std::string_view item, std::string_view value) = 0;
|
||||
|
||||
protected:
|
||||
enum State
|
||||
|
||||
enum class State
|
||||
{
|
||||
Start,
|
||||
White,
|
||||
@@ -246,23 +207,21 @@ class sac_parser
|
||||
UnquotedString,
|
||||
Tag,
|
||||
TextField,
|
||||
Float = 100,
|
||||
Int = 110,
|
||||
Value = 300,
|
||||
DATA,
|
||||
SAVE
|
||||
TextFieldNL,
|
||||
Reserved,
|
||||
Value
|
||||
};
|
||||
|
||||
std::streambuf &m_source;
|
||||
|
||||
// Parser state
|
||||
bool m_validate;
|
||||
uint32_t m_line_nr;
|
||||
bool m_bol;
|
||||
CIFToken m_lookahead;
|
||||
std::string m_token_value;
|
||||
CIFValue mTokenType;
|
||||
std::vector<int> m_buffer; // retract buffer, used to be a stack<char>
|
||||
|
||||
// token buffer
|
||||
std::vector<char> m_token_buffer;
|
||||
std::string_view m_token_value;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
@@ -276,13 +235,13 @@ class parser : public sac_parser
|
||||
{
|
||||
}
|
||||
|
||||
void produce_datablock(const std::string &name) override;
|
||||
void produce_datablock(std::string_view name) override;
|
||||
|
||||
void produce_category(const std::string &name) override;
|
||||
void produce_category(std::string_view name) override;
|
||||
|
||||
void produce_row() override;
|
||||
|
||||
void produce_item(const std::string &category, const std::string &item, const std::string &value) override;
|
||||
void produce_item(std::string_view category, std::string_view item, std::string_view value) override;
|
||||
|
||||
protected:
|
||||
file &m_file;
|
||||
|
||||
@@ -31,6 +31,7 @@
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <valarray>
|
||||
|
||||
|
||||
@@ -228,8 +228,9 @@ class validator_factory
|
||||
|
||||
const validator &operator[](std::string_view dictionary_name);
|
||||
|
||||
const validator &construct_validator(std::string_view name, std::istream &is);
|
||||
|
||||
private:
|
||||
void construct_validator(std::string_view name, std::istream &is);
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -1227,23 +1227,37 @@ std::string category::get_unique_id(std::function<std::string(int)> generator)
|
||||
{
|
||||
using namespace cif::literals;
|
||||
|
||||
std::string id_tag = "id";
|
||||
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
|
||||
id_tag = m_cat_validator->m_keys.front();
|
||||
|
||||
// calling size() often is a waste of resources
|
||||
if (m_last_unique_num == 0)
|
||||
m_last_unique_num = static_cast<uint32_t>(size());
|
||||
|
||||
for (;;)
|
||||
std::string result = generator(static_cast<int>(m_last_unique_num++));
|
||||
|
||||
std::string id_tag = "id";
|
||||
if (m_cat_validator != nullptr and m_cat_validator->m_keys.size() == 1)
|
||||
{
|
||||
std::string result = generator(static_cast<int>(m_last_unique_num++));
|
||||
|
||||
if (exists(key(id_tag) == result))
|
||||
continue;
|
||||
|
||||
return result;
|
||||
if (m_index == nullptr and m_cat_validator != nullptr)
|
||||
m_index = new category_index(this);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (m_index->find_by_value({{ id_tag, result }}) == nullptr)
|
||||
break;
|
||||
result = generator(static_cast<int>(m_last_unique_num++));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (;;)
|
||||
{
|
||||
if (not exists(key(id_tag) == result))
|
||||
break;
|
||||
|
||||
result = generator(static_cast<int>(m_last_unique_num++));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void category::update_value(const std::vector<row_handle> &rows, std::string_view tag, std::string_view value)
|
||||
|
||||
@@ -117,7 +117,7 @@ class dictionary_parser : public parser
|
||||
if (not m_collected_item_types)
|
||||
m_collected_item_types = collect_item_types();
|
||||
|
||||
std::string saveFrameName = m_token_value;
|
||||
std::string saveFrameName { m_token_value };
|
||||
|
||||
if (saveFrameName.empty())
|
||||
error("Invalid save frame, should contain more than just 'save_' here");
|
||||
@@ -127,7 +127,7 @@ class dictionary_parser : public parser
|
||||
datablock dict(m_token_value);
|
||||
datablock::iterator cat = dict.end();
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
match(CIFToken::SAVE_NAME);
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag)
|
||||
{
|
||||
if (m_lookahead == CIFToken::LOOP)
|
||||
@@ -183,7 +183,7 @@ class dictionary_parser : public parser
|
||||
}
|
||||
}
|
||||
|
||||
match(CIFToken::SAVE);
|
||||
match(CIFToken::SAVE_);
|
||||
|
||||
if (isCategorySaveFrame)
|
||||
{
|
||||
@@ -481,4 +481,11 @@ validator parse_dictionary(std::string_view name, std::istream &is)
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
/// Run a dictionary_parser over the stream @a is, using @a v as the
/// validator the parser operates on.
void extend_dictionary(validator &v, std::istream &is)
{
	// The parser requires a file object to work with; this one is
	// local and is discarded when the function returns.
	file scratch;

	dictionary_parser(v, is, scratch).load_dictionary();
}
|
||||
|
||||
} // namespace cif
|
||||
|
||||
506
src/parser.cpp
506
src/parser.cpp
@@ -32,7 +32,6 @@
|
||||
#include <cassert>
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <regex>
|
||||
#include <stack>
|
||||
|
||||
namespace cif
|
||||
@@ -40,13 +39,152 @@ namespace cif
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
class reserved_words_automaton
|
||||
{
|
||||
public:
|
||||
reserved_words_automaton() {}
|
||||
|
||||
enum move_result
|
||||
{
|
||||
undefined,
|
||||
no_keyword,
|
||||
data,
|
||||
global,
|
||||
loop,
|
||||
save,
|
||||
save_plus,
|
||||
stop
|
||||
};
|
||||
|
||||
constexpr bool finished() const
|
||||
{
|
||||
return m_state <= 0;
|
||||
}
|
||||
|
||||
constexpr bool matched() const
|
||||
{
|
||||
return m_state < 0;
|
||||
}
|
||||
|
||||
constexpr move_result move(int ch)
|
||||
{
|
||||
move_result result = undefined;
|
||||
|
||||
switch (m_state)
|
||||
{
|
||||
case 0:
|
||||
break;
|
||||
|
||||
case -1: // data_
|
||||
if (sac_parser::is_non_blank(ch))
|
||||
m_seen_trailing_chars = true;
|
||||
else if (m_seen_trailing_chars)
|
||||
result = data;
|
||||
else
|
||||
result = no_keyword;
|
||||
break;
|
||||
|
||||
case -2: // global_
|
||||
result = sac_parser::is_non_blank(ch) ? no_keyword : global;
|
||||
break;
|
||||
|
||||
case -3: // loop_
|
||||
result = sac_parser::is_non_blank(ch) ? no_keyword : loop;
|
||||
break;
|
||||
|
||||
case -4: // save_
|
||||
if (sac_parser::is_non_blank(ch))
|
||||
m_seen_trailing_chars = true;
|
||||
else if (m_seen_trailing_chars)
|
||||
result = save_plus;
|
||||
else
|
||||
result = save;
|
||||
break;
|
||||
|
||||
case -5: // stop_
|
||||
result = sac_parser::is_non_blank(ch) ? no_keyword : stop;
|
||||
break;
|
||||
|
||||
default:
|
||||
assert(m_state > 0 and m_state < NODE_COUNT);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
if (s_dag[m_state].ch == (ch & ~0x20))
|
||||
{
|
||||
m_state = s_dag[m_state].next_match;
|
||||
break;
|
||||
}
|
||||
|
||||
m_state = s_dag[m_state].next_nomatch;
|
||||
|
||||
if (m_state == 0)
|
||||
{
|
||||
result = no_keyword;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (result != undefined)
|
||||
m_state = 0;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
static constexpr struct node
|
||||
{
|
||||
int16_t ch;
|
||||
int8_t next_match;
|
||||
int8_t next_nomatch;
|
||||
} s_dag[] = {
|
||||
{ 0 },
|
||||
{ 'D', 5, 2 },
|
||||
{ 'G', 9, 3 },
|
||||
{ 'L', 15, 4 },
|
||||
{ 'S', 19, 0 },
|
||||
{ 'A', 6, 0 },
|
||||
{ 'T', 7, 0 },
|
||||
{ 'A', 8, 0 },
|
||||
{ '_', -1, 0 },
|
||||
{ 'L', 10, 0 },
|
||||
{ 'O', 11, 0 },
|
||||
{ 'B', 12, 0 },
|
||||
{ 'A', 13, 0 },
|
||||
{ 'L', 14, 0 },
|
||||
{ '_', -2, 0 },
|
||||
{ 'O', 16, 0},
|
||||
{ 'O', 17, 0 },
|
||||
{ 'P', 18, 0 },
|
||||
{ '_', -3, 0 },
|
||||
{ 'A', 21, 20 },
|
||||
{ 'T', 24, 0 },
|
||||
{ 'V', 22, 0 },
|
||||
{ 'E', 23, 0 },
|
||||
{ '_', -4, 0 },
|
||||
{ 'O', 25, 0 },
|
||||
{ 'P', 26, 0 },
|
||||
{ '_', -5, 0 },
|
||||
};
|
||||
|
||||
static constexpr int NODE_COUNT = sizeof(s_dag) / sizeof(node);
|
||||
|
||||
int m_state = 1;
|
||||
bool m_seen_trailing_chars = false;
|
||||
};
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
sac_parser::sac_parser(std::istream &is, bool init)
|
||||
: m_source(*is.rdbuf())
|
||||
{
|
||||
m_token_buffer.reserve(8192);
|
||||
|
||||
if (is.rdbuf() == nullptr)
|
||||
throw std::runtime_error("Attempt to read from uninitialised stream");
|
||||
|
||||
m_validate = true;
|
||||
m_line_nr = 1;
|
||||
m_bol = true;
|
||||
|
||||
@@ -54,45 +192,54 @@ sac_parser::sac_parser(std::istream &is, bool init)
|
||||
m_lookahead = get_next_token();
|
||||
}
|
||||
|
||||
bool sac_parser::is_unquoted_string(std::string_view text)
|
||||
{
|
||||
bool result = text.empty() or is_ordinary(text.front());
|
||||
if (result)
|
||||
{
|
||||
reserved_words_automaton automaton;
|
||||
|
||||
for (char ch : text)
|
||||
{
|
||||
if (not is_non_blank(ch))
|
||||
{
|
||||
result = false;
|
||||
break;
|
||||
}
|
||||
|
||||
automaton.move(ch);
|
||||
}
|
||||
|
||||
if (automaton.matched())
|
||||
result = false;
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
// get_next_char takes the next character directly from the
|
||||
// istream. This function also does carriage/linefeed
|
||||
// translation.
|
||||
int sac_parser::get_next_char()
|
||||
{
|
||||
int result = std::char_traits<char>::eof();
|
||||
|
||||
if (m_buffer.empty())
|
||||
result = m_source.sbumpc();
|
||||
else
|
||||
{
|
||||
result = m_buffer.back();
|
||||
m_buffer.pop_back();
|
||||
}
|
||||
|
||||
// very simple CR/LF translation into LF
|
||||
if (result == '\r')
|
||||
{
|
||||
int lookahead = m_source.sbumpc();
|
||||
if (lookahead != '\n')
|
||||
m_buffer.push_back(lookahead);
|
||||
result = '\n';
|
||||
}
|
||||
int result = m_source.sbumpc();
|
||||
|
||||
if (result == std::char_traits<char>::eof())
|
||||
m_token_value.push_back(0);
|
||||
m_token_buffer.push_back(0);
|
||||
else
|
||||
m_token_value.push_back(std::char_traits<char>::to_char_type(result));
|
||||
|
||||
if (result == '\n')
|
||||
++m_line_nr;
|
||||
|
||||
if (VERBOSE >= 6)
|
||||
{
|
||||
std::cerr << "get_next_char => ";
|
||||
if (iscntrl(result) or not isprint(result))
|
||||
std::cerr << int(result) << std::endl;
|
||||
else
|
||||
std::cerr << char(result) << std::endl;
|
||||
if (result == '\r')
|
||||
{
|
||||
if (m_source.sgetc() == '\n')
|
||||
m_source.sbumpc();
|
||||
|
||||
++m_line_nr;
|
||||
result = '\n';
|
||||
}
|
||||
else if (result == '\n')
|
||||
++m_line_nr;
|
||||
|
||||
m_token_buffer.push_back(std::char_traits<char>::to_char_type(result));
|
||||
}
|
||||
|
||||
return result;
|
||||
@@ -100,44 +247,22 @@ int sac_parser::get_next_char()
|
||||
|
||||
void sac_parser::retract()
|
||||
{
|
||||
assert(not m_token_value.empty());
|
||||
assert(not m_token_buffer.empty());
|
||||
|
||||
char ch = m_token_value.back();
|
||||
char ch = m_token_buffer.back();
|
||||
if (ch == '\n')
|
||||
--m_line_nr;
|
||||
|
||||
m_buffer.push_back(ch == 0 ? std::char_traits<char>::eof() : std::char_traits<char>::to_int_type(ch));
|
||||
m_token_value.pop_back();
|
||||
}
|
||||
|
||||
int sac_parser::restart(int start)
|
||||
{
|
||||
int result = 0;
|
||||
|
||||
while (not m_token_value.empty())
|
||||
retract();
|
||||
|
||||
switch (start)
|
||||
if (ch != 0)
|
||||
{
|
||||
case State::Start:
|
||||
result = State::Float;
|
||||
break;
|
||||
// since we always putback at most a single character,
|
||||
// the test below should never fail.
|
||||
|
||||
case State::Float:
|
||||
result = State::Int;
|
||||
break;
|
||||
|
||||
case State::Int:
|
||||
result = State::Value;
|
||||
break;
|
||||
|
||||
default:
|
||||
error("Invalid state in SacParser");
|
||||
if (m_source.sputbackc(ch) == std::char_traits<char>::eof())
|
||||
throw std::runtime_error("putback failure");
|
||||
}
|
||||
|
||||
m_bol = false;
|
||||
|
||||
return result;
|
||||
m_token_buffer.pop_back();
|
||||
}
|
||||
|
||||
sac_parser::CIFToken sac_parser::get_next_token()
|
||||
@@ -146,11 +271,13 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
|
||||
CIFToken result = CIFToken::Unknown;
|
||||
int quoteChar = 0;
|
||||
int state = State::Start, start = State::Start;
|
||||
State state = State::Start;
|
||||
m_bol = false;
|
||||
|
||||
m_token_value.clear();
|
||||
mTokenType = CIFValue::Unknown;
|
||||
m_token_buffer.clear();
|
||||
m_token_value = {};
|
||||
|
||||
reserved_words_automaton dag;
|
||||
|
||||
while (result == CIFToken::Unknown)
|
||||
{
|
||||
@@ -174,23 +301,27 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
state = State::Tag;
|
||||
else if (ch == ';' and m_bol)
|
||||
state = State::TextField;
|
||||
else if (ch == '?')
|
||||
state = State::QuestionMark;
|
||||
else if (ch == '\'' or ch == '"')
|
||||
{
|
||||
quoteChar = ch;
|
||||
state = State::QuotedString;
|
||||
}
|
||||
else if (dag.move(ch) == reserved_words_automaton::undefined)
|
||||
state = State::Reserved;
|
||||
else
|
||||
state = start = restart(start);
|
||||
state = State::Value;
|
||||
break;
|
||||
|
||||
case State::White:
|
||||
if (ch == kEOF)
|
||||
result = CIFToken::Eof;
|
||||
else if (not isspace(ch))
|
||||
else if (not is_space(ch))
|
||||
{
|
||||
state = State::Start;
|
||||
retract();
|
||||
m_token_value.clear();
|
||||
m_token_buffer.clear();
|
||||
}
|
||||
else
|
||||
m_bol = (ch == '\n');
|
||||
@@ -201,38 +332,40 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
{
|
||||
state = State::Start;
|
||||
m_bol = true;
|
||||
m_token_value.clear();
|
||||
m_token_buffer.clear();
|
||||
}
|
||||
else if (ch == kEOF)
|
||||
result = CIFToken::Eof;
|
||||
else if (not is_any_print(ch))
|
||||
error("invalid character in comment");
|
||||
break;
|
||||
|
||||
case State::QuestionMark:
|
||||
if (not is_non_blank(ch))
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
}
|
||||
else
|
||||
state = State::Value;
|
||||
break;
|
||||
|
||||
case State::TextField:
|
||||
if (ch == '\n')
|
||||
state = State::TextField + 1;
|
||||
state = State::TextFieldNL;
|
||||
else if (ch == kEOF)
|
||||
error("unterminated textfield");
|
||||
// else if (ch == '\\')
|
||||
// state = State::Esc;
|
||||
else if (not is_any_print(ch) and cif::VERBOSE > 2)
|
||||
warning("invalid character in text field '" + std::string({static_cast<char>(ch)}) + "' (" + std::to_string((int)ch) + ")");
|
||||
break;
|
||||
|
||||
// case State::Esc:
|
||||
// if (ch == '\n')
|
||||
|
||||
// break;
|
||||
|
||||
case State::TextField + 1:
|
||||
case State::TextFieldNL:
|
||||
if (is_text_lead(ch) or ch == ' ' or ch == '\t')
|
||||
state = State::TextField;
|
||||
else if (ch == ';')
|
||||
{
|
||||
assert(m_token_value.length() >= 2);
|
||||
m_token_value = m_token_value.substr(1, m_token_value.length() - 3);
|
||||
mTokenType = CIFValue::TextField;
|
||||
assert(m_token_buffer.size() >= 2);
|
||||
m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 3);
|
||||
result = CIFToken::Value;
|
||||
}
|
||||
else if (ch == kEOF)
|
||||
@@ -255,12 +388,10 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
mTokenType = CIFValue::String;
|
||||
|
||||
if (m_token_value.length() < 2)
|
||||
if (m_token_buffer.size() < 2)
|
||||
error("Invalid quoted string token");
|
||||
|
||||
m_token_value = m_token_value.substr(1, m_token_value.length() - 2);
|
||||
m_token_value = std::string_view(m_token_buffer.data() + 1, m_token_buffer.size() - 2);
|
||||
}
|
||||
else if (ch == quoteChar)
|
||||
;
|
||||
@@ -277,149 +408,68 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Tag;
|
||||
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
|
||||
}
|
||||
break;
|
||||
|
||||
case State::Float:
|
||||
if (ch == '+' or ch == '-')
|
||||
case State::Reserved:
|
||||
switch (dag.move(ch))
|
||||
{
|
||||
state = State::Float + 1;
|
||||
case reserved_words_automaton::undefined:
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::no_keyword:
|
||||
if (not is_non_blank(ch))
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
|
||||
}
|
||||
else
|
||||
state = State::Value;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::data:
|
||||
retract();
|
||||
m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
|
||||
result = CIFToken::DATA;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::global:
|
||||
retract();
|
||||
result = CIFToken::GLOBAL;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::loop:
|
||||
retract();
|
||||
result = CIFToken::LOOP;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::save:
|
||||
retract();
|
||||
result = CIFToken::SAVE_;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::save_plus:
|
||||
retract();
|
||||
m_token_value = std::string_view(m_token_buffer.data() + 5, m_token_buffer.size() - 5);
|
||||
result = CIFToken::SAVE_NAME;
|
||||
break;
|
||||
|
||||
case reserved_words_automaton::stop:
|
||||
retract();
|
||||
result = CIFToken::STOP;
|
||||
break;
|
||||
}
|
||||
else if (isdigit(ch))
|
||||
state = State::Float + 1;
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Float + 1:
|
||||
// if (ch == '(') // numeric???
|
||||
// mState = State::NumericSuffix;
|
||||
// else
|
||||
if (ch == '.')
|
||||
state = State::Float + 2;
|
||||
else if (tolower(ch) == 'e')
|
||||
state = State::Float + 3;
|
||||
else if (is_white(ch) or ch == kEOF)
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
mTokenType = CIFValue::Int;
|
||||
}
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
// parsed '.'
|
||||
case State::Float + 2:
|
||||
if (tolower(ch) == 'e')
|
||||
state = State::Float + 3;
|
||||
else if (is_white(ch) or ch == kEOF)
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
mTokenType = CIFValue::Float;
|
||||
}
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
// parsed 'e'
|
||||
case State::Float + 3:
|
||||
if (ch == '-' or ch == '+')
|
||||
state = State::Float + 4;
|
||||
else if (isdigit(ch))
|
||||
state = State::Float + 5;
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Float + 4:
|
||||
if (isdigit(ch))
|
||||
state = State::Float + 5;
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Float + 5:
|
||||
if (is_white(ch) or ch == kEOF)
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
mTokenType = CIFValue::Float;
|
||||
}
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Int:
|
||||
if (isdigit(ch) or ch == '+' or ch == '-')
|
||||
state = State::Int + 1;
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Int + 1:
|
||||
if (is_white(ch) or ch == kEOF)
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
mTokenType = CIFValue::Int;
|
||||
}
|
||||
else
|
||||
state = start = restart(start);
|
||||
break;
|
||||
|
||||
case State::Value:
|
||||
if (ch == '_')
|
||||
{
|
||||
std::string s = to_lower_copy(m_token_value);
|
||||
|
||||
if (s == "data_")
|
||||
{
|
||||
state = State::DATA;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (s == "save_")
|
||||
{
|
||||
state = State::SAVE;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (result == CIFToken::Unknown and not is_non_blank(ch))
|
||||
{
|
||||
retract();
|
||||
result = CIFToken::Value;
|
||||
|
||||
if (m_token_value == ".")
|
||||
mTokenType = CIFValue::Inapplicable;
|
||||
else if (iequals(m_token_value, "global_"))
|
||||
result = CIFToken::GLOBAL;
|
||||
else if (iequals(m_token_value, "stop_"))
|
||||
result = CIFToken::STOP;
|
||||
else if (iequals(m_token_value, "loop_"))
|
||||
result = CIFToken::LOOP;
|
||||
else if (m_token_value == "?")
|
||||
{
|
||||
mTokenType = CIFValue::Unknown;
|
||||
m_token_value.clear();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
case State::DATA:
|
||||
case State::SAVE:
|
||||
if (not is_non_blank(ch))
|
||||
{
|
||||
retract();
|
||||
|
||||
if (state == State::DATA)
|
||||
result = CIFToken::DATA;
|
||||
else
|
||||
result = CIFToken::SAVE;
|
||||
|
||||
m_token_value.erase(m_token_value.begin(), m_token_value.begin() + 5);
|
||||
result = CIFToken::Value;
|
||||
m_token_value = std::string_view(m_token_buffer.data(), m_token_buffer.size());
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -433,8 +483,6 @@ sac_parser::CIFToken sac_parser::get_next_token()
|
||||
if (VERBOSE >= 5)
|
||||
{
|
||||
std::cerr << get_token_name(result);
|
||||
if (mTokenType != CIFValue::Unknown)
|
||||
std::cerr << ' ' << get_value_name(mTokenType);
|
||||
if (result != CIFToken::Eof)
|
||||
std::cerr << " " << std::quoted(m_token_value);
|
||||
std::cerr << std::endl;
|
||||
@@ -506,7 +554,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
|
||||
break;
|
||||
|
||||
case string_quote:
|
||||
if (std::isspace(ch))
|
||||
if (is_space(ch))
|
||||
state = start;
|
||||
else
|
||||
state = string;
|
||||
@@ -518,7 +566,7 @@ bool sac_parser::parse_single_datablock(const std::string &datablock)
|
||||
break;
|
||||
|
||||
case data:
|
||||
if (isspace(ch) and dblk[si] == 0)
|
||||
if (is_space(ch) and dblk[si] == 0)
|
||||
found = true;
|
||||
else if (dblk[si++] != ch)
|
||||
state = start;
|
||||
@@ -596,7 +644,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
|
||||
break;
|
||||
|
||||
case string_quote:
|
||||
if (std::isspace(ch))
|
||||
if (is_space(ch))
|
||||
state = start;
|
||||
else
|
||||
state = string;
|
||||
@@ -620,7 +668,7 @@ sac_parser::datablock_index sac_parser::index_datablocks()
|
||||
case data_name:
|
||||
if (is_non_blank(ch))
|
||||
datablock.insert(datablock.end(), char(ch));
|
||||
else if (isspace(ch))
|
||||
else if (is_space(ch))
|
||||
{
|
||||
if (not datablock.empty())
|
||||
index[datablock] = m_source.pubseekoff(0, std::ios_base::cur, std::ios_base::in);
|
||||
@@ -696,7 +744,7 @@ void sac_parser::parse_datablock()
|
||||
static const std::string kUnitializedCategory("<invalid>");
|
||||
std::string cat = kUnitializedCategory; // initial value acts as a guard for empty category names
|
||||
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE)
|
||||
while (m_lookahead == CIFToken::LOOP or m_lookahead == CIFToken::Tag or m_lookahead == CIFToken::SAVE_NAME)
|
||||
{
|
||||
switch (m_lookahead)
|
||||
{
|
||||
@@ -761,7 +809,7 @@ void sac_parser::parse_datablock()
|
||||
break;
|
||||
}
|
||||
|
||||
case CIFToken::SAVE:
|
||||
case CIFToken::SAVE_NAME:
|
||||
parse_save_frame();
|
||||
break;
|
||||
|
||||
@@ -779,7 +827,7 @@ void sac_parser::parse_save_frame()
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
void parser::produce_datablock(const std::string &name)
|
||||
void parser::produce_datablock(std::string_view name)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing data_" << name << std::endl;
|
||||
@@ -788,7 +836,7 @@ void parser::produce_datablock(const std::string &name)
|
||||
m_datablock = &(*iter);
|
||||
}
|
||||
|
||||
void parser::produce_category(const std::string &name)
|
||||
void parser::produce_category(std::string_view name)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing category " << name << std::endl;
|
||||
@@ -810,7 +858,7 @@ void parser::produce_row()
|
||||
// m_row.lineNr(m_line_nr);
|
||||
}
|
||||
|
||||
void parser::produce_item(const std::string &category, const std::string &item, const std::string &value)
|
||||
void parser::produce_item(std::string_view category, std::string_view item, std::string_view value)
|
||||
{
|
||||
if (VERBOSE >= 4)
|
||||
std::cerr << "producing _" << category << '.' << item << " -> " << value << std::endl;
|
||||
@@ -821,4 +869,4 @@ void parser::produce_item(const std::string &category, const std::string &item,
|
||||
m_row[item] = m_token_value;
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
} // namespace cif
|
||||
|
||||
25
src/text.cpp
25
src/text.cpp
// Return the letter label that belongs to the zero-based sequence number
// `number`.
//
// Labels use the letters A-Z in a bijective base-26 scheme, so there is no
// "zero" letter and every non-negative number gets its own label:
//   0 -> "A", 25 -> "Z", 26 -> "AA", 27 -> "AB", 701 -> "ZZ", 702 -> "AAA"
//
// `number` is expected to be non-negative; for a negative value the
// remainder below is negative and the result would contain characters
// outside A-Z.
std::string cif_id_for_number(int number)
{
	std::string result;

	// Letters are produced least significant first and reversed afterwards.
	do
	{
		const int r = number % 26;
		result += static_cast<char>('A' + r);

		// The extra -1 is what makes "AA" follow "Z": after removing the
		// current letter the remaining prefix is itself a zero-based index,
		// and a value of -1 means there is no prefix left.
		number = (number - r) / 26 - 1;
	}
	while (number >= 0);

	std::reverse(result.begin(), result.end());

	// The do/while body runs at least once, so at least one letter exists.
	assert(not result.empty());

	return result;
}
|
||||
|
||||
|
||||
@@ -40,7 +40,6 @@
|
||||
#include <iostream>
|
||||
#include <map>
|
||||
#include <mutex>
|
||||
#include <regex>
|
||||
#include <sstream>
|
||||
#include <thread>
|
||||
|
||||
@@ -161,6 +160,8 @@ struct progress_bar_impl
|
||||
void print_progress();
|
||||
void print_done();
|
||||
|
||||
using time_point = std::chrono::time_point<std::chrono::system_clock>;
|
||||
|
||||
int64_t m_max_value;
|
||||
std::atomic<int64_t> m_consumed;
|
||||
int64_t m_last_consumed = 0;
|
||||
@@ -168,8 +169,8 @@ struct progress_bar_impl
|
||||
std::string m_action, m_message;
|
||||
std::mutex m_mutex;
|
||||
std::thread m_thread;
|
||||
std::chrono::time_point<std::chrono::system_clock>
|
||||
m_start = std::chrono::system_clock::now();
|
||||
time_point m_start = std::chrono::system_clock::now();
|
||||
time_point m_last = std::chrono::system_clock::now();
|
||||
bool m_stop = false;
|
||||
};
|
||||
|
||||
@@ -192,7 +193,9 @@ void progress_bar_impl::run()
|
||||
{
|
||||
while (not m_stop)
|
||||
{
|
||||
if (std::chrono::system_clock::now() - m_start < 2s)
|
||||
auto now = std::chrono::system_clock::now();
|
||||
|
||||
if (now - m_start < 2s or now - m_last < 100ms)
|
||||
{
|
||||
std::this_thread::sleep_for(10ms);
|
||||
continue;
|
||||
@@ -206,6 +209,7 @@ void progress_bar_impl::run()
|
||||
print_progress();
|
||||
|
||||
printedAny = true;
|
||||
m_last = std::chrono::system_clock::now();
|
||||
}
|
||||
}
|
||||
catch (...)
|
||||
|
||||
@@ -491,9 +491,9 @@ const validator &validator_factory::operator[](std::string_view dictionary_name)
|
||||
}
|
||||
}
|
||||
|
||||
void validator_factory::construct_validator(std::string_view name, std::istream &is)
|
||||
const validator &validator_factory::construct_validator(std::string_view name, std::istream &is)
|
||||
{
|
||||
m_validators.emplace_back(parse_dictionary(name, is));
|
||||
return m_validators.emplace_back(parse_dictionary(name, is));
|
||||
}
|
||||
|
||||
} // namespace cif
|
||||
|
||||
39
test/io-test.cpp
Normal file
39
test/io-test.cpp
Normal file
@@ -0,0 +1,39 @@
|
||||
#include <cif++.hpp>
|
||||
|
||||
class dummy_parser : public cif::sac_parser
|
||||
{
|
||||
public:
|
||||
dummy_parser(std::istream &is)
|
||||
: sac_parser(is)
|
||||
{
|
||||
}
|
||||
|
||||
void produce_datablock(std::string_view name) override
|
||||
{
|
||||
}
|
||||
|
||||
void produce_category(std::string_view name) override
|
||||
{
|
||||
}
|
||||
|
||||
void produce_row() override
|
||||
{
|
||||
}
|
||||
|
||||
void produce_item(std::string_view category, std::string_view item, std::string_view value) override
|
||||
{
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
cif::gzio::ifstream in("/srv/data/pdb/mmCIF/gl/8glv.cif.gz");
|
||||
|
||||
dummy_parser parser(in);
|
||||
parser.parse_file();
|
||||
|
||||
// cif::file f("/srv/data/pdb/mmCIF/gl/8glv.cif.gz");
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -75,6 +75,30 @@ bool init_unit_test()
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
// Checks cif::cif_id_for_number: first a handful of fixed values around the
// points where the label grows by one letter, then that the first 100000
// numbers all yield different labels.
BOOST_AUTO_TEST_CASE(id_1)
{
	// one letter
	BOOST_TEST(cif::cif_id_for_number(0) == "A");
	BOOST_TEST(cif::cif_id_for_number(25) == "Z");

	// two letters
	BOOST_TEST(cif::cif_id_for_number(26) == "AA");
	BOOST_TEST(cif::cif_id_for_number(26 + 1) == "AB");
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 - 1) == "ZZ");

	// three letters
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26) == "AAA");
	BOOST_TEST(cif::cif_id_for_number(26 + 26 * 26 + 1) == "AAB");

	constexpr int kCount = 100000;
	std::set<std::string> seen;

	for (int nr = 0; nr < kCount; ++nr)
	{
		// insert() reports through .second whether the label was new
		const bool is_new = seen.insert(cif::cif_id_for_number(nr)).second;
		BOOST_TEST(is_new);
	}

	BOOST_TEST(seen.size() == kCount);
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
BOOST_AUTO_TEST_CASE(cc_1)
|
||||
{
|
||||
std::tuple<std::string_view, float, char> tests[] = {
|
||||
@@ -2357,8 +2381,6 @@ _test.text ??
|
||||
|
||||
BOOST_AUTO_TEST_CASE(output_test_1)
|
||||
{
|
||||
cif::VERBOSE = 5;
|
||||
|
||||
auto data1 = R"(
|
||||
data_Q
|
||||
loop_
|
||||
@@ -2863,7 +2885,7 @@ save__cat_1.name
|
||||
|
||||
std::istream is_dict(&buffer);
|
||||
|
||||
auto validator = cif::parse_dictionary("test_dict.dic", is_dict);
|
||||
auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict);
|
||||
|
||||
cif::file f;
|
||||
f.set_validator(&validator);
|
||||
@@ -2901,8 +2923,6 @@ _cat_1.name
|
||||
ss << f;
|
||||
|
||||
cif::file f2(ss);
|
||||
|
||||
f2.set_validator(&validator);
|
||||
BOOST_ASSERT(f2.is_valid());
|
||||
|
||||
auto &audit_conform = f2.front()["audit_conform"];
|
||||
|
||||
Reference in New Issue
Block a user