From 71e525cd76790c0d1479fe668d07f7779c21c9f5 Mon Sep 17 00:00:00 2001 From: "Maarten L. Hekkelman" Date: Mon, 17 Feb 2025 09:40:36 +0100 Subject: [PATCH] Refactored dictionary loading --- CMakeLists.txt | 2 +- changelog | 4 + include/cif++/datablock.hpp | 13 ++++ include/cif++/file.hpp | 54 +++---------- include/cif++/parser.hpp | 11 +++ src/datablock.cpp | 29 +++++++ src/file.cpp | 148 ++++++++++++++++-------------------- src/parser.cpp | 3 + src/pdb/pdb2cif.cpp | 12 +-- src/pdb/reconstruct.cpp | 2 +- test/model-test.cpp | 16 ++-- test/unit-v2-test.cpp | 33 +++----- 12 files changed, 164 insertions(+), 163 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index dba933e..3a0046d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 3.23) # set the project name project( libcifpp - VERSION 7.0.10 + VERSION 8.0.0 LANGUAGES CXX) list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") diff --git a/changelog b/changelog index 2865804..c3d0895 100644 --- a/changelog +++ b/changelog @@ -1,3 +1,7 @@ +Version 8.0.0 +- A dictionary is for a datablock and a file can have + datablocks with differing dictionaries. + Version 7.0.10 - Deal with missing _entity.type in reconstructing mmCIF files - Replace code creating quaternions from rotation matrices diff --git a/include/cif++/datablock.hpp b/include/cif++/datablock.hpp index 5d2b5f8..efa0b3f 100644 --- a/include/cif++/datablock.hpp +++ b/include/cif++/datablock.hpp @@ -98,6 +98,19 @@ class datablock : public std::list m_name = name; } + /** + * @brief Attempt to load the dictionary specified in audit_conform category + * + */ + void load_dictionary(); + + /** + * @brief Load the dictionary named @a dict_name + * + * @param dict_name + */ + void load_dictionary(std::string_view dict_name); + /** * @brief Set the validator object to @a v * diff --git a/include/cif++/file.hpp b/include/cif++/file.hpp index caa85b0..b8d8dbd 100644 --- a/include/cif++/file.hpp +++ b/include/cif++/file.hpp @@ -118,19 +118,6 @@ class file : public std::list /** @endcond */ - /** - * @brief Set the validator object to @a v - */ - void set_validator(const validator *v); - - /** - * @brief Get the validator object - */ - const validator *get_validator() const - { - return m_validator; - } - /** * @brief Validate the content and return true if everything was valid. * @@ -165,32 +152,6 @@ class file : public std::list */ bool validate_links() const; - /** - * @brief Attempt to load a dictionary (validator) based on - * the contents of the *audit_conform* category, if available. - */ - void load_dictionary(); - - - /** - * @brief Attempt to load the named dictionary @a name and - * create a validator based on it. - * - * The @a name can be the name of a single file, or even the - * stem of that filename. So, e.g. mmcif_pdbx is valid. - * - * Since libcifpp can use extensions to validators, you - * can add them to the name. So if you would like to add - * the dssp extensions you would have to write: - * - * @code{cpp} - * file.load_dictionary("mmcif_pdbx;dssp-extension"); - * @endcode - * - * @param name The name of the dictionary to load - */ - void load_dictionary(std::string_view name); - /** * @brief Return true if a datablock with the name @a name is part of this file */ @@ -243,6 +204,18 @@ class file : public std::list /** Load the data from @a is */ void load(std::istream &is); + /** Load the data from the file specified by @a p using validator @a v */ + void load(const std::filesystem::path &p, const validator &v); + + /** Load the data from @a is using validator @a v */ + void load(std::istream &is, const validator &v); + + /** Load the data from the file specified by @a p using a validator constructed from dictionary @a dict */ + void load(const std::filesystem::path &p, std::string_view dict); + + /** Load the data from @a is using a validator constructed from dictionary @a dict */ + void load(std::istream &is, std::string_view dict); + /** Save the data to the file specified by @a p */ void save(const std::filesystem::path &p) const; @@ -257,9 +230,6 @@ class file : public std::list f.save(os); return os; } - - private: - const validator *m_validator = nullptr; }; } // namespace cif \ No newline at end of file diff --git a/include/cif++/parser.hpp b/include/cif++/parser.hpp index 5b9d352..08bc974 100644 --- a/include/cif++/parser.hpp +++ b/include/cif++/parser.hpp @@ -39,6 +39,8 @@ namespace cif { +class validator; + // -------------------------------------------------------------------- /** Exception that is thrown when the mmCIF file contains a parsing error */ @@ -307,6 +309,14 @@ class sac_parser class parser : public sac_parser { public: + /// \brief constructor, generates data into @a file from @a is using validator @a v + parser(std::istream &is, file &file, const validator *v) + : sac_parser(is) + , m_file(file) + , m_validator(v) + { + } + /// \brief constructor, generates data into @a file from @a is parser(std::istream &is, file &file) : sac_parser(is) @@ -327,6 +337,7 @@ class parser : public sac_parser file &m_file; datablock *m_datablock = nullptr; category *m_category = nullptr; + const validator *m_validator = nullptr; row_handle m_row; /** @endcond */ diff --git a/src/datablock.cpp b/src/datablock.cpp index b3fcbcc..0243b97 100644 --- a/src/datablock.cpp +++ b/src/datablock.cpp @@ -38,6 +38,35 @@ datablock::datablock(const datablock &db) cat.update_links(*this); } +void datablock::load_dictionary() +{ + if (auto *audit_conform = get("audit_conform"); audit_conform and not audit_conform->empty()) + { + std::string name = audit_conform->front().get("dict_name"); + + if (name == "mmcif_pdbx_v50") + name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp... + + if (not name.empty()) + { + try + { + load_dictionary(name); + } + catch (const std::exception &ex) + { + if (VERBOSE) + std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n'; + } + } + } +} + +void datablock::load_dictionary(std::string_view name) +{ + set_validator(&validator_factory::instance()[name]); +} + void datablock::set_validator(const validator *v) { m_validator = v; diff --git a/src/file.cpp b/src/file.cpp index 5db3804..f7bf35d 100644 --- a/src/file.cpp +++ b/src/file.cpp @@ -30,40 +30,8 @@ namespace cif { -// -------------------------------------------------------------------- -// TODO: This is wrong. A validator should be assigned to datablocks, -// not to a file. Since audit_conform is a category specifying the -// content of a datablock. Not the entire file. - -void file::set_validator(const validator *v) -{ - m_validator = v; - for (bool first = true; auto &db : *this) - { - try - { - db.set_validator(v); - } - catch (const std::exception &e) - { - if (first) - throw; - - // Accept failure on secondary datablocks - // now that many mmCIF files have invalid - // restraint data concatenated. - std::cerr << e.what() << '\n'; - } - - first = false; - } -} - bool file::is_valid() const { - if (m_validator == nullptr) - std::runtime_error("No validator loaded explicitly, cannot continue"); - bool result = true; for (auto &d : *this) result = d.is_valid() and result; @@ -76,14 +44,6 @@ bool file::is_valid() const bool file::is_valid() { - if (m_validator == nullptr) - { - if (VERBOSE > 0) - std::cerr << "No dictionary loaded explicitly, loading default\n"; - - load_dictionary(); - } - bool result = not empty(); for (auto &d : *this) @@ -97,9 +57,6 @@ bool file::is_valid() bool file::validate_links() const { - if (m_validator == nullptr) - std::runtime_error("No validator loaded explicitly, cannot continue"); - bool result = true; for (auto &db : *this) @@ -108,41 +65,41 @@ bool file::validate_links() const return result; } -void file::load_dictionary() -{ - if (not empty()) - { - auto *audit_conform = front().get("audit_conform"); - if (audit_conform and not audit_conform->empty()) - { - std::string name = audit_conform->front().get("dict_name"); +// void file::load_dictionary() +// { +// if (not empty()) +// { +// auto *audit_conform = front().get("audit_conform"); +// if (audit_conform and not audit_conform->empty()) +// { +// std::string name = audit_conform->front().get("dict_name"); - if (name == "mmcif_pdbx_v50") - name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp... +// if (name == "mmcif_pdbx_v50") +// name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp... - if (not name.empty()) - { - try - { - load_dictionary(name); - } - catch (const std::exception &ex) - { - if (VERBOSE) - std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n'; - } - } - } - } +// if (not name.empty()) +// { +// try +// { +// load_dictionary(name); +// } +// catch (const std::exception &ex) +// { +// if (VERBOSE) +// std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n'; +// } +// } +// } +// } - // if (not m_validator) - // load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve? -} +// // if (not m_validator) +// // load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve? +// } -void file::load_dictionary(std::string_view name) -{ - set_validator(&validator_factory::instance()[name]); -} +// void file::load_dictionary(std::string_view name) +// { +// set_validator(&validator_factory::instance()[name]); +// } bool file::contains(std::string_view name) const { @@ -187,10 +144,7 @@ std::tuple file::emplace(std::string_view name) } if (is_new) - { i = insert(end(), { name }); - i->set_validator(m_validator); - } assert(i != end()); return std::make_tuple(i, is_new); @@ -212,18 +166,44 @@ void file::load(const std::filesystem::path &p) } } -void file::load(std::istream &is) +void file::load(const std::filesystem::path &p, std::string_view dict) { - auto saved = m_validator; - set_validator(nullptr); + load(p, validator_factory::instance().operator[](dict)); +} +void file::load(std::istream &is, std::string_view dict) +{ + load(is, validator_factory::instance().operator[](dict)); +} + +void file::load(const std::filesystem::path &p, const validator &v) +{ + gzio::ifstream in(p); + if (not in.is_open()) + throw std::runtime_error("Could not open file '" + p.string() + '\''); + + try + { + load(in, v); + } + catch (const std::exception &) + { + throw_with_nested(std::runtime_error("Error reading file '" + p.string() + '\'')); + } +} + +void file::load(std::istream &is, const validator &v) +{ parser p(is, *this); p.parse_file(); + for (auto &db : *this) + db.set_validator(&v); +} - if (saved != nullptr) - set_validator(saved); - else - load_dictionary(); +void file::load(std::istream &is) +{ + parser p(is, *this); + p.parse_file(); } void file::save(const std::filesystem::path &p) const diff --git a/src/parser.cpp b/src/parser.cpp index 1a02405..75f986c 100644 --- a/src/parser.cpp +++ b/src/parser.cpp @@ -837,6 +837,9 @@ void parser::produce_datablock(std::string_view name) const auto &[iter, ignore] = m_file.emplace(name); m_datablock = &(*iter); + + if (m_validator) + m_datablock->set_validator(m_validator); } void parser::produce_category(std::string_view name) diff --git a/src/pdb/pdb2cif.cpp b/src/pdb/pdb2cif.cpp index 970e855..f0da837 100644 --- a/src/pdb/pdb2cif.cpp +++ b/src/pdb/pdb2cif.cpp @@ -5909,7 +5909,8 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result) { try { - mDatablock.set_validator(result.get_validator()); + if (mDatablock.get_validator() == nullptr) + mDatablock.load_dictionary(); PreParseInput(is); @@ -6373,10 +6374,11 @@ void read_pdb_file(std::istream &pdbFile, cif::file &cifFile) { PDBFileParser p; - cifFile.load_dictionary("mmcif_pdbx.dic"); - p.Parse(pdbFile, cifFile); + if (not cifFile.empty() and cifFile.front().get_validator() == nullptr) + cifFile.front().load_dictionary("mmcif_pdbx.dic"); + if (not cifFile.is_valid() and cif::VERBOSE >= 0) std::cerr << "Resulting mmCIF file is not valid!\n"; } @@ -6421,8 +6423,8 @@ file read(std::istream &is) } // Must be a PDB like file, right? - if (result.get_validator() == nullptr) - result.load_dictionary("mmcif_pdbx.dic"); + if (not result.empty() and result.front().get_validator() == nullptr) + result.front().load_dictionary("mmcif_pdbx.dic"); return result; } diff --git a/src/pdb/reconstruct.cpp b/src/pdb/reconstruct.cpp index 54ceff8..aa58bb9 100644 --- a/src/pdb/reconstruct.cpp +++ b/src/pdb/reconstruct.cpp @@ -1371,7 +1371,7 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary) db["chem_comp"].reorder_by_index(); - file.load_dictionary(dictionary); + db.load_dictionary(dictionary); if (db.get("atom_site_anisotrop")) checkAtomAnisotropRecords(db); diff --git a/test/model-test.cpp b/test/model-test.cpp index f45b1c4..267a8cf 100644 --- a/test/model-test.cpp +++ b/test/model-test.cpp @@ -53,8 +53,8 @@ TEST_CASE("create_nonpoly_1") cif::VERBOSE = 1; cif::file file; - file.load_dictionary("mmcif_pdbx.dic"); - file.emplace("TEST"); // create a datablock + auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock + dbi->load_dictionary("mmcif_pdbx.dic"); cif::mm::structure structure(file); @@ -82,7 +82,7 @@ _atom_site.pdbx_formal_charge # that's enough to test with )"_cf; - atoms.load_dictionary("mmcif_pdbx.dic"); + atoms.front().load_dictionary("mmcif_pdbx.dic"); auto &hem_data = atoms["HEM"]; auto &atom_site = hem_data["atom_site"]; @@ -159,7 +159,7 @@ _struct_asym.details ? _atom_type.symbol C )"_cf; - expected.load_dictionary("mmcif_pdbx.dic"); + expected.front().load_dictionary("mmcif_pdbx.dic"); if (not(expected.front() == structure.get_datablock())) { @@ -177,8 +177,8 @@ TEST_CASE("create_nonpoly_2") cif::VERBOSE = 1; cif::file file; - file.load_dictionary("mmcif_pdbx.dic"); - file.emplace("TEST"); // create a datablock + auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock + dbi->load_dictionary("mmcif_pdbx.dic"); cif::mm::structure structure(file); @@ -270,7 +270,7 @@ _struct_asym.details ? _atom_type.symbol C )"_cf; - expected.load_dictionary("mmcif_pdbx.dic"); + expected.front().load_dictionary("mmcif_pdbx.dic"); REQUIRE(expected.front() == structure.get_datablock()); @@ -354,7 +354,7 @@ _struct_asym.details ? # )"_cf; - data.load_dictionary("mmcif_pdbx.dic"); + data.front().load_dictionary("mmcif_pdbx.dic"); cif::mm::structure s(data); diff --git a/test/unit-v2-test.cpp b/test/unit-v2-test.cpp index b085202..9f2d296 100644 --- a/test/unit-v2-test.cpp +++ b/test/unit-v2-test.cpp @@ -774,7 +774,6 @@ save__cat_2.desc auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -805,7 +804,7 @@ _cat_2.desc } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); SECTION("one") { @@ -927,7 +926,6 @@ save__cat_1.c auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -950,7 +948,7 @@ mies Mies } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; @@ -1089,7 +1087,6 @@ save__cat_2.desc auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -1123,7 +1120,7 @@ _cat_2.desc } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -1292,7 +1289,6 @@ save__cat_2.parent_id3 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -1336,7 +1332,7 @@ _cat_2.parent_id3 } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -1513,7 +1509,6 @@ cat_2 3 cat_2:cat_1:3 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -1550,7 +1545,7 @@ _cat_2.parent_id3 } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -1753,7 +1748,6 @@ cat_2 1 cat_2:cat_1:1 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -1790,7 +1784,7 @@ _cat_2.parent_id_2 } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); // auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -2149,7 +2143,6 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -2191,7 +2184,7 @@ _cat_3.num } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -2434,7 +2427,6 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -2476,7 +2468,7 @@ _cat_3.num } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"]; auto &cat2 = f.front()["cat_2"]; @@ -3031,7 +3023,6 @@ save__cat_1.name auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -3054,7 +3045,7 @@ _cat_1.name } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); REQUIRE(f.is_valid()); @@ -3226,7 +3217,6 @@ save__cat_1.name auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -3253,7 +3243,7 @@ _cat_1.name } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); REQUIRE(f.is_valid()); @@ -3333,7 +3323,6 @@ save__cat_1.id_2 auto validator = cif::parse_dictionary("test", is_dict); cif::file f; - f.set_validator(&validator); // -------------------------------------------------------------------- @@ -3358,7 +3347,7 @@ _cat_1.id_2 } data_buffer(const_cast(data), sizeof(data) - 1); std::istream is_data(&data_buffer); - f.load(is_data); + f.load(is_data, validator); auto &cat1 = f.front()["cat_1"];