Refactored dictionary loading

This commit is contained in:
Maarten L. Hekkelman
2025-02-17 09:40:36 +01:00
parent 1480706d8b
commit 71e525cd76
12 changed files with 164 additions and 163 deletions

View File

@@ -27,7 +27,7 @@ cmake_minimum_required(VERSION 3.23)
# set the project name
project(
libcifpp
VERSION 7.0.10
VERSION 8.0.0
LANGUAGES CXX)
list(PREPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")

View File

@@ -1,3 +1,7 @@
Version 8.0.0
- A dictionary is for a datablock and a file can have
datablocks with differing dictionaries.
Version 7.0.10
- Deal with missing _entity.type in reconstructing mmCIF files
- Replace code creating quaternions from rotation matrices

View File

@@ -98,6 +98,19 @@ class datablock : public std::list<category>
m_name = name;
}
/**
* @brief Attempt to load the dictionary specified in audit_conform category
*
*/
void load_dictionary();
/**
* @brief Load the dictionary named @a dict_name
*
* @param dict_name
*/
void load_dictionary(std::string_view dict_name);
/**
* @brief Set the validator object to @a v
*

View File

@@ -118,19 +118,6 @@ class file : public std::list<datablock>
/** @endcond */
/**
* @brief Set the validator object to @a v
*/
void set_validator(const validator *v);
/**
* @brief Get the validator object
*/
const validator *get_validator() const
{
return m_validator;
}
/**
* @brief Validate the content and return true if everything was valid.
*
@@ -165,32 +152,6 @@ class file : public std::list<datablock>
*/
bool validate_links() const;
/**
* @brief Attempt to load a dictionary (validator) based on
* the contents of the *audit_conform* category, if available.
*/
void load_dictionary();
/**
* @brief Attempt to load the named dictionary @a name and
* create a validator based on it.
*
* The @a name can be the name of a single file, or even the
* stem of that filename. So, e.g. mmcif_pdbx is valid.
*
* Since libcifpp can use extensions to validators, you
* can add them to the name. So if you would like to add
* the dssp extensions you would have to write:
*
* @code{cpp}
* file.load_dictionary("mmcif_pdbx;dssp-extension");
* @endcode
*
* @param name The name of the dictionary to load
*/
void load_dictionary(std::string_view name);
/**
* @brief Return true if a datablock with the name @a name is part of this file
*/
@@ -243,6 +204,18 @@ class file : public std::list<datablock>
/** Load the data from @a is */
void load(std::istream &is);
/** Load the data from the file specified by @a p using validator @a v */
void load(const std::filesystem::path &p, const validator &v);
/** Load the data from @a is using validator @a v */
void load(std::istream &is, const validator &v);
/** Load the data from the file specified by @a p using a validator constructed from dictionary @a dict */
void load(const std::filesystem::path &p, std::string_view dict);
/** Load the data from @a is using a validator constructed from dictionary @a dict */
void load(std::istream &is, std::string_view dict);
/** Save the data to the file specified by @a p */
void save(const std::filesystem::path &p) const;
@@ -257,9 +230,6 @@ class file : public std::list<datablock>
f.save(os);
return os;
}
private:
const validator *m_validator = nullptr;
};
} // namespace cif

View File

@@ -39,6 +39,8 @@
namespace cif
{
class validator;
// --------------------------------------------------------------------
/** Exception that is thrown when the mmCIF file contains a parsing error */
@@ -307,6 +309,14 @@ class sac_parser
class parser : public sac_parser
{
public:
/// \brief constructor, generates data into @a file from @a is using validator @a v
parser(std::istream &is, file &file, const validator *v)
: sac_parser(is)
, m_file(file)
, m_validator(v)
{
}
/// \brief constructor, generates data into @a file from @a is
parser(std::istream &is, file &file)
: sac_parser(is)
@@ -327,6 +337,7 @@ class parser : public sac_parser
file &m_file;
datablock *m_datablock = nullptr;
category *m_category = nullptr;
const validator *m_validator = nullptr;
row_handle m_row;
/** @endcond */

View File

@@ -38,6 +38,35 @@ datablock::datablock(const datablock &db)
cat.update_links(*this);
}
void datablock::load_dictionary()
{
if (auto *audit_conform = get("audit_conform"); audit_conform and not audit_conform->empty())
{
std::string name = audit_conform->front().get<std::string>("dict_name");
if (name == "mmcif_pdbx_v50")
name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...
if (not name.empty())
{
try
{
load_dictionary(name);
}
catch (const std::exception &ex)
{
if (VERBOSE)
std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n';
}
}
}
}
void datablock::load_dictionary(std::string_view name)
{
set_validator(&validator_factory::instance()[name]);
}
void datablock::set_validator(const validator *v)
{
m_validator = v;

View File

@@ -30,40 +30,8 @@
namespace cif
{
// --------------------------------------------------------------------
// TODO: This is wrong. A validator should be assigned to datablocks,
// not to a file. Since audit_conform is a category specifying the
// content of a datablock. Not the entire file.
void file::set_validator(const validator *v)
{
m_validator = v;
for (bool first = true; auto &db : *this)
{
try
{
db.set_validator(v);
}
catch (const std::exception &e)
{
if (first)
throw;
// Accept failure on secondary datablocks
// now that many mmCIF files have invalid
// restraint data concatenated.
std::cerr << e.what() << '\n';
}
first = false;
}
}
bool file::is_valid() const
{
if (m_validator == nullptr)
std::runtime_error("No validator loaded explicitly, cannot continue");
bool result = true;
for (auto &d : *this)
result = d.is_valid() and result;
@@ -76,14 +44,6 @@ bool file::is_valid() const
bool file::is_valid()
{
if (m_validator == nullptr)
{
if (VERBOSE > 0)
std::cerr << "No dictionary loaded explicitly, loading default\n";
load_dictionary();
}
bool result = not empty();
for (auto &d : *this)
@@ -97,9 +57,6 @@ bool file::is_valid()
bool file::validate_links() const
{
if (m_validator == nullptr)
std::runtime_error("No validator loaded explicitly, cannot continue");
bool result = true;
for (auto &db : *this)
@@ -108,41 +65,41 @@ bool file::validate_links() const
return result;
}
void file::load_dictionary()
{
if (not empty())
{
auto *audit_conform = front().get("audit_conform");
if (audit_conform and not audit_conform->empty())
{
std::string name = audit_conform->front().get<std::string>("dict_name");
// void file::load_dictionary()
// {
// if (not empty())
// {
// auto *audit_conform = front().get("audit_conform");
// if (audit_conform and not audit_conform->empty())
// {
// std::string name = audit_conform->front().get<std::string>("dict_name");
if (name == "mmcif_pdbx_v50")
name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...
// if (name == "mmcif_pdbx_v50")
// name = "mmcif_pdbx.dic"; // we had a bug here in libcifpp...
if (not name.empty())
{
try
{
load_dictionary(name);
}
catch (const std::exception &ex)
{
if (VERBOSE)
std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n';
}
}
}
}
// if (not name.empty())
// {
// try
// {
// load_dictionary(name);
// }
// catch (const std::exception &ex)
// {
// if (VERBOSE)
// std::cerr << "Failed to load dictionary " << std::quoted(name) << ": " << ex.what() << '\n';
// }
// }
// }
// }
// if (not m_validator)
// load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve?
}
// // if (not m_validator)
// // load_dictionary("mmcif_pdbx.dic"); // TODO: maybe incorrect? Perhaps improve?
// }
void file::load_dictionary(std::string_view name)
{
set_validator(&validator_factory::instance()[name]);
}
// void file::load_dictionary(std::string_view name)
// {
// set_validator(&validator_factory::instance()[name]);
// }
bool file::contains(std::string_view name) const
{
@@ -187,10 +144,7 @@ std::tuple<file::iterator, bool> file::emplace(std::string_view name)
}
if (is_new)
{
i = insert(end(), { name });
i->set_validator(m_validator);
}
assert(i != end());
return std::make_tuple(i, is_new);
@@ -212,18 +166,44 @@ void file::load(const std::filesystem::path &p)
}
}
void file::load(std::istream &is)
void file::load(const std::filesystem::path &p, std::string_view dict)
{
auto saved = m_validator;
set_validator(nullptr);
load(p, validator_factory::instance().operator[](dict));
}
void file::load(std::istream &is, std::string_view dict)
{
load(is, validator_factory::instance().operator[](dict));
}
void file::load(const std::filesystem::path &p, const validator &v)
{
gzio::ifstream in(p);
if (not in.is_open())
throw std::runtime_error("Could not open file '" + p.string() + '\'');
try
{
load(in, v);
}
catch (const std::exception &)
{
throw_with_nested(std::runtime_error("Error reading file '" + p.string() + '\''));
}
}
void file::load(std::istream &is, const validator &v)
{
parser p(is, *this);
p.parse_file();
for (auto &db : *this)
db.set_validator(&v);
}
if (saved != nullptr)
set_validator(saved);
else
load_dictionary();
void file::load(std::istream &is)
{
parser p(is, *this);
p.parse_file();
}
void file::save(const std::filesystem::path &p) const

View File

@@ -837,6 +837,9 @@ void parser::produce_datablock(std::string_view name)
const auto &[iter, ignore] = m_file.emplace(name);
m_datablock = &(*iter);
if (m_validator)
m_datablock->set_validator(m_validator);
}
void parser::produce_category(std::string_view name)

View File

@@ -5909,7 +5909,8 @@ void PDBFileParser::Parse(std::istream &is, cif::file &result)
{
try
{
mDatablock.set_validator(result.get_validator());
if (mDatablock.get_validator() == nullptr)
mDatablock.load_dictionary();
PreParseInput(is);
@@ -6373,10 +6374,11 @@ void read_pdb_file(std::istream &pdbFile, cif::file &cifFile)
{
PDBFileParser p;
cifFile.load_dictionary("mmcif_pdbx.dic");
p.Parse(pdbFile, cifFile);
if (not cifFile.empty() and cifFile.front().get_validator() == nullptr)
cifFile.front().load_dictionary("mmcif_pdbx.dic");
if (not cifFile.is_valid() and cif::VERBOSE >= 0)
std::cerr << "Resulting mmCIF file is not valid!\n";
}
@@ -6421,8 +6423,8 @@ file read(std::istream &is)
}
// Must be a PDB like file, right?
if (result.get_validator() == nullptr)
result.load_dictionary("mmcif_pdbx.dic");
if (not result.empty() and result.front().get_validator() == nullptr)
result.front().load_dictionary("mmcif_pdbx.dic");
return result;
}

View File

@@ -1371,7 +1371,7 @@ bool reconstruct_pdbx(file &file, std::string_view dictionary)
db["chem_comp"].reorder_by_index();
file.load_dictionary(dictionary);
db.load_dictionary(dictionary);
if (db.get("atom_site_anisotrop"))
checkAtomAnisotropRecords(db);

View File

@@ -53,8 +53,8 @@ TEST_CASE("create_nonpoly_1")
cif::VERBOSE = 1;
cif::file file;
file.load_dictionary("mmcif_pdbx.dic");
file.emplace("TEST"); // create a datablock
auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock
dbi->load_dictionary("mmcif_pdbx.dic");
cif::mm::structure structure(file);
@@ -82,7 +82,7 @@ _atom_site.pdbx_formal_charge
# that's enough to test with
)"_cf;
atoms.load_dictionary("mmcif_pdbx.dic");
atoms.front().load_dictionary("mmcif_pdbx.dic");
auto &hem_data = atoms["HEM"];
auto &atom_site = hem_data["atom_site"];
@@ -159,7 +159,7 @@ _struct_asym.details ?
_atom_type.symbol C
)"_cf;
expected.load_dictionary("mmcif_pdbx.dic");
expected.front().load_dictionary("mmcif_pdbx.dic");
if (not(expected.front() == structure.get_datablock()))
{
@@ -177,8 +177,8 @@ TEST_CASE("create_nonpoly_2")
cif::VERBOSE = 1;
cif::file file;
file.load_dictionary("mmcif_pdbx.dic");
file.emplace("TEST"); // create a datablock
auto &&[dbi, ignore] = file.emplace("TEST"); // create a datablock
dbi->load_dictionary("mmcif_pdbx.dic");
cif::mm::structure structure(file);
@@ -270,7 +270,7 @@ _struct_asym.details ?
_atom_type.symbol C
)"_cf;
expected.load_dictionary("mmcif_pdbx.dic");
expected.front().load_dictionary("mmcif_pdbx.dic");
REQUIRE(expected.front() == structure.get_datablock());
@@ -354,7 +354,7 @@ _struct_asym.details ?
#
)"_cf;
data.load_dictionary("mmcif_pdbx.dic");
data.front().load_dictionary("mmcif_pdbx.dic");
cif::mm::structure s(data);

View File

@@ -774,7 +774,6 @@ save__cat_2.desc
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -805,7 +804,7 @@ _cat_2.desc
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
SECTION("one")
{
@@ -927,7 +926,6 @@ save__cat_1.c
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -950,7 +948,7 @@ mies Mies
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
@@ -1089,7 +1087,6 @@ save__cat_2.desc
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -1123,7 +1120,7 @@ _cat_2.desc
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -1292,7 +1289,6 @@ save__cat_2.parent_id3
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -1336,7 +1332,7 @@ _cat_2.parent_id3
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -1513,7 +1509,6 @@ cat_2 3 cat_2:cat_1:3
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -1550,7 +1545,7 @@ _cat_2.parent_id3
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -1753,7 +1748,6 @@ cat_2 1 cat_2:cat_1:1
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -1790,7 +1784,7 @@ _cat_2.parent_id_2
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
// auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -2149,7 +2143,6 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -2191,7 +2184,7 @@ _cat_3.num
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -2434,7 +2427,6 @@ cat_2 1 '_cat_2.num' '_cat_3.num' cat_3
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -2476,7 +2468,7 @@ _cat_3.num
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];
auto &cat2 = f.front()["cat_2"];
@@ -3031,7 +3023,6 @@ save__cat_1.name
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -3054,7 +3045,7 @@ _cat_1.name
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
REQUIRE(f.is_valid());
@@ -3226,7 +3217,6 @@ save__cat_1.name
auto &validator = cif::validator_factory::instance().construct_validator("test_dict.dic", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -3253,7 +3243,7 @@ _cat_1.name
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
REQUIRE(f.is_valid());
@@ -3333,7 +3323,6 @@ save__cat_1.id_2
auto validator = cif::parse_dictionary("test", is_dict);
cif::file f;
f.set_validator(&validator);
// --------------------------------------------------------------------
@@ -3358,7 +3347,7 @@ _cat_1.id_2
} data_buffer(const_cast<char *>(data), sizeof(data) - 1);
std::istream is_data(&data_buffer);
f.load(is_data);
f.load(is_data, validator);
auto &cat1 = f.front()["cat_1"];