mirror of
https://github.com/PDB-REDO/libcifpp.git
synced 2026-06-04 13:54:25 +08:00
moving towards using CCD, parsing single datablock
This commit is contained in:
@@ -218,7 +218,7 @@ $(1)-test: test/$(1)-test
|
||||
|
||||
endef
|
||||
|
||||
TESTS = unit # pdb2cif
|
||||
TESTS = unit pdb2cif
|
||||
|
||||
$(foreach part,$(TESTS),$(eval $(call TEST_template,$(part))))
|
||||
|
||||
|
||||
3
configure
vendored
3
configure
vendored
@@ -1474,7 +1474,8 @@ Some influential environment variables:
|
||||
DEBUG Build a debug version of the application
|
||||
LT_SYS_LIBRARY_PATH
|
||||
User-defined run-time library search path.
|
||||
CCP4 The location where CCP4 is installed
|
||||
CCP4 The location where CCP4 is installed. This is only required if
|
||||
you want to rebuild the symmetry operations table.
|
||||
MRC Specify a location for the mrc executable
|
||||
DATA_LIB_DIR
|
||||
The location where to store packaged dictionary files
|
||||
|
||||
@@ -53,7 +53,7 @@ dnl and now for the semantic version
|
||||
LIBCIF_SEMANTIC_VERSION=libcifpp_version
|
||||
AC_SUBST(LIBCIF_SEMANTIC_VERSION)
|
||||
|
||||
AC_ARG_VAR([CCP4], [The location where CCP4 is installed])
|
||||
AC_ARG_VAR([CCP4], [The location where CCP4 is installed. CCP4 is only required if you want to rebuild the symmetry operations table.])
|
||||
|
||||
AX_MRC
|
||||
|
||||
@@ -88,11 +88,11 @@ AX_IOSTREAMS_BZ2
|
||||
AC_CHECK_LIB([atomic], [atomic_flag_clear])
|
||||
|
||||
dnl Set output variables for the various directories
|
||||
AC_ARG_VAR([DATA_LIB_DIR], [The location where to store packaged dictionary files])
|
||||
AC_ARG_VAR([DATA_LIB_DIR], [The location where to store packaged dictionary and CCD files])
|
||||
AC_SUBST([DATA_LIB_DIR])
|
||||
DATA_LIB_DIR=$datadir/libcifpp
|
||||
|
||||
AC_ARG_VAR([DATA_CACHE_DIR], [The location where to store cached dictionary files])
|
||||
AC_ARG_VAR([DATA_CACHE_DIR], [The location where to store cached dictionary and CCD files])
|
||||
AC_SUBST([DATA_CACHE_DIR])
|
||||
DATA_CACHE_DIR=/var/cache/libcifpp
|
||||
|
||||
|
||||
@@ -1922,6 +1922,9 @@ class File
|
||||
void load(std::istream& is);
|
||||
void save(std::ostream& os);
|
||||
|
||||
/// \brief Load only the data block \a datablock from the mmCIF file
|
||||
void load(std::istream& is, const std::string& datablock);
|
||||
|
||||
void save(std::ostream& os, const std::vector<std::string>& order) { write(os, order); }
|
||||
void write(std::ostream& os, const std::vector<std::string>& order);
|
||||
|
||||
|
||||
@@ -101,7 +101,7 @@ std::tuple<std::string,std::string> splitTagName(const std::string& tag);
|
||||
class SacParser
|
||||
{
|
||||
public:
|
||||
SacParser(std::istream& is);
|
||||
SacParser(std::istream& is, bool init = true);
|
||||
virtual ~SacParser() {}
|
||||
|
||||
enum CIFToken
|
||||
@@ -142,6 +142,8 @@ class SacParser
|
||||
CIFToken getNextToken();
|
||||
void match(CIFToken token);
|
||||
|
||||
bool parseFile(const std::string& datablock);
|
||||
|
||||
void parseFile();
|
||||
void parseGlobal();
|
||||
void parseDataBlock();
|
||||
|
||||
@@ -98,7 +98,6 @@ class Compound
|
||||
{
|
||||
public:
|
||||
Compound(cif::Datablock &db);
|
||||
~Compound();
|
||||
|
||||
/// \brief factory method, create a Compound based on the three letter code
|
||||
/// (for amino acids) or the one-letter code (for bases) or the
|
||||
@@ -106,21 +105,6 @@ class Compound
|
||||
|
||||
static const Compound *create(const std::string &id);
|
||||
|
||||
// /// this second factory method can create a Compound even if it is not
|
||||
// /// recorded in the library. It will take the values from the CCP4 lib
|
||||
// /// unless the value passed to this function is not empty.
|
||||
// static const Compound* create(const std::string& id, const std::string& name,
|
||||
// const std::string& type, const std::string& formula);
|
||||
|
||||
/// \brief Create compounds based on the data in the file \a components
|
||||
///
|
||||
/// It is often required to add information about unknown components.
|
||||
/// This file parses either a CCP4 or a CCD formatted components file
|
||||
///
|
||||
/// \param components The mmCIF file containing the components
|
||||
/// \result An array containing the ID's of the added components
|
||||
static std::vector<std::string> addExtraComponents(const std::filesystem::path &components);
|
||||
|
||||
// accessors
|
||||
|
||||
std::string id() const { return mID; }
|
||||
|
||||
@@ -3067,6 +3067,21 @@ void File::load(std::istream& is)
|
||||
}
|
||||
}
|
||||
|
||||
void File::load(std::istream& is, const std::string& datablock)
|
||||
{
|
||||
Validator* saved = mValidator;
|
||||
setValidator(nullptr);
|
||||
|
||||
Parser p(is, *this);
|
||||
p.parseFile(datablock);
|
||||
|
||||
if (saved != nullptr)
|
||||
{
|
||||
setValidator(saved);
|
||||
(void)isValid();
|
||||
}
|
||||
}
|
||||
|
||||
void File::save(std::ostream& os)
|
||||
{
|
||||
Datablock* e = mHead;
|
||||
|
||||
@@ -84,13 +84,15 @@ const char* SacParser::kValueName[] = {
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
SacParser::SacParser(std::istream& is)
|
||||
SacParser::SacParser(std::istream& is, bool init)
|
||||
: mData(is)
|
||||
{
|
||||
mValidate = true;
|
||||
mLineNr = 1;
|
||||
mBol = true;
|
||||
mLookahead = getNextToken();
|
||||
|
||||
if (init)
|
||||
mLookahead = getNextToken();
|
||||
}
|
||||
|
||||
void SacParser::error(const std::string& msg)
|
||||
@@ -521,6 +523,90 @@ SacParser::CIFToken SacParser::getNextToken()
|
||||
return result;
|
||||
}
|
||||
|
||||
bool SacParser::parseFile(const std::string& datablock)
|
||||
{
|
||||
// first locate the start, as fast as we can
|
||||
auto &sb = *mData.rdbuf();
|
||||
|
||||
enum {
|
||||
start, comment, string, string_quote, qstring, data
|
||||
} state = start;
|
||||
|
||||
int quote = 0;
|
||||
bool bol = true;
|
||||
std::string dblk = "data_" + datablock;
|
||||
std::string::size_type si = 0;
|
||||
bool found = false;
|
||||
|
||||
while (sb.in_avail() > 0 and not found)
|
||||
{
|
||||
int ch = sb.sbumpc();
|
||||
switch (state)
|
||||
{
|
||||
case start:
|
||||
switch (ch)
|
||||
{
|
||||
case '#': state = comment; break;
|
||||
case 'd':
|
||||
case 'D':
|
||||
state = data;
|
||||
si = 1;
|
||||
break;
|
||||
case '\'':
|
||||
case '"':
|
||||
state = string;
|
||||
quote = ch;
|
||||
break;
|
||||
case ';':
|
||||
if (bol)
|
||||
state = qstring;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
|
||||
case comment:
|
||||
if (ch == '\n')
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case string:
|
||||
if (ch == quote)
|
||||
state = string_quote;
|
||||
break;
|
||||
|
||||
case string_quote:
|
||||
if (std::isspace(ch))
|
||||
state = start;
|
||||
else
|
||||
state = string;
|
||||
break;
|
||||
|
||||
case qstring:
|
||||
if (ch == ';' and bol)
|
||||
state = start;
|
||||
break;
|
||||
|
||||
case data:
|
||||
if (isspace(ch) and dblk[si] == 0)
|
||||
found = true;
|
||||
else if (dblk[si++] != ch)
|
||||
state = start;
|
||||
break;
|
||||
}
|
||||
|
||||
bol = (ch == '\n');
|
||||
}
|
||||
|
||||
if (found)
|
||||
{
|
||||
produceDatablock(datablock);
|
||||
mLookahead = getNextToken();
|
||||
parseDataBlock();
|
||||
}
|
||||
|
||||
return found;
|
||||
}
|
||||
|
||||
void SacParser::parseFile()
|
||||
{
|
||||
while (mLookahead != eCIFTokenEOF)
|
||||
|
||||
@@ -106,7 +106,6 @@ struct CompoundBondLess
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Compound
|
||||
|
||||
@@ -304,13 +303,13 @@ class CompoundFactoryImpl
|
||||
|
||||
CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next);
|
||||
|
||||
~CompoundFactoryImpl()
|
||||
virtual ~CompoundFactoryImpl()
|
||||
{
|
||||
delete mNext;
|
||||
}
|
||||
|
||||
Compound *get(std::string id);
|
||||
Compound *create(std::string id);
|
||||
virtual Compound *create(std::string id);
|
||||
|
||||
CompoundFactoryImpl *pop()
|
||||
{
|
||||
@@ -335,7 +334,6 @@ class CompoundFactoryImpl
|
||||
private:
|
||||
std::shared_timed_mutex mMutex;
|
||||
|
||||
std::string mPath;
|
||||
std::vector<Compound *> mCompounds;
|
||||
std::set<std::string> mKnownPeptides;
|
||||
std::set<std::string> mKnownBases;
|
||||
@@ -355,8 +353,7 @@ CompoundFactoryImpl::CompoundFactoryImpl()
|
||||
}
|
||||
|
||||
CompoundFactoryImpl::CompoundFactoryImpl(const std::string &file, CompoundFactoryImpl *next)
|
||||
: mPath(file)
|
||||
, mNext(next)
|
||||
: mNext(next)
|
||||
{
|
||||
cif::File cifFile(file);
|
||||
if (not cifFile.isValid())
|
||||
@@ -449,6 +446,48 @@ Compound *CompoundFactoryImpl::create(std::string id)
|
||||
return result;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
// Version for the default compounds, based on the cached components.cif file from CCD
|
||||
|
||||
class CCDCompoundFactoryImpl : public CompoundFactoryImpl
|
||||
{
|
||||
public:
|
||||
CCDCompoundFactoryImpl() {}
|
||||
|
||||
Compound *create(std::string id) override;
|
||||
};
|
||||
|
||||
/// \brief Create the compound \a id using the CCD components.cif resource
///
/// The ID is converted to upper case first. If get() already knows the
/// compound, that one is returned and components.cif is not read at all.
/// Otherwise only the data block named \a id is loaded from components.cif
/// and a new Compound is created from it.
///
/// \param id  The ID of the compound to create
/// \return    The compound, or nullptr when the loaded data block does not
///            carry the requested ID
/// \throws std::runtime_error when the components.cif resource cannot be loaded
Compound *CCDCompoundFactoryImpl::create(std::string id)
{
	ba::to_upper(id);

	// A compound that is already known must not be parsed again; without
	// this early return the pointer from get() would simply be overwritten.
	Compound *result = get(id);
	if (result != nullptr)
		return result;

	auto ccd = cif::loadResource("components.cif");
	if (not ccd)
		throw std::runtime_error("Could not locate the CCD components.cif file, please make sure the software is installed properly and/or use the update-dictionary-script to fetch the data.");

	if (cif::VERBOSE)
	{
		// flush explicitly, the line is only completed after loading
		std::cout << "Loading component " << id << "...";
		std::cout.flush();
	}

	cif::File file;
	file.load(*ccd, id);

	if (cif::VERBOSE)
		std::cout << " done" << std::endl;

	// NOTE(review): file.load() gives no indication whether the data block
	// was found; this assumes firstDatablock() can be called on a file that
	// ended up without data blocks - TODO confirm.
	auto &db = file.firstDatablock();
	if (db.getName() == id)
		result = new Compound(db);
	else if (cif::VERBOSE)
		std::cerr << "Could not locate compound " << id << " in the CCD components file" << std::endl;

	// NOTE(review): a compound created here is not stored anywhere in this
	// function, so presumably a following request for the same ID reads
	// components.cif again - verify against get().
	return result;
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
CompoundFactory *CompoundFactory::sInstance;
|
||||
@@ -461,7 +500,7 @@ void CompoundFactory::init(bool useThreadLocalInstanceOnly)
|
||||
}
|
||||
|
||||
CompoundFactory::CompoundFactory()
|
||||
: mImpl(nullptr)
|
||||
: mImpl(new CCDCompoundFactoryImpl)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -544,12 +583,4 @@ bool CompoundFactory::isKnownBase(const std::string &resName) const
|
||||
return mImpl->isKnownBase(resName);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------
|
||||
|
||||
std::vector<std::string> Compound::addExtraComponents(const std::filesystem::path &components)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
|
||||
} // namespace mmcif
|
||||
|
||||
@@ -21,22 +21,27 @@ if ! [ -d @DATA_CACHE_DIR@ ]; then
|
||||
exit
|
||||
fi
|
||||
|
||||
# fetch the dictionary
|
||||
# Download the gzip compressed file at URL $2 and store the decompressed
# content as file $1.
#
# An already existing file $1 is only replaced when both the download and
# the decompression succeeded. Returns non-zero when one of the steps failed.
fetch_dictionary () {
	dict="$1"
	source="$2"

	# without a successful download there is nothing to extract
	if ! wget -O "${dict}.gz" "${source}"; then
		rm -f "${dict}.gz"
		return 1
	fi

	# be careful not to nuke an existing dictionary file
	# extract to a temporary file first

	if gunzip -c "${dict}.gz" > "${dict}-tmp"; then
		# then move the extracted file to the final location
		mv "${dict}-tmp" "${dict}"
		status=$?
	else
		# a failed extraction leaves an empty or partial file, drop it
		rm -f "${dict}-tmp"
		status=1
	fi

	# and clean up afterwards

	rm -f "${dict}.gz"

	return $status
}
|
||||
|
||||
rm ${dict}.gz
|
||||
# fetch the dictionaries
|
||||
|
||||
fetch_dictionary "@DATA_CACHE_DIR@/mmcif_pdbx_v50.dic" "https://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic.gz"
|
||||
fetch_dictionary "@DATA_CACHE_DIR@/components.cif" "ftp://ftp.wwpdb.org/pub/pdb/data/monomers/components.cif.gz"
|
||||
|
||||
Reference in New Issue
Block a user