add model-archive cif schema

This commit is contained in:
Alexander Rose
2021-12-11 16:40:29 -08:00
parent e4f630dbef
commit beff1ecb3e
3 changed files with 400 additions and 5 deletions

View File

@@ -246,6 +246,14 @@ citation_author.ordinal
exptl.entry_id
exptl.method
software.classification
software.date
software.description
software.name
software.pdbx_ordinal
software.type
software.version
struct.entry_id
struct.title
struct.pdbx_descriptor
@@ -802,4 +810,58 @@ ihm_multi_state_modeling.population_fraction_sd
ihm_multi_state_modeling.state_type
ihm_multi_state_modeling.state_name
ihm_multi_state_modeling.experiment_type
ihm_multi_state_modeling.details
ihm_multi_state_modeling.details
ma_data.content_type
ma_data.content_type_other_details
ma_data.id
ma_data.name
ma_model_list.data_id
ma_model_list.model_group_id
ma_model_list.model_group_name
ma_model_list.model_id
ma_model_list.model_name
ma_model_list.model_type
ma_model_list.ordinal_id
ma_qa_metric.id
ma_qa_metric.mode
ma_qa_metric.name
ma_qa_metric.software_group_id
ma_qa_metric.type
ma_qa_metric_global.metric_id
ma_qa_metric_global.metric_value
ma_qa_metric_global.model_id
ma_qa_metric_global.ordinal_id
ma_qa_metric_local.label_asym_id
ma_qa_metric_local.label_comp_id
ma_qa_metric_local.label_seq_id
ma_qa_metric_local.metric_id
ma_qa_metric_local.metric_value
ma_qa_metric_local.model_id
ma_qa_metric_local.ordinal_id
ma_software_group.group_id
ma_software_group.ordinal_id
ma_software_group.software_id
ma_target_entity.data_id
ma_target_entity.entity_id
ma_target_entity.origin
ma_target_entity_instance.asym_id
ma_target_entity_instance.details
ma_target_entity_instance.entity_id
ma_target_ref_db_details.db_accession
ma_target_ref_db_details.db_code
ma_target_ref_db_details.db_name
ma_target_ref_db_details.ncbi_taxonomy_id
ma_target_ref_db_details.organism_scientific
ma_target_ref_db_details.seq_db_align_begin
ma_target_ref_db_details.seq_db_align_end
ma_target_ref_db_details.seq_db_isoform
ma_target_ref_db_details.target_entity_id
1 atom_sites.entry_id
246 struct_conn.conn_type_id struct_conf.end_auth_comp_id
247 struct_conn.pdbx_PDB_id struct_conf.end_auth_asym_id
248 struct_conn.ptnr1_label_asym_id struct_conf.end_auth_seq_id
249 struct_conf.pdbx_PDB_helix_class
250 struct_conf.details
251 struct_conf.pdbx_PDB_helix_length
252 struct_conn.id
253 struct_conn.conn_type_id
254 struct_conn.pdbx_PDB_id
255 struct_conn.ptnr1_label_asym_id
256 struct_conn.ptnr1_label_comp_id
257 struct_conn.ptnr1_label_comp_id struct_conn.ptnr1_label_seq_id
258 struct_conn.ptnr1_label_seq_id struct_conn.ptnr1_label_atom_id
259 struct_conn.ptnr1_label_atom_id struct_conn.pdbx_ptnr1_label_alt_id
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env node
/**
* Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
* Copyright (c) 2017-2021 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
@@ -35,6 +35,10 @@ async function runGenerateSchemaMmcif(name: string, fieldNamesPath: string, type
const ihmDic = await parseCifText(fs.readFileSync(IHM_DIC_PATH, 'utf8')).run();
if (ihmDic.isError) throw ihmDic;
await ensureMaDicAvailable();
const maDic = await parseCifText(fs.readFileSync(MA_DIC_PATH, 'utf8')).run();
if (maDic.isError) throw maDic;
await ensureCarbBranchDicAvailable();
const carbBranchDic = await parseCifText(fs.readFileSync(CARB_BRANCH_DIC_PATH, 'utf8')).run();
if (carbBranchDic.isError) throw carbBranchDic;
@@ -45,10 +49,11 @@ async function runGenerateSchemaMmcif(name: string, fieldNamesPath: string, type
const mmcifDicVersion = getDicVersion(mmcifDic.result.blocks[0]);
const ihmDicVersion = getDicVersion(ihmDic.result.blocks[0]);
const maDicVersion = getDicVersion(maDic.result.blocks[0]);
const carbDicVersion = 'draft';
const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, CARB ${carbDicVersion}.`;
const version = `Dictionary versions: mmCIF ${mmcifDicVersion}, IHM ${ihmDicVersion}, MA ${maDicVersion}, CARB ${carbDicVersion}.`;
const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames];
const frames: CifFrame[] = [...mmcifDic.result.blocks[0].saveFrames, ...ihmDic.result.blocks[0].saveFrames, ...maDic.result.blocks[0].saveFrames, ...carbBranchDic.result.blocks[0].saveFrames, ...carbCompDic.result.blocks[0].saveFrames];
const schema = generateSchema(frames);
await runGenerateSchema(name, version, schema, fieldNamesPath, typescript, out, moldbImportPath, addAliases);
@@ -139,6 +144,7 @@ async function getFieldNamesFilter(fieldNamesPath: string): Promise<Filter> {
/** Delegates to `ensureDicAvailable` using the mmCIF dictionary path and URL constants. */
async function ensureMmcifDicAvailable(): Promise<void> {
    await ensureDicAvailable(MMCIF_DIC_PATH, MMCIF_DIC_URL);
}
/** Delegates to `ensureDicAvailable` using the IHM dictionary path and URL constants. */
async function ensureIhmDicAvailable(): Promise<void> {
    await ensureDicAvailable(IHM_DIC_PATH, IHM_DIC_URL);
}
/** Delegates to `ensureDicAvailable` using the MA (model-archive) dictionary path and URL constants. */
async function ensureMaDicAvailable(): Promise<void> {
    await ensureDicAvailable(MA_DIC_PATH, MA_DIC_URL);
}
/** Delegates to `ensureDicAvailable` using the carbohydrate branch dictionary path and URL constants. */
async function ensureCarbBranchDicAvailable(): Promise<void> {
    await ensureDicAvailable(CARB_BRANCH_DIC_PATH, CARB_BRANCH_DIC_URL);
}
/** Delegates to `ensureDicAvailable` using the carbohydrate component dictionary path and URL constants. */
async function ensureCarbCompDicAvailable(): Promise<void> {
    await ensureDicAvailable(CARB_COMP_DIC_PATH, CARB_COMP_DIC_URL);
}
async function ensureCifCoreDicAvailable() {
@@ -165,6 +171,8 @@ const MMCIF_DIC_PATH = `${DIC_DIR}/mmcif_pdbx_v50.dic`;
// Each dictionary has a local path under DIC_DIR and a URL; the pair is handed
// to `ensureDicAvailable` by the matching `ensure*DicAvailable` wrapper.

// URL paired with MMCIF_DIC_PATH.
const MMCIF_DIC_URL = 'http://mmcif.wwpdb.org/dictionaries/ascii/mmcif_pdbx_v50.dic';
// IHM extension dictionary: local file read via fs.readFileSync, plus its URL.
const IHM_DIC_PATH = `${DIC_DIR}/ihm-extension.dic`;
const IHM_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/IHM-dictionary/master/ihm-extension.dic';
// MA (model-archive) dictionary. Note the local file name (`ma-extension.dic`)
// differs from the remote file name (`mmcif_ma.dic`).
const MA_DIC_PATH = `${DIC_DIR}/ma-extension.dic`;
const MA_DIC_URL = 'https://raw.githubusercontent.com/ihmwg/MA-dictionary/master/mmcif_ma.dic';
// Carbohydrate branch extension dictionary: local file plus its URL.
const CARB_BRANCH_DIC_PATH = `${DIC_DIR}/entity_branch-extension.dic`;
const CARB_BRANCH_DIC_URL = 'https://raw.githubusercontent.com/pdbxmmcifwg/carbohydrate-extension/master/dict/entity_branch-extension.dic';
// Carbohydrate component extension dictionary: local file (its URL constant is declared separately).
const CARB_COMP_DIC_PATH = `${DIC_DIR}/chem_comp-extension.dic`;

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2017-2020 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.352, IHM 1.17, CARB draft.
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.352, IHM 1.17, MA 1.3.3, CARB draft.
*
* @author molstar/ciftools package
*/
@@ -942,6 +942,48 @@ export const mmCIF_Schema = {
*/
method: Aliased<'X-RAY DIFFRACTION' | 'NEUTRON DIFFRACTION' | 'FIBER DIFFRACTION' | 'ELECTRON CRYSTALLOGRAPHY' | 'ELECTRON MICROSCOPY' | 'SOLUTION NMR' | 'SOLID-STATE NMR' | 'SOLUTION SCATTERING' | 'POWDER DIFFRACTION' | 'INFRARED SPECTROSCOPY' | 'EPR' | 'FLUORESCENCE TRANSFER' | 'THEORETICAL MODEL'>(str),
},
/**
* Data items in the SOFTWARE category record details about
* the software used in the structure analysis, which implies
* any software used in the generation of any data items
* associated with the structure determination and
* structure representation.
*
* These data items allow computer programs to be referenced
* in more detail than data items in the COMPUTING category do.
*/
software: {
/**
* The classification of the program according to its
* major function.
*/
classification: str,
/**
* The date the software was released.
*/
date: str,
/**
* Description of the software.
*/
description: str,
/**
* The name of the software.
*/
name: str,
/**
* The classification of the software according to the most
* common types.
*/
type: Aliased<'program' | 'library' | 'package' | 'filter' | 'jiffy' | 'other'>(str),
/**
* The version of the software.
*/
version: str,
/**
* An ordinal index for this category
*/
pdbx_ordinal: int,
},
/**
* Data items in the STRUCT category record details about the
* description of the crystallographic structure.
@@ -4717,6 +4759,289 @@ export const mmCIF_Schema = {
*/
dataset_list_id: int,
},
/**
* Data items in the MA_MODEL_LIST category record the
* details of the models being deposited.
*/
ma_model_list: {
/**
* A unique identifier for the model / model group combination.
*/
ordinal_id: int,
/**
* A unique identifier for the structural model being deposited.
*/
model_id: int,
/**
* An identifier to group structural models into collections or sets.
* A cluster of models and its representative can either be grouped together
* or can be separate groups in the ma_model_list table. The choice between
* the two options should be decided based on how the modeling was carried out
* and how the representative was chosen. If the representative is a member of
* the ensemble (i.e., best scoring model), then it is recommended that the
* representative and the ensemble belong to the same model group. If the
* representative is calculated from the ensemble (i.e., centroid), then it is
* recommended that the representative be separated into a different group.
* If the models do not need to be grouped into collections, then the
* _ma_model_list.model_group_id is the same as _ma_model_list.model_id.
*/
model_group_id: int,
/**
* A descriptive name for the model.
*/
model_name: str,
/**
* A descriptive name for the model group.
*/
model_group_name: str,
/**
* The type of model.
*/
model_type: Aliased<'Homology model' | 'Ab initio model' | 'Other'>(str),
/**
* The data_id identifier. This data item is a pointer to
* _ma_data.id in the MA_DATA category.
*/
data_id: int,
},
/**
* Data items in the MA_TARGET_ENTITY category record details about
* the target entities. The details are provided for each entity
* being modeled.
*/
ma_target_entity: {
/**
* A unique identifier for the distinct molecular entity of the target.
* This data item is a pointer to _entity.id in the ENTITY category.
*/
entity_id: str,
/**
* The data_id identifier. This data item is a pointer to
* _ma_data.id in the MA_DATA category.
*/
data_id: int,
/**
* The origin of the target entity.
*/
origin: Aliased<'reference database' | 'designed'>(str),
},
/**
* Data items in the MA_TARGET_ENTITY_INSTANCE category record details about
* the instances of target entities modeled.
*/
ma_target_entity_instance: {
/**
* A unique identifier for the instance of the entity.
*/
asym_id: str,
/**
* A unique identifier for the distinct molecular entity of the target.
* This data item is a pointer to _ma_target_entity.entity_id in the
* MA_TARGET_ENTITY category.
*/
entity_id: str,
/**
* Additional details about the entity instance.
*/
details: str,
},
/**
* Data items in the MA_TARGET_REF_DB_DETAILS category record details about
* the reference databases for the target sequences.
*/
ma_target_ref_db_details: {
/**
* An identifier for the target entity.
*/
target_entity_id: str,
/**
* The name of the database containing reference information about
* this entity or biological unit.
*/
db_name: Aliased<'UNP' | 'GB' | 'OrthoDB' | 'NCBI' | 'JGI' | 'Other'>(str),
/**
* The code for this entity or biological unit or for a closely
* related entity or biological unit in the named database.
* This can include the version number.
*/
db_code: str,
/**
* Accession code assigned by the reference database.
*/
db_accession: str,
/**
* Database code assigned by the reference database for a sequence isoform. An isoform sequence is an
* alternative protein sequence that can be generated from the same gene by a single or by a combination of
* biological events such as: alternative promoter usage, alternative splicing, alternative initiation
* and ribosomal frameshifting.
*/
seq_db_isoform: str,
/**
* Beginning index in the chemical sequence from the
* reference database.
*/
seq_db_align_begin: str,
/**
* Ending index in the chemical sequence from the
* reference database.
*/
seq_db_align_end: str,
/**
* Taxonomy identifier provided by NCBI.
*/
ncbi_taxonomy_id: str,
/**
* Scientific name of the organism.
*/
organism_scientific: str,
},
/**
* Data items in the MA_DATA category capture the different kinds of
* data used in the modeling. These can be multiple sequence
* alignments, spatial restraints, template structures etc.
*/
ma_data: {
/**
* A unique identifier for the data.
*/
id: int,
/**
* The type of data held in the dataset.
*/
content_type: Aliased<'target' | 'template structure' | 'polymeric template library' | 'spatial restraints' | 'target-template alignment' | 'coevolution MSA' | 'model coordinates' | 'other'>(str),
/**
* Details for other content types.
*/
content_type_other_details: str,
/**
* An author-given name for the content held in the dataset.
*/
name: str,
},
/**
* Data items in the MA_SOFTWARE_GROUP category describe the
* collection of software into groups so that they can be used
* efficiently in the MA_PROTOCOL_STEP category.
*/
ma_software_group: {
/**
* A unique identifier for the category.
*/
ordinal_id: int,
/**
* An identifier for the group entry.
* If data does not need to be grouped, then _ma_software_group.group_id
* is the same as _ma_software_group.software_id.
*/
group_id: int,
/**
* The identifier for the software.
* This data item is a pointer to _software.pdbx_ordinal
* in the SOFTWARE category.
*/
software_id: int,
},
/**
* Data items in the MA_QA_METRIC category record the
* details of the metrics used to assess model quality.
*/
ma_qa_metric: {
/**
* An identifier for the QA metric.
*/
id: int,
/**
* Name of the QA metric.
*/
name: str,
/**
* The type of QA metric.
*/
type: Aliased<'zscore' | 'energy' | 'distance' | 'normalized score' | 'pLDDT' | 'PAE' | 'contact probability' | 'other'>(str),
/**
* The mode of calculation of the QA metric.
*/
mode: Aliased<'local' | 'global' | 'local-pairwise'>(str),
/**
* Identifier to the set of software used to calculate the QA metric.
* This data item is a pointer to the _ma_software_group.group_id in the
* MA_SOFTWARE_GROUP category.
*/
software_group_id: int,
},
/**
* Data items in the MA_QA_METRIC_GLOBAL category capture the
* details of the global QA metrics, calculated at the model-level.
*/
ma_qa_metric_global: {
/**
* A unique identifier for the category.
*/
ordinal_id: int,
/**
* The identifier for the structural model, for which global QA metric is provided.
* This data item is a pointer to _ma_model_list.model_id
* in the MA_MODEL_LIST category.
*/
model_id: int,
/**
* The identifier for the QA metric.
* This data item is a pointer to _ma_qa_metric.id in the
* MA_QA_METRIC category.
*/
metric_id: int,
/**
* The value of the global QA metric.
*/
metric_value: float,
},
/**
* Data items in the MA_QA_METRIC_LOCAL category capture the
* details of the local QA metrics, calculated at the residue-level.
*/
ma_qa_metric_local: {
/**
* A unique identifier for the category.
*/
ordinal_id: int,
/**
* The identifier for the structural model, for which local QA metric is provided.
* This data item is a pointer to _ma_model_list.model_id
* in the MA_MODEL_LIST category.
*/
model_id: int,
/**
* The identifier for the asym id of the residue in the
* structural model, for which local QA metric is provided.
* This data item is a pointer to _atom_site.label_asym_id
* in the ATOM_SITE category.
*/
label_asym_id: str,
/**
* The identifier for the sequence index of the residue
* in the structural model, for which local QA metric is provided.
* This data item is a pointer to _atom_site.label_seq_id
* in the ATOM_SITE category.
*/
label_seq_id: int,
/**
* The component identifier for the residue in the
* structural model, for which local QA metric is provided.
* This data item is a pointer to _atom_site.label_comp_id
* in the ATOM_SITE category.
*/
label_comp_id: str,
/**
* The identifier for the QA metric.
* This data item is a pointer to _ma_qa_metric.id in the
* MA_QA_METRIC category.
*/
metric_id: int,
/**
* The value of the local QA metric.
*/
metric_value: float,
},
};
export type mmCIF_Schema = typeof mmCIF_Schema;