cif schema updates

This commit is contained in:
Alexander Rose
2019-05-11 22:03:45 -07:00
parent ae28594055
commit 9f6e65918d
5 changed files with 72 additions and 14 deletions

View File

@@ -91,6 +91,14 @@ entity_poly_seq.num
entity_poly_seq.mon_id
entity_poly_seq.hetero
entity_src_gen.entity_id
entity_src_gen.pdbx_src_id
entity_src_gen.pdbx_alt_source_flag
entity_src_gen.pdbx_seq_type
entity_src_gen.pdbx_beg_seq_num
entity_src_gen.pdbx_end_seq_num
entity_src_gen.pdbx_gene_src_gene
pdbx_entity_branch.entity_id
pdbx_entity_branch.type
1 atom_sites.entry_id
91 pdbx_entity_branch_link.entity_id pdbx_entity_branch.type
92 pdbx_entity_branch_link.entity_branch_list_num_1 pdbx_entity_branch_list.entity_id
93 pdbx_entity_branch_link.comp_id_1 pdbx_entity_branch_list.comp_id
94 pdbx_entity_branch_list.num
95 pdbx_entity_branch_list.component_comp_id
96 pdbx_entity_branch_list.hetero
97 pdbx_entity_branch_link.link_id
98 pdbx_entity_branch_link.entity_id
99 pdbx_entity_branch_link.entity_branch_list_num_1
100 pdbx_entity_branch_link.comp_id_1
101 pdbx_entity_branch_link.atom_id_1
102 pdbx_entity_branch_link.atom_id_1 pdbx_entity_branch_link.leaving_atom_id_1
103 pdbx_entity_branch_link.leaving_atom_id_1 pdbx_entity_branch_link.atom_stereo_config_1
104 pdbx_entity_branch_link.atom_stereo_config_1 pdbx_entity_branch_link.entity_branch_list_num_2

View File

@@ -52,6 +52,8 @@ export function getFieldType (type: string, description: string, values?: string
case 'date_dep':
case 'url':
case 'symop':
case 'exp_data_doi':
case 'asym_id':
return StrCol(description)
case 'int':
case 'non_negative_int':
@@ -63,6 +65,8 @@ export function getFieldType (type: string, description: string, values?: string
case 'ucode-alphanum-csv':
case 'id_list':
return ListCol('str', ',', description)
case 'id_list_spc':
return ListCol('str', ' ', description)
}
console.log(`unknown type '${type}'`)
return StrCol(description)
@@ -163,6 +167,7 @@ const COMMA_SEPARATED_LIST_FIELDS = [
'_entity.pdbx_description', // Endolysin,Beta-2 adrenergic receptor
'_entity.pdbx_ec',
'_entity_poly.pdbx_strand_id', // A,B
'_entity_src_gen.pdbx_gene_src_gene', // ADRB2, ADRB2R, B2AR
'_pdbx_depui_entry_details.experimental_methods',
'_pdbx_depui_entry_details.requested_accession_types',
'_pdbx_soln_scatter_model.software_list', // INSIGHT II, HOMOLOGY, DISCOVERY, BIOPOLYMER, DELPHI
@@ -196,12 +201,7 @@ const SEMICOLON_SEPARATED_LIST_FIELDS = [
* values are available in the existing dictionary.
*/
const EXTRA_ENUM_VALUES: { [k: string]: string[] } = {
// TODO for carbohydrate extension draft, remove when added to chem_comp dic
'_pdbx_chem_comp_identifier.type': [
'CONDENSED IUPAC CARB SYMBOL',
'IUPAC CARB SYMBOL',
'SNFG CARB SYMBOL'
]
}
export function generateSchema (frames: CifFrame[]) {

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft.
* Code-generated 'BIRD' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft.
*
* @author mol-star package (src/apps/schema-generator/generate)
*/

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft.
* Code-generated 'CCD' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft.
*
* @author mol-star package (src/apps/schema-generator/generate)
*/

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.305, IHM 0.139, CARB draft.
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft.
*
* @author mol-star package (src/apps/schema-generator/generate)
*/
@@ -485,7 +485,7 @@ export const mmCIF_Schema = {
* Water entities are not expected to have corresponding
* entries in the ENTITY category.
*/
type: Aliased<'polymer' | 'non-polymer' | 'macrolide' | 'water'>(str),
type: Aliased<'polymer' | 'non-polymer' | 'macrolide' | 'water' | 'branched'>(str),
/**
* A description of the entity.
*
@@ -496,7 +496,7 @@ export const mmCIF_Schema = {
* A place holder for the number of molecules of the entity in
* the entry.
*/
pdbx_number_of_molecules: float,
pdbx_number_of_molecules: int,
/**
* Details about any entity mutation(s).
*/
@@ -534,7 +534,7 @@ export const mmCIF_Schema = {
/**
* The type of the polymer.
*/
type: Aliased<'polypeptide(D)' | 'polypeptide(L)' | 'polydeoxyribonucleotide' | 'polyribonucleotide' | 'polysaccharide(D)' | 'polysaccharide(L)' | 'polydeoxyribonucleotide/polyribonucleotide hybrid' | 'cyclic-pseudo-peptide' | 'peptide nucleic acid' | 'other'>(str),
type: Aliased<'polypeptide(D)' | 'polypeptide(L)' | 'polydeoxyribonucleotide' | 'polyribonucleotide' | 'polydeoxyribonucleotide/polyribonucleotide hybrid' | 'cyclic-pseudo-peptide' | 'peptide nucleic acid' | 'other'>(str),
/**
* The PDB strand/chain id(s) corresponding to this polymer entity.
*/
@@ -1964,6 +1964,52 @@ export const mmCIF_Schema = {
*/
details: str,
},
/**
* Data items in the ENTITY_SRC_GEN category record details of
* the source from which the entity was obtained in cases
* where the source was genetically manipulated. The
* following are treated separately: items pertaining to the tissue
* from which the gene was obtained, items pertaining to the host
* organism for gene expression and items pertaining to the actual
* producing organism (plasmid).
*/
entity_src_gen: {
/**
* This data item is a pointer to _entity.id in the ENTITY category.
*/
entity_id: str,
/**
* Identifies the gene.
*
* Declared as a comma-separated list column, so a single value may
* name several genes (e.g. `ADRB2, ADRB2R, B2AR`).
*/
pdbx_gene_src_gene: List(',', x => x),
/**
* This data item is an ordinal identifier for entity_src_gen data records.
*/
pdbx_src_id: int,
/**
* This data item identifies cases in which an alternative source
* is modeled.
*/
pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str),
/**
* This data item provides additional information about the sequence type.
*/
pdbx_seq_type: Aliased<'N-terminal tag' | 'C-terminal tag' | 'Biological sequence' | 'Linker'>(str),
/**
* The beginning polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_beg_seq_num: int,
/**
* The ending polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_end_seq_num: int,
},
/**
* Data items in the PDBX_ENTITY_DESCRIPTOR category provide
* string descriptors of entity chemical structure.
@@ -1982,7 +2028,7 @@ export const mmCIF_Schema = {
/**
* This data item contains the descriptor type.
*/
type: Aliased<'LINUCS'>(str),
type: Aliased<'LINUCS' | 'Glycam Condensed Sequence' | 'Glycam Condensed Core Sequence'>(str),
/**
* This data item contains the name of the program
* or library used to compute the descriptor.
@@ -3716,7 +3762,7 @@ export const mmCIF_Schema = {
*/
entity_id: str,
/**
* An asym/strand identifier for the residue / residue range.
* An asym/strand identifier for the residue / residue range, if applicable.
* This data item is a pointer to _struct_asym.id in the
* STRUCT_ASYM category.
*/
@@ -3794,6 +3840,10 @@ export const mmCIF_Schema = {
* Identifier to the input data from which the distance restraint is derived.
* This data item is a pointer to the _ihm_dataset_list.id in the
* IHM_DATASET_LIST category.
* This data item may not be applicable for all cases. For example, in case of
* ambiguous interface restraints where the interface residues are identified
* from multiple experiments, the reference to the _ihm_dataset_list.id is
* handled in the IHM_INTERFACE_RESIDUE_FEATURE category rather than here.
*/
dataset_list_id: int,
},