diff --git a/data/mmcif-field-names.csv b/data/mmcif-field-names.csv index 66b42958b..61649087a 100644 --- a/data/mmcif-field-names.csv +++ b/data/mmcif-field-names.csv @@ -58,6 +58,11 @@ entity.pdbx_mutation entity.pdbx_fragment entity.pdbx_ec +entity_poly_seq.entity_id +entity_poly_seq.num +entity_poly_seq.mon_id +entity_poly_seq.hetero + entry.id exptl.entry_id diff --git a/src/apps/structure-info/index.ts b/src/apps/structure-info/index.ts index 5233f459b..da2da6874 100644 --- a/src/apps/structure-info/index.ts +++ b/src/apps/structure-info/index.ts @@ -39,7 +39,7 @@ export function atomLabel(model: Model, aI: number) { } -function printBonds(structure: Structure) { +export function printBonds(structure: Structure) { const { units, elements } = structure; const unitIds = ElementSet.unitIndices(elements); @@ -67,12 +67,24 @@ function printBonds(structure: Structure) { } } +export function printSequence(model: Model) { + const { byEntityKey } = model.sequence; + for (const key of Object.keys(byEntityKey)) { + const seq = byEntityKey[+key]; + console.log(`${seq.entityId} (${seq.num.value(0)}, ${seq.num.value(seq.num.rowCount - 1)}) (${seq.compId.value(0)}, ${seq.compId.value(seq.compId.rowCount - 1)})`); + // for (let i = 0; i < seq.compId.rowCount; i++) { + // console.log(`${seq.entityId} ${seq.num.value(i)} ${seq.compId.value(i)}`); + // } + } +} + async function run(pdb: string) { const mmcif = await getPdb(pdb) const models = Model.create({ kind: 'mmCIF', data: mmcif }); - const structure = Structure.ofModel(models[0]) + //const structure = Structure.ofModel(models[0]) // console.log(structure) - printBonds(structure) + // printBonds(structure) + printSequence(models[0]); } const parser = new argparse.ArgumentParser({ diff --git a/src/mol-data/db/column.ts b/src/mol-data/db/column.ts index 03394b28d..93461e8a8 100644 --- a/src/mol-data/db/column.ts +++ b/src/mol-data/db/column.ts @@ -169,7 +169,7 @@ function createFirstIndexMapOfColumn(c: Column): Map { const map = new Map(); for (let i = 0, _i = c.rowCount; i < _i; i++) { const v = c.value(i); - if (!map.has(v)) return map.set(c.value(i), i); + if (!map.has(v)) map.set(c.value(i), i); } return map; } diff --git a/src/mol-io/reader/cif/schema/mmcif.ts b/src/mol-io/reader/cif/schema/mmcif.ts index 8660f258d..373fa1ae8 100644 --- a/src/mol-io/reader/cif/schema/mmcif.ts +++ b/src/mol-io/reader/cif/schema/mmcif.ts @@ -86,6 +86,12 @@ export const mmCIF_Schema = { pdbx_fragment: str, pdbx_ec: List(',', x => x), }, + entity_poly_seq: { + entity_id: str, + num: int, + mon_id: str, + hetero: Aliased<'no' | 'n' | 'yes' | 'y'>(str) + }, entry: { id: str, }, diff --git a/src/mol-model/structure/model/formats/gro.ts b/src/mol-model/structure/model/formats/gro.ts index 09f57950e..84b30517e 100644 --- a/src/mol-model/structure/model/formats/gro.ts +++ b/src/mol-model/structure/model/formats/gro.ts @@ -18,6 +18,7 @@ import { guessElement } from '../utils/guess-element' import { ElementSymbol} from '../types' import gro_Format = Format.gro +import Sequence from '../properties/sequence'; type HierarchyOffsets = { residues: ArrayLike, chains: ArrayLike } @@ -112,11 +113,13 @@ function createModel(format: gro_Format, modelNum: number, previous?: Model): Mo chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds), } const hierarchyKeys = findHierarchyKeys(hierarchyData, hierarchySegments); + const hierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }; return { id: UUID.create(), sourceData: format, modelNum, - hierarchy: { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }, + hierarchy, + sequence: Sequence.fromHierarchy(hierarchy), conformation: getConformation(structure.atoms), coarseGrained: CoarseGrained.Empty, symmetry: { assemblies: [] }, diff --git a/src/mol-model/structure/model/formats/mmcif.ts b/src/mol-model/structure/model/formats/mmcif.ts index ff40bfa81..b06ed9ef2 100644 --- a/src/mol-model/structure/model/formats/mmcif.ts +++ b/src/mol-model/structure/model/formats/mmcif.ts @@ -18,6 +18,7 @@ import { ElementSymbol} from '../types' import createAssemblies from './mmcif/assembly' import mmCIF_Format = Format.mmCIF +import { getSequence } from './mmcif/sequence'; function findModelBounds({ data }: mmCIF_Format, startIndex: number) { const num = data.atom_site.pdbx_PDB_model_num; @@ -106,11 +107,15 @@ function createModel(format: mmCIF_Format, bounds: Interval, previous?: Model): chainSegments: Segmentation.ofOffsets(hierarchyOffsets.chains, bounds), } const hierarchyKeys = findHierarchyKeys(hierarchyData, hierarchySegments); + + const hierarchy = { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }; + return { id: UUID.create(), sourceData: format, modelNum: format.data.atom_site.pdbx_PDB_model_num.value(Interval.start(bounds)), - hierarchy: { ...hierarchyData, ...hierarchyKeys, ...hierarchySegments }, + hierarchy, + sequence: getSequence(format.data, hierarchy), conformation: getConformation(format, bounds), coarseGrained: CoarseGrained.Empty, symmetry: getSymmetry(format), diff --git a/src/mol-model/structure/model/formats/mmcif/sequence.ts b/src/mol-model/structure/model/formats/mmcif/sequence.ts new file mode 100644 index 000000000..85bb572ba --- /dev/null +++ b/src/mol-model/structure/model/formats/mmcif/sequence.ts @@ -0,0 +1,31 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal + */ + +import { mmCIF_Database as mmCIF } from 'mol-io/reader/cif/schema/mmcif' +import Sequence from '../../properties/sequence' +import { Column } from 'mol-data/db'; +import { Hierarchy } from '../../properties/hierarchy'; + +export function getSequence(cif: mmCIF, hierarchy: Hierarchy): Sequence { + if (!cif.entity_poly_seq._rowCount) return Sequence.fromHierarchy(hierarchy); + + const { entity_id, num, mon_id } = cif.entity_poly_seq; + + const byEntityKey: Sequence['byEntityKey'] = {}; + const count = entity_id.rowCount; + + let i = 0; + while (i < count) { + const start = i; + while (i < count - 1 && entity_id.areValuesEqual(i, i + 1)) i++; + i++; + + const id = entity_id.value(start); + byEntityKey[hierarchy.findEntityKey(id)] = { entityId: id, compId: Column.window(mon_id, start, i), num: Column.window(num, start, i) } + } + + return { byEntityKey }; +} \ No newline at end of file diff --git a/src/mol-model/structure/model/model.ts b/src/mol-model/structure/model/model.ts index e7f0fe0a8..daf777734 100644 --- a/src/mol-model/structure/model/model.ts +++ b/src/mol-model/structure/model/model.ts @@ -6,6 +6,7 @@ import UUID from 'mol-util/uuid' import Format from './format' +import Sequence from './properties/sequence' import Hierarchy from './properties/hierarchy' import Conformation from './properties/conformation' import Symmetry from './properties/symmetry' @@ -26,6 +27,7 @@ interface Model extends Readonly<{ sourceData: Format, + sequence: Sequence, hierarchy: Hierarchy, conformation: Conformation, symmetry: Symmetry, diff --git a/src/mol-model/structure/model/properties/sequence.ts b/src/mol-model/structure/model/properties/sequence.ts new file mode 100644 index 000000000..d7aa43951 --- /dev/null +++ b/src/mol-model/structure/model/properties/sequence.ts @@ -0,0 +1,29 @@ +/** + * Copyright (c) 2018 mol* contributors, licensed under MIT, See LICENSE file for more info. + * + * @author David Sehnal + */ + +import { Column } from 'mol-data/db' +import { Hierarchy } from './hierarchy'; + +interface Sequence { + readonly byEntityKey: { [key: number]: Sequence.Entity } +} + +namespace Sequence { + export interface Entity { + readonly entityId: string, + readonly num: Column + // _entity_poly_seq.mon_id + readonly compId: Column + } + + export function fromHierarchy(hierarchy: Hierarchy): Sequence { + // const { label_comp_id } = hierarchy.residues; + + throw 'not implemented'; + } +} + +export default Sequence \ No newline at end of file