entity-source coloring (replaces gene coloring)

This commit is contained in:
Alexander Rose
2019-05-20 07:55:33 -07:00
parent 2b53026d42
commit aa481fc698
5 changed files with 227 additions and 132 deletions

View File

@@ -93,12 +93,24 @@ entity_poly_seq.hetero
entity_src_gen.entity_id
entity_src_gen.pdbx_src_id
entity_src_gen.pdbx_alt_source_flag
entity_src_gen.pdbx_seq_type
entity_src_gen.pdbx_alt_source_flag
entity_src_gen.pdbx_seq_type
entity_src_gen.pdbx_beg_seq_num
entity_src_gen.pdbx_end_seq_num
entity_src_gen.pdbx_gene_src_gene
entity_src_nat.entity_id
entity_src_nat.pdbx_src_id
entity_src_nat.pdbx_alt_source_flag
entity_src_nat.pdbx_beg_seq_num
entity_src_nat.pdbx_end_seq_num
pdbx_entity_src_syn.entity_id
pdbx_entity_src_syn.pdbx_src_id
pdbx_entity_src_syn.pdbx_alt_source_flag
pdbx_entity_src_syn.pdbx_beg_seq_num
pdbx_entity_src_syn.pdbx_end_seq_num
pdbx_entity_branch.entity_id
pdbx_entity_branch.type
1 atom_sites.entry_id
93 pdbx_entity_branch_list.comp_id entity_src_nat.pdbx_beg_seq_num
94 pdbx_entity_branch_list.num entity_src_nat.pdbx_end_seq_num
95 pdbx_entity_branch_list.component_comp_id pdbx_entity_src_syn.entity_id
96 pdbx_entity_branch_list.hetero pdbx_entity_src_syn.pdbx_src_id
97 pdbx_entity_branch_link.link_id pdbx_entity_src_syn.pdbx_alt_source_flag
98 pdbx_entity_branch_link.entity_id pdbx_entity_src_syn.pdbx_beg_seq_num
99 pdbx_entity_branch_link.entity_branch_list_num_1 pdbx_entity_src_syn.pdbx_end_seq_num
100 pdbx_entity_branch_link.comp_id_1 pdbx_entity_branch.entity_id
101 pdbx_entity_branch_link.atom_id_1 pdbx_entity_branch.type
102 pdbx_entity_branch_list.entity_id
103 pdbx_entity_branch_list.comp_id
104 pdbx_entity_branch_list.num
105 pdbx_entity_branch_list.component_comp_id
106 pdbx_entity_branch_list.hetero
107 pdbx_entity_branch_link.link_id
108 pdbx_entity_branch_link.entity_id
109 pdbx_entity_branch_link.entity_branch_list_num_1
110 pdbx_entity_branch_link.comp_id_1
111 pdbx_entity_branch_link.atom_id_1
112 pdbx_entity_branch_link.leaving_atom_id_1
113 pdbx_entity_branch_link.atom_stereo_config_1
114 pdbx_entity_branch_link.leaving_atom_id_1 pdbx_entity_branch_link.entity_branch_list_num_2
115 pdbx_entity_branch_link.atom_stereo_config_1 pdbx_entity_branch_link.comp_id_2
116 pdbx_entity_branch_link.entity_branch_list_num_2 pdbx_entity_branch_link.atom_id_2

View File

@@ -1,7 +1,7 @@
/**
* Copyright (c) 2017-2018 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.309, IHM 0.141, CARB draft.
* Code-generated 'mmCIF' schema file. Dictionary versions: mmCIF 5.310, IHM 0.141, CARB draft.
*
* @author mol-star package (src/apps/schema-generator/generate)
*/
@@ -1964,6 +1964,40 @@ export const mmCIF_Schema = {
*/
details: str,
},
/**
* Data items in the ENTITY_SRC_NAT category record details of
* the source from which the entity was obtained in cases
* where the entity was isolated directly from a natural tissue.
*/
entity_src_nat: {
/**
* This data item is a pointer to _entity.id in the ENTITY category.
*/
entity_id: str,
/**
* This data item is an ordinal identifier for entity_src_nat data records.
*/
pdbx_src_id: int,
/**
* This data item identifies cases in which an alternative source is
* modeled.
*/
pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str),
/**
* The beginning polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_beg_seq_num: int,
/**
* The ending polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_end_seq_num: int,
},
/**
* Data items in the ENTITY_SRC_GEN category record details of
* the source from which the entity was obtained in cases
@@ -2010,6 +2044,39 @@ export const mmCIF_Schema = {
*/
pdbx_end_seq_num: int,
},
/**
* The data items in category PDBX_ENTITY_SRC_SYN record the source details
* about chemically synthesized molecules.
*/
pdbx_entity_src_syn: {
/**
* This data item is a pointer to _entity.id in the ENTITY category.
*/
entity_id: str,
/**
* This data item is an ordinal identifier for pdbx_entity_src_syn data records.
*/
pdbx_src_id: int,
/**
* This data item identifies cases in which an alternative source is
* modeled.
*/
pdbx_alt_source_flag: Aliased<'sample' | 'model'>(str),
/**
* The beginning polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_beg_seq_num: int,
/**
* The ending polymer sequence position for the polymer section corresponding
* to this source.
*
* A reference to the sequence position in the entity_poly category.
*/
pdbx_end_seq_num: int,
},
/**
* Data items in the PDBX_ENTITY_DESCRIPTOR category provide
* string descriptors of entity chemical structure.

View File

@@ -27,7 +27,7 @@ import { UnitIndexColorThemeProvider } from './color/unit-index';
import { ScaleLegend } from 'mol-util/color/scale';
import { TableLegend } from 'mol-util/color/tables';
import { UncertaintyColorThemeProvider } from './color/uncertainty';
import { GeneColorThemeProvider } from './color/gene';
import { EntitySourceColorThemeProvider } from './color/entity-source';
import { IllustrativeColorThemeProvider } from './color/illustrative';
import { HydrophobicityColorThemeProvider } from './color/hydrophobicity';
@@ -76,7 +76,7 @@ export const BuiltInColorThemes = {
'cross-link': CrossLinkColorThemeProvider,
'element-index': ElementIndexColorThemeProvider,
'element-symbol': ElementSymbolColorThemeProvider,
'gene': GeneColorThemeProvider,
'entity-source': EntitySourceColorThemeProvider,
'hydrophobicity': HydrophobicityColorThemeProvider,
'illustrative': IllustrativeColorThemeProvider,
'molecule-type': MoleculeTypeColorThemeProvider,

View File

@@ -0,0 +1,143 @@
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { StructureProperties, StructureElement, Link, Model } from 'mol-model/structure';
import { ColorScale, Color } from 'mol-util/color';
import { Location } from 'mol-model/location';
import { ColorTheme, LocationColor } from '../color';
import { ParamDefinition as PD } from 'mol-util/param-definition'
import { ThemeDataContext } from 'mol-theme/theme';
import { ColorListOptions, ColorListName } from 'mol-util/color/scale';
import { Table, Column } from 'mol-data/db';
import { mmCIF_Schema } from 'mol-io/reader/cif/schema/mmcif';
/**
 * Fallback color: used when the context has no structure, when a location is
 * neither a structure element nor a link, and when no source table exists for
 * the location's model/entity.
 */
const DefaultColor = Color(0xCCCCCC)
/** Text exposed through the `description` field of the created theme. */
const Description = 'Gives ranges of a polymer chain a color based on the entity source it originates from. Genes get the same color per entity'
/**
 * Parameter definition of the theme. `list` selects the color list that the
 * theme hands to `ColorScale.create` as `listOrName`.
 */
export const EntitySourceColorThemeParams = {
list: PD.ColorScale<ColorListName>('RedYellowBlue', ColorListOptions),
}
/** Type counterpart of the parameter definition object of the same name. */
export type EntitySourceColorThemeParams = typeof EntitySourceColorThemeParams
/**
 * Supplies the parameter definition for the entity-source color theme.
 *
 * The shared definition object is handed out as-is; `ctx` is not consulted.
 *
 * @param ctx theme data context (currently unused)
 * @returns the module-level `EntitySourceColorThemeParams` object
 */
export function getEntitySourceColorThemeParams(ctx: ThemeDataContext) {
    // TODO return copy
    const params = EntitySourceColorThemeParams
    return params
}
/**
 * Builds the map key identifying one entity within one model.
 *
 * @param modelIndex index of the model in the structure's model list
 * @param entityId entity identifier
 * @returns `modelIndex` and `entityId` joined by a `|`
 */
function modelEntityKey(modelIndex: number, entityId: string) {
    const separator = '|'
    return modelIndex + separator + entityId
}
/**
 * Minimal table shape `addSrc` needs from an entity-source category: the
 * entity id, the source id and the begin/end sequence numbers. The column
 * types are taken from the `entity_src_gen` schema entry.
 */
type EntitySrc = Table<{
entity_id: mmCIF_Schema['entity_src_gen']['entity_id'],
pdbx_src_id: mmCIF_Schema['entity_src_gen']['pdbx_src_id'],
pdbx_beg_seq_num: mmCIF_Schema['entity_src_gen']['pdbx_beg_seq_num'],
pdbx_end_seq_num: mmCIF_Schema['entity_src_gen']['pdbx_end_seq_num'],
}>
/**
 * Column typed after `entity_src_gen.pdbx_gene_src_gene`; `addSrc` joins each
 * row value with ',' to form the gene part of the source key.
 */
type GeneSrcGene = Column<mmCIF_Schema['entity_src_gen']['pdbx_gene_src_gene']['T']>
/**
 * Builds the key under which a source gets its serial number.
 *
 * When a non-empty gene string is given it takes the place of the source id,
 * so rows of the same entity that name the same gene(s) share one key.
 *
 * @param modelIndex index of the model in the structure's model list
 * @param entityId entity identifier
 * @param srcId ordinal source id, used only when `gene` is empty
 * @param gene gene name(s) as a single string, or '' when not available
 * @returns `modelIndex|entityId|gene` or `modelIndex|entityId|srcId`
 */
function srcKey(modelIndex: number, entityId: string, srcId: number, gene: string) {
    const discriminator = gene || srcId
    return modelIndex + '|' + entityId + '|' + discriminator
}
/**
 * Registers all rows of one entity-source table of one model.
 *
 * For every row a per-(model, entity) array is looked up or created (one slot
 * per sequence position, 0 = no source), a serial number is looked up or
 * assigned for the row's source key, and the serial is written to the slots
 * covered by the row's begin/end sequence numbers.
 *
 * Rows referring to an entity for which the model holds no sequence are
 * skipped instead of throwing, so a single inconsistent row cannot prevent
 * the theme from being created.
 *
 * @param seqToSrcByModelEntity per-(model, entity) arrays mapping sequence position (0-based) to source serial; filled in place
 * @param srcKeySerialMap source key to serial number (starting from 1); filled in place
 * @param modelIndex index of `model` in the structure's model list
 * @param model model providing the entity index and the sequences
 * @param entity_src entity-source table to read
 * @param gene_src_gene optional gene-name column parallel to `entity_src`; when given, rows with the same gene names share a serial
 */
function addSrc(seqToSrcByModelEntity: Map<string, Int16Array>, srcKeySerialMap: Map<string, number>, modelIndex: number, model: Model, entity_src: EntitySrc, gene_src_gene?: GeneSrcGene) {
    const { entity_id, pdbx_src_id, pdbx_beg_seq_num, pdbx_end_seq_num } = entity_src
    for (let j = 0, jl = entity_src._rowCount; j < jl; ++j) {
        const entityId = entity_id.value(j)
        const mK = modelEntityKey(modelIndex, entityId)

        let seqToSrc = seqToSrcByModelEntity.get(mK)
        if (seqToSrc === undefined) {
            const entityIndex = model.entities.getEntityIndex(entityId)
            const entitySeq = model.sequence.sequences[entityIndex]
            // no sequence known for this entity: nothing to color, skip the row
            if (!entitySeq) continue
            seqToSrc = new Int16Array(entitySeq.sequence.sequence.length)
            seqToSrcByModelEntity.set(mK, seqToSrc)
        }

        const gene = gene_src_gene ? gene_src_gene.value(j).join(',') : ''
        const sK = srcKey(modelIndex, entityId, pdbx_src_id.value(j), gene)

        // may not be given (= 0) indicating src is for the whole seq
        const begGiven = pdbx_beg_seq_num.valueKind(j) === Column.ValueKind.Present
        const endGiven = pdbx_end_seq_num.valueKind(j) === Column.ValueKind.Present
        // clamp to the sequence so out-of-range values cannot cause pointless iterations
        const beg = Math.max(1, begGiven ? pdbx_beg_seq_num.value(j) : 1)
        const end = Math.min(seqToSrc.length, endGiven ? pdbx_end_seq_num.value(j) : seqToSrc.length)

        let srcIndex = srcKeySerialMap.get(sK) // serial no starting from 1
        if (srcIndex === undefined) {
            srcIndex = srcKeySerialMap.size + 1
            srcKeySerialMap.set(sK, srcIndex)
        }

        // set src index (seq numbers are 1-based, array slots 0-based)
        for (let i = beg; i <= end; ++i) {
            seqToSrc[i - 1] = srcIndex
        }
    }
}
/**
 * Creates the entity-source color theme.
 *
 * For every mmCIF-backed model of the structure the `entity_src_gen`,
 * `entity_src_nat` and `pdbx_entity_src_syn` tables are registered via
 * `addSrc`. Each distinct source gets a serial number and the color scale is
 * spread over `1..number of sources`.
 *
 * `DefaultColor` is returned when
 * - the context has no structure,
 * - the location is neither a structure element nor a link,
 * - no source table exists for the location's model/entity, or
 * - the residue is not covered by any source (serial 0) or its sequence id
 *   lies outside the entity's sequence.
 *
 * For link locations the color of the link's first atom (`aUnit`/`aIndex`) is used.
 *
 * @param ctx theme data context; only `ctx.structure` is read
 * @param props theme parameter values; `props.list` selects the color list
 * @returns the color theme with `granularity: 'group'` and the scale's legend
 */
export function EntitySourceColorTheme(ctx: ThemeDataContext, props: PD.Values<EntitySourceColorThemeParams>): ColorTheme<EntitySourceColorThemeParams> {
    let color: LocationColor
    const scale = ColorScale.create({ listOrName: props.list, minLabel: 'Start', maxLabel: 'End' })
    const { structure } = ctx

    if (structure) {
        // reusable location for resolving link endpoints
        const l = StructureElement.create()
        const { models } = structure
        const seqToSrcByModelEntity = new Map<string, Int16Array>()
        const srcKeySerialMap = new Map<string, number>() // serial no starting from 1

        for (let i = 0, il = models.length; i < il; ++i) {
            const m = models[i]
            // source tables are only available for mmCIF data
            if (m.sourceData.kind !== 'mmCIF') continue
            const { entity_src_gen, entity_src_nat, pdbx_entity_src_syn } = m.sourceData.data
            addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, entity_src_gen, entity_src_gen.pdbx_gene_src_gene)
            addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, entity_src_nat)
            addSrc(seqToSrcByModelEntity, srcKeySerialMap, i, m, pdbx_entity_src_syn)
        }

        scale.setDomain(1, srcKeySerialMap.size)
        const scaleColor = scale.color

        const getSrcColor = (location: StructureElement) => {
            const modelIndex = models.indexOf(location.unit.model)
            const entityId = StructureProperties.entity.id(location)
            const seqToSrc = seqToSrcByModelEntity.get(modelEntityKey(modelIndex, entityId))
            if (!seqToSrc) return DefaultColor

            // minus 1 to convert seqId to array index
            const srcIndex = seqToSrc[StructureProperties.residue.label_seq_id(location) - 1]
            // 0: residue not covered by any source; undefined: seqId outside the
            // sequence. Both are outside the scale domain (which starts at 1).
            return srcIndex ? scaleColor(srcIndex) : DefaultColor
        }

        color = (location: Location): Color => {
            if (StructureElement.isLocation(location)) {
                return getSrcColor(location)
            } else if (Link.isLocation(location)) {
                l.unit = location.aUnit
                l.element = location.aUnit.elements[location.aIndex]
                return getSrcColor(l)
            }
            return DefaultColor
        }
    } else {
        color = () => DefaultColor
    }

    return {
        factory: EntitySourceColorTheme,
        granularity: 'group',
        color,
        props,
        description: Description,
        legend: scale.legend
    }
}
/**
 * Provider object for the entity-source color theme: bundles its label, the
 * theme factory, the parameter getter and the default parameter values.
 */
export const EntitySourceColorThemeProvider: ColorTheme.Provider<EntitySourceColorThemeParams> = {
label: 'Entity Source',
factory: EntitySourceColorTheme,
getParams: getEntitySourceColorThemeParams,
defaultValues: PD.getDefaultValues(EntitySourceColorThemeParams),
// applicable whenever the context carries a structure
isApplicable: (ctx: ThemeDataContext) => !!ctx.structure
}

View File

@@ -1,127 +0,0 @@
/**
* Copyright (c) 2019 mol* contributors, licensed under MIT, See LICENSE file for more info.
*
* @author Alexander Rose <alexander.rose@weirdbyte.de>
*/
import { StructureProperties, StructureElement, Link } from 'mol-model/structure';
import { ColorScale, Color } from 'mol-util/color';
import { Location } from 'mol-model/location';
import { ColorTheme, LocationColor } from '../color';
import { ParamDefinition as PD } from 'mol-util/param-definition'
import { ThemeDataContext } from 'mol-theme/theme';
import { ColorListOptions, ColorListName } from 'mol-util/color/scale';
import { NumberArray } from 'mol-util/type-helpers';
/**
 * Fallback color: used when the context has no structure, when a location is
 * neither a structure element nor a link, and when no gene table exists for
 * the location's model/entity.
 */
const DefaultColor = Color(0xCCCCCC)
/** Text exposed through the `description` field of the created theme. */
const Description = 'Gives ranges of a polymer chain a color based on the gene (or linker/terminal extension) it originates from.'
/**
 * Parameter definition of the theme. `list` selects the color list that the
 * theme hands to `ColorScale.create` as `listOrName`.
 */
export const GeneColorThemeParams = {
list: PD.ColorScale<ColorListName>('RedYellowBlue', ColorListOptions),
}
/** Type counterpart of the parameter definition object of the same name. */
export type GeneColorThemeParams = typeof GeneColorThemeParams
/**
 * Supplies the parameter definition for the gene color theme.
 *
 * The shared definition object is handed out as-is; `ctx` is not consulted.
 *
 * @param ctx theme data context (currently unused)
 * @returns the module-level `GeneColorThemeParams` object
 */
export function getGeneColorThemeParams(ctx: ThemeDataContext) {
    // TODO return copy
    const params = GeneColorThemeParams
    return params
}
/**
 * Builds the map key identifying one entity within one model.
 *
 * @param modelIndex index of the model in the structure's model list
 * @param entityId entity identifier
 * @returns `modelIndex` and `entityId` joined by a `|`
 */
function modelEntityKey(modelIndex: number, entityId: string) {
    const separator = '|'
    return modelIndex + separator + entityId
}
/**
 * Assigns a gene serial number to a range of sequence positions.
 *
 * The gene names are upper-cased, sorted and joined with ',' to form the key,
 * so the same set of names always maps to the same serial. An empty key
 * (no names) always gets a fresh serial, stored under `UNKNOWN<serial>`.
 *
 * @param geneSerialMap gene key to serial number (starting from 1); updated in place
 * @param geneNames gene names of the source; not modified
 * @param beg first sequence position of the range (1-based, inclusive)
 * @param end last sequence position of the range (1-based, inclusive)
 * @param seqToSrcGen array receiving the serial at index `position - 1`
 */
function addGene(geneSerialMap: Map<string, number>, geneNames: string[], beg: number, end: number, seqToSrcGen: NumberArray) {
    const normalized = geneNames.map(name => name.toUpperCase())
    normalized.sort()
    const geneKey = normalized.join(',')

    // serial no starting from 1; unnamed genes never reuse an existing serial
    const known = geneKey === '' ? undefined : geneSerialMap.get(geneKey)
    let serial: number
    if (known !== undefined) {
        serial = known
    } else {
        serial = geneSerialMap.size + 1
        geneSerialMap.set(geneKey === '' ? `UNKNOWN${serial}` : geneKey, serial)
    }

    // positions are 1-based, array slots 0-based
    for (let slot = beg - 1; slot < end; ++slot) {
        seqToSrcGen[slot] = serial
    }
}
/**
 * Creates the gene color theme.
 *
 * For every mmCIF-backed model the rows of `entity_src_gen` are passed to
 * `addGene`, which fills one array per (model, entity) mapping sequence
 * positions to gene serial numbers. The color scale is spread over
 * `1..number of genes` and a location's color is looked up through its
 * entity id and `label_seq_id`.
 *
 * `DefaultColor` is returned when the context has no structure, when the
 * location is neither a structure element nor a link, and when no array
 * exists for the location's model/entity. For link locations the first
 * atom (`aUnit`/`aIndex`) determines the color.
 *
 * @param ctx theme data context; only `ctx.structure` is read
 * @param props theme parameter values; `props.list` selects the color list
 * @returns the color theme with `granularity: 'group'` and the scale's legend
 */
export function GeneColorTheme(ctx: ThemeDataContext, props: PD.Values<GeneColorThemeParams>): ColorTheme<GeneColorThemeParams> {
    const scale = ColorScale.create({ listOrName: props.list, minLabel: 'Start', maxLabel: 'End' })
    const { structure } = ctx
    let color: LocationColor

    if (!structure) {
        color = () => DefaultColor
    } else {
        const scratch = StructureElement.create()
        const { models } = structure
        const seqToSrcGenByModelEntity = new Map<string, NumberArray>()
        const geneSerialMap = new Map<string, number>() // serial no starting from 1

        for (let mi = 0, ml = models.length; mi < ml; ++mi) {
            const model = models[mi]
            if (model.sourceData.kind !== 'mmCIF') continue

            const { entity_src_gen } = model.sourceData.data
            const { entity_id, pdbx_beg_seq_num, pdbx_end_seq_num, pdbx_gene_src_gene } = entity_src_gen
            for (let row = 0, rowCount = entity_src_gen._rowCount; row < rowCount; ++row) {
                const entityId = entity_id.value(row)
                const key = modelEntityKey(mi, entityId)

                // look up the per-entity array, creating it on first use
                let seqToGene = seqToSrcGenByModelEntity.get(key)
                if (seqToGene === undefined) {
                    const entityIndex = model.entities.getEntityIndex(entityId)
                    const seq = model.sequence.sequences[entityIndex].sequence
                    seqToGene = new Int16Array(seq.sequence.length)
                    seqToSrcGenByModelEntity.set(key, seqToGene)
                }
                addGene(geneSerialMap, pdbx_gene_src_gene.value(row), pdbx_beg_seq_num.value(row), pdbx_end_seq_num.value(row), seqToGene)
            }
        }

        scale.setDomain(1, geneSerialMap.size)
        const scaleColor = scale.color

        const getGeneColor = (location: StructureElement) => {
            const modelIndex = structure.models.indexOf(location.unit.model)
            const entityId = StructureProperties.entity.id(location)
            const seqToGene = seqToSrcGenByModelEntity.get(modelEntityKey(modelIndex, entityId))
            if (!seqToGene) return DefaultColor
            // minus 1 to convert seqId to array index
            return scaleColor(seqToGene[StructureProperties.residue.label_seq_id(location) - 1])
        }

        color = (location: Location): Color => {
            if (StructureElement.isLocation(location)) return getGeneColor(location)
            if (Link.isLocation(location)) {
                scratch.unit = location.aUnit
                scratch.element = location.aUnit.elements[location.aIndex]
                return getGeneColor(scratch)
            }
            return DefaultColor
        }
    }

    return {
        factory: GeneColorTheme,
        granularity: 'group',
        color,
        props,
        description: Description,
        legend: scale ? scale.legend : undefined
    }
}
/**
 * Provider object for the gene color theme: bundles its label, the theme
 * factory, the parameter getter and the default parameter values.
 */
export const GeneColorThemeProvider: ColorTheme.Provider<GeneColorThemeParams> = {
label: 'Gene',
factory: GeneColorTheme,
getParams: getGeneColorThemeParams,
defaultValues: PD.getDefaultValues(GeneColorThemeParams),
// applicable whenever the context carries a structure
isApplicable: (ctx: ThemeDataContext) => !!ctx.structure
}