Started CIF parser

This commit is contained in:
David Sehnal
2017-09-23 22:48:45 +02:00
parent 18891f5bae
commit c236b5d015
19 changed files with 4110 additions and 129 deletions

2645
examples/1cbs_updated.cif Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -12,7 +12,8 @@
"bundle": "./node_modules/.bin/rollup -c",
"test": "./node_modules/.bin/jest",
"dist": "./node_modules/.bin/uglifyjs build/js/molio.dev.js -cm > dist/molio.js && cp build/js/molio.esm.js dist/molio.esm.js",
"script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js"
"script": "./node_modules/.bin/rollup build/js/src/script.js -e fs -f cjs -o build/js/script.js",
"runscript": "npm run script && node build\\js\\script.js"
},
"jest": {
"moduleFileExtensions": [

View File

@@ -0,0 +1 @@
// TODO

View File

@@ -0,0 +1 @@
// TODO

View File

@@ -4,6 +4,8 @@
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Column from '../common/column'
export interface File {
readonly name?: string,
readonly blocks: ReadonlyArray<Block>
@@ -14,11 +16,11 @@ export function File(blocks: ArrayLike<Block>, name?: string): File {
}
export interface Block {
readonly header?: string,
readonly header: string,
readonly categories: { readonly [name: string]: Category }
}
export function Block(categories: { readonly [name: string]: Category }, header?: string): Block {
export function Block(categories: { readonly [name: string]: Category }, header: string): Block {
return { header, categories };
}
@@ -41,8 +43,6 @@ export const enum ValuePresence {
Unknown = 2
}
export type FieldArray = string[] | number[] | Float32Array | Float64Array | Int8Array | Int16Array | Int32Array | Uint8Array | Uint16Array | Uint32Array
/**
* Implementation note:
* Always implement this as a "plain" object so that the functions are "closures"
@@ -51,8 +51,9 @@ export type FieldArray = string[] | number[] | Float32Array | Float64Array | Int
*/
export interface Field {
readonly isDefined: boolean,
readonly rowCount: number,
str(row: number): string | null,
str(row: number): string,
int(row: number): number,
float(row: number): number,
@@ -61,7 +62,7 @@ export interface Field {
areValuesEqual(rowA: number, rowB: number): boolean,
stringEquals(row: number, value: string | null): boolean,
toStringArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<string>,
toIntArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>,
toFloatArray(startRow: number, endRowExclusive: number, ctor: (size: number) => FieldArray): ReadonlyArray<number>
toStringArray(ctor?: (size: number) => Column.ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<string>,
toIntArray(ctor?: (size: number) => Column.ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<number>,
toFloatArray(ctor?: (size: number) => Column.ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<number>
}

View File

@@ -4,7 +4,8 @@
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Data from './data'
import * as Data from './data-model'
import * as Column from '../common/column'
/**
* A schema defines the shape of categories and fields.
@@ -57,12 +58,12 @@ export interface Field<T> {
areValuesEqual(rowA: number, rowB: number): boolean,
stringEquals(row: number, value: string | null): boolean,
/** Converts the selected row range to an array. ctor might or might not be called depending on the source data format. */
toArray(startRow: number, endRowExclusive: number, ctor: (size: number) => Data.FieldArray): ReadonlyArray<T> | undefined
toArray(ctor?: (size: number) => Column.ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<T> | undefined
}
export namespace Field {
export interface Schema<T> { type: T, ctor: (field: Data.Field) => Field<T>, undefinedField: Data.Field, alias?: string };
export interface Spec { undefinedField?: Data.Field, alias?: string }
export interface Schema<T> { type: T, ctor: (field: Data.Field) => Field<T>, undefinedField: (c: number) => Data.Field, alias?: string };
export interface Spec { undefinedField?: (c: number) => Data.Field, alias?: string }
export function str(spec?: Spec) { return createSchema(spec, Str); }
export function int(spec?: Spec) { return createSchema(spec, Int); }
@@ -76,28 +77,26 @@ export namespace Field {
function Int(field: Data.Field) { return create(field, field.int, field.toIntArray); }
function Float(field: Data.Field) { return create(field, field.float, field.toFloatArray); }
const DefaultUndefined: Data.Field = {
isDefined: false,
str: row => null,
int: row => 0,
float: row => 0,
function defaultUndefined(rowCount: number): Data.Field {
return {
isDefined: false,
rowCount,
str: row => '',
int: row => 0,
float: row => 0,
presence: row => Data.ValuePresence.NotSpecified,
areValuesEqual: (rowA, rowB) => true,
stringEquals: (row, value) => value === null,
presence: row => Data.ValuePresence.NotSpecified,
areValuesEqual: (rowA, rowB) => true,
stringEquals: (row, value) => value === null,
toStringArray: (startRow, endRowExclusive, ctor) => {
const count = endRowExclusive - startRow;
const ret = ctor(count) as any;
for (let i = 0; i < count; i++) { ret[i] = null; }
return ret;
},
toIntArray: (startRow, endRowExclusive, ctor) => new Uint8Array(endRowExclusive - startRow) as any,
toFloatArray: (startRow, endRowExclusive, ctor) => new Float32Array(endRowExclusive - startRow) as any
};
toStringArray: (ctor, s, e) => Column.createArray(rowCount, ctor, s, e).array,
toIntArray: (ctor, s, e) => Column.createArray(rowCount, ctor, s, e).array,
toFloatArray: (ctor, s, e) => Column.createArray(rowCount, ctor, s, e).array
};
}
function createSchema<T>(spec: Spec | undefined, ctor: (field: Data.Field) => Field<T>): Schema<T> {
return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || DefaultUndefined, alias: spec && spec.alias };
return { type: 0 as any, ctor, undefinedField: (spec && spec.undefinedField) || defaultUndefined, alias: spec && spec.alias };
}
}
@@ -122,7 +121,7 @@ class _Category implements Category<any> { // tslint:disable-line:class-name
Object.defineProperty(this, k, {
get: function() {
if (cache[k]) return cache[k];
const field = _category.getField(s.alias || k) || s.undefinedField;
const field = _category.getField(s.alias || k) || s.undefinedField(_category.rowCount);
cache[k] = s.ctor(field);
return cache[k];
},

View File

View File

View File

View File

@@ -0,0 +1,84 @@
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Column from '../common/column'
import * as Data from './data-model'
import { parseInt as fastParseInt, parseFloat as fastParseFloat } from '../common/text/number-parser'
import StringPool from '../../utils/short-string-pool'
/**
 * Builds a Data.Field over one column of text CIF data.
 *
 * `tokens` holds one (start, endExclusive) pair of offsets into `data` per row,
 * i.e. row `r` spans data[tokens[2 * r] .. tokens[2 * r + 1]).
 * The single-character tokens '.' and '?' are reported as "not present".
 */
export default function CifTextField(data: string, tokens: ArrayLike<number>, rowCount: number): Data.Field {
    const stringPool = StringPool.create();

    const str: Data.Field['str'] = row => {
        const text = StringPool.get(stringPool, data.substring(tokens[2 * row], tokens[2 * row + 1]));
        // '.' and '?' are CIF placeholders, surfaced as the empty string.
        return text === '.' || text === '?' ? '' : text;
    };

    // Unparsable numbers fall back to 0.
    const int: Data.Field['int'] = row => fastParseInt(data, tokens[2 * row], tokens[2 * row + 1]) || 0;
    const float: Data.Field['float'] = row => fastParseFloat(data, tokens[2 * row], tokens[2 * row + 1]) || 0;

    const presence: Data.Field['presence'] = row => {
        const tokenStart = tokens[2 * row];
        // Only a single-character token can be one of the placeholders.
        if (tokens[2 * row + 1] - tokenStart === 1) {
            switch (data.charCodeAt(tokenStart)) {
                case 46 /* . */: return Data.ValuePresence.NotSpecified;
                case 63 /* ? */: return Data.ValuePresence.Unknown;
            }
        }
        return Data.ValuePresence.Present;
    };

    const areValuesEqual: Data.Field['areValuesEqual'] = (rowA, rowB) => {
        const startA = tokens[2 * rowA], startB = tokens[2 * rowB];
        const length = tokens[2 * rowA + 1] - startA;
        if (length !== tokens[2 * rowB + 1] - startB) return false;
        // Compare the raw characters in place, without allocating substrings.
        let k = 0;
        while (k < length) {
            if (data.charCodeAt(startA + k) !== data.charCodeAt(startB + k)) return false;
            k++;
        }
        return true;
    };

    const stringEquals: Data.Field['stringEquals'] = (row, value) => {
        const tokenStart = tokens[2 * row];
        // null / empty value matches exactly the rows that hold a placeholder.
        if (!value) return presence(row) !== Data.ValuePresence.Present;
        const length = value.length;
        if (length !== tokens[2 * row + 1] - tokenStart) return false;
        let k = 0;
        while (k < length) {
            if (data.charCodeAt(tokenStart + k) !== value.charCodeAt(k)) return false;
            k++;
        }
        return true;
    };

    return {
        isDefined: true,
        rowCount,
        str,
        int,
        float,
        presence,
        areValuesEqual,
        stringEquals,
        toStringArray: (ctor, s, e) => {
            const range = Column.createArray(rowCount, ctor, s, e);
            return fillArrayValues(str, range.array, range.start);
        },
        toIntArray: (ctor, s, e) => {
            const range = Column.createArray(rowCount, ctor, s, e);
            return fillArrayValues(int, range.array, range.start);
        },
        toFloatArray: (ctor, s, e) => {
            const range = Column.createArray(rowCount, ctor, s, e);
            return fillArrayValues(float, range.array, range.start);
        }
    };
}
/**
 * Fills `target` in place so that target[i] === value(start + i) for every
 * index of `target`, and returns the same `target` instance.
 */
function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
    const count = target.length;
    let offset = 0;
    while (offset < count) {
        target[offset] = value(start + offset);
        offset++;
    }
    return target;
}

View File

@@ -0,0 +1,594 @@
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* @author David Sehnal <david.sehnal@gmail.com>
*/
/**
* mmCIF parser.
*
* Trying to be as close to the specification http://www.iucr.org/resources/cif/spec/version1.1/cifsyntax
*
* Differences I'm aware of:
* - Except keywords (data_, loop_, save_) everything is case sensitive.
* - The tokens . and ? are treated the same as the values '.' and '?'.
* - Ignores \ in the multiline values:
* ;abc\
* efg
* ;
* should have the value 'abcefg' but will have the value 'abc\\nefg' instead.
* Post processing of this is left to the consumer of the data.
* - Similarly, things like punctuation (\', ..) are left to be processed by the user if needed.
*
*/
import * as Data from './data-model'
import Field from './text-field'
import { Tokens } from '../common/text/tokenizer'
import Result from '../result'
/**
 * Types of supported mmCIF tokens.
 *
 * The tokenizer stores one of these in TokenizerState.currentTokenType
 * after every call to moveNextInternal().
 */
const enum CifTokenType {
// Token whose first five characters spell "data_" (case insensitive).
Data = 0,
// Token whose first five characters spell "save_" (case insensitive).
Save = 1,
// Token that is exactly "loop_" (case insensitive).
Loop = 2,
// Any plain, quoted or multi-line value.
Value = 3,
// Unquoted token that starts with '_'.
ColumnName = 4,
// Line starting with '#'; skipped by moveNext().
Comment = 5,
// No more input.
End = 6
}
/**
 * Mutable state of the CIF tokenizer. All tokenizer functions read and
 * update this object in place.
 */
interface TokenizerState {
// The complete input text.
data: string;
// Index into `data` of the next character to examine.
position: number;
// Cached data.length.
length: number;
// Set to true when the current token was quoted or multi-line and its delimiters were stripped.
isEscaped: boolean;
// Line counter, starts at 1; advanced while skipping whitespace and multi-line values.
currentLineNumber: number;
// Type of the token most recently produced.
currentTokenType: CifTokenType;
// The current token is data.substring(currentTokenStart, currentTokenEnd).
currentTokenStart: number;
currentTokenEnd: number;
}
/**
 * Consumes an unquoted value: advances `position` to the first tab, newline,
 * carriage return or space (or to the end of the data) and records that
 * index as `currentTokenEnd`.
 */
function eatValue(state: TokenizerState) {
    let cursor = state.position;
    for (; cursor < state.length; cursor++) {
        const code = state.data.charCodeAt(cursor);
        // \t, \n, \r or ' ' terminates the value
        if (code === 9 || code === 10 || code === 13 || code === 32) break;
    }
    state.position = cursor;
    state.currentTokenEnd = cursor;
}
/**
 * Eats a quoted value delimited by `esc` (the char code of ' or ").
 * On entry `position` points at the opening quote.
 *
 * A quote only closes the value when it is followed by whitespace or by the
 * end of the input; in that case the quotes are stripped from the token
 * (currentTokenStart is advanced, currentTokenEnd points at the closing quote)
 * and `isEscaped` is set.
 *
 * "Degenerate" cases:
 * - 'xx'x' => xx'x    (a quote followed by a non-whitespace char is part of the value)
 * - 'xxxNEWLINE => 'xxx  (unterminated; the opening quote stays in the token)
 */
function eatEscaped(state: TokenizerState, esc: number) {
    ++state.position;
    while (state.position < state.length) {
        const c = state.data.charCodeAt(state.position);
        if (c === esc) {
            const nextPosition = state.position + 1;
            // charCodeAt() yields NaN (not undefined) past the end of the string,
            // so the end of the stream has to be detected by index.
            const atEnd = nextPosition >= state.length;
            const next = atEnd ? -1 : state.data.charCodeAt(nextPosition);
            if (atEnd || next === 9 || next === 10 || next === 13 || next === 32) { // EOF, \t, \n, \r, ' '
                // get rid of the quotes.
                state.currentTokenStart++;
                state.currentTokenEnd = state.position;
                state.isEscaped = true;
                ++state.position;
                return;
            }
        } else if (c === 10 || c === 13) {
            // handle 'xxxNEWLINE => 'xxx
            state.currentTokenEnd = state.position;
            return;
        }
        ++state.position;
    }
    state.currentTokenEnd = state.position;
}
/**
 * Eats a multiline token of the form NL;....NL;
 *
 * On entry `position` points at the opening ';'. When a ';' that directly
 * follows a newline is found, the token is set to the text between the two
 * semicolons with trailing newlines removed, `isEscaped` is set and
 * `position` moves past the closing ';'. Line numbers are counted on the way.
 *
 * If no closing ';' exists, `position` moves to the end of the data, the
 * token bounds are left untouched and the last character code seen is returned.
 */
function eatMultiline(state: TokenizerState) {
    const text = state.data;
    let previous = 59; // the opening ;
    let cursor = state.position + 1;
    for (; cursor < state.length; cursor++) {
        const code = text.charCodeAt(cursor);
        const afterNewline = previous === 10 || previous === 13;
        if (code === 59 && afterNewline) {
            state.position = cursor + 1;
            // drop the opening ;
            state.currentTokenStart++;
            // walk back over the newline(s) that precede the closing ;
            let last = cursor - 1;
            let tail = text.charCodeAt(last);
            while (tail === 10 || tail === 13) {
                last--;
                tail = text.charCodeAt(last);
            }
            state.currentTokenEnd = last + 1;
            state.isEscaped = true;
            return;
        }
        // \r always starts a new line, \n only when it is not the second half of \r\n
        if (code === 13 || (code === 10 && previous !== 13)) {
            state.currentLineNumber++;
        }
        previous = code;
    }
    state.position = cursor;
    return previous;
}
/**
 * Advances `position` to the next \n or \r (or to the end of the data).
 * The newline itself is not consumed, so it is later handled, and counted,
 * by skipWhitespace().
 */
function skipCommentLine(state: TokenizerState) {
    let cursor = state.position;
    for (; cursor < state.length; cursor++) {
        const code = state.data.charCodeAt(cursor);
        if (code === 10 || code === 13) break;
    }
    state.position = cursor;
}
/**
 * Skips spaces, tabs, \n and \r starting at `position`, incrementing
 * `currentLineNumber` once per line break (\r\n counts as a single break).
 *
 * @returns the char code of the last whitespace character skipped, or
 *          10 (\n) if nothing was skipped.
 */
function skipWhitespace(state: TokenizerState): number {
    let last = 10;
    for (; state.position < state.length; ++state.position) {
        const code = state.data.charCodeAt(state.position);
        if (code === 13) { // \r
            ++state.currentLineNumber;
        } else if (code === 10) { // \n
            // the line was already counted if this \n completes a \r\n pair
            if (last !== 13) ++state.currentLineNumber;
        } else if (code !== 9 && code !== 32) { // neither \t nor ' '
            return last;
        }
        last = code;
    }
    return last;
}
/**
 * Checks whether the current token starts with "data" in any letter case.
 * The caller has already verified that the token has at least 5 characters
 * and that the 5th one is '_'.
 */
function isData(state: TokenizerState): boolean {
    const text = state.data, at = state.currentTokenStart;
    const c0 = text.charCodeAt(at);
    const c1 = text.charCodeAt(at + 1);
    const c2 = text.charCodeAt(at + 2);
    const c3 = text.charCodeAt(at + 3);
    return (c0 === 68 || c0 === 100) // d/D
        && (c1 === 65 || c1 === 97) // a/A
        && (c2 === 84 || c2 === 116) // t/T
        && (c3 === 65 || c3 === 97); // a/A
}
/**
 * Checks whether the current token starts with "save" in any letter case.
 * The caller has already verified that the token has at least 5 characters
 * and that the 5th one is '_'.
 */
function isSave(state: TokenizerState): boolean {
    const text = state.data, at = state.currentTokenStart;
    const c0 = text.charCodeAt(at);
    const c1 = text.charCodeAt(at + 1);
    const c2 = text.charCodeAt(at + 2);
    const c3 = text.charCodeAt(at + 3);
    return (c0 === 83 || c0 === 115) // s/S
        && (c1 === 65 || c1 === 97) // a/A
        && (c2 === 86 || c2 === 118) // v/V
        && (c3 === 69 || c3 === 101); // e/E
}
/**
 * Checks whether the current token is exactly "loop_" in any letter case.
 * The caller has already verified that the token has at least 5 characters
 * and that the 5th one is '_'; here the length must be exactly 5.
 */
function isLoop(state: TokenizerState): boolean {
    const at = state.currentTokenStart;
    if (state.currentTokenEnd - at !== 5) return false;
    const text = state.data;
    const c0 = text.charCodeAt(at);
    const c1 = text.charCodeAt(at + 1);
    const c2 = text.charCodeAt(at + 2);
    const c3 = text.charCodeAt(at + 3);
    return (c0 === 76 || c0 === 108) // l/L
        && (c1 === 79 || c1 === 111) // o/O
        && (c2 === 79 || c2 === 111) // o/O
        && (c3 === 80 || c3 === 112); // p/P
}
/**
 * Checks if the current token shares the namespace with the string at <start,end).
 *
 * That is the case when the token begins with data[start..end) and either
 * ends right there or continues with a '.'.
 */
function isNamespace(state: TokenizerState, start: number, end: number): boolean {
    const text = state.data;
    const shift = state.currentTokenStart - start;
    const nsLength = end - start;
    const tokenLength = state.currentTokenEnd - state.currentTokenStart;
    if (nsLength > tokenLength) return false;

    let cursor = start;
    while (cursor < end) {
        if (text.charCodeAt(cursor) !== text.charCodeAt(cursor + shift)) return false;
        cursor++;
    }
    if (nsLength === tokenLength) return true;
    // the token is longer: the namespace must be followed by a '.'
    return text.charCodeAt(cursor + shift) === 46;
}
/**
 * Finds where the namespace part of the current token ends.
 *
 * @returns the index of the first '.' inside the current token, or
 *          currentTokenEnd when the token contains no '.'.
 */
function getNamespaceEnd(state: TokenizerState): number {
    let cursor = state.currentTokenStart;
    while (cursor < state.currentTokenEnd && state.data.charCodeAt(cursor) !== 46) {
        cursor++;
    }
    return cursor;
}
/**
 * Returns the namespace text of the current token, i.e. the characters from
 * the token start up to (not including) `endIndex`.
 * `endIndex` is expected to come from getNamespaceEnd().
 */
function getNamespace(state: TokenizerState, endIndex: number) {
    const { data, currentTokenStart } = state;
    return data.substring(currentTokenStart, endIndex);
}
/**
 * Returns the text of the current token,
 * i.e. data[currentTokenStart .. currentTokenEnd).
 */
function getTokenString(state: TokenizerState) {
    const { data, currentTokenStart, currentTokenEnd } = state;
    return data.substring(currentTokenStart, currentTokenEnd);
}
/**
 * Move to the next token (comments included).
 *
 * Skips leading whitespace, then classifies the token by its first character
 * and stores the result in currentTokenType / currentTokenStart /
 * currentTokenEnd / isEscaped. When the input is exhausted only
 * currentTokenType is updated (to End).
 */
function moveNextInternal(state: TokenizerState) {
    // char code of the last whitespace skipped; tells whether we are at a line start
    const prev = skipWhitespace(state);
    if (state.position >= state.length) {
        state.currentTokenType = CifTokenType.End;
        return;
    }
    state.currentTokenStart = state.position;
    state.currentTokenEnd = state.position;
    state.isEscaped = false;
    const c = state.data.charCodeAt(state.position);
    switch (c) {
        case 35: // #, comment
            skipCommentLine(state);
            state.currentTokenType = CifTokenType.Comment;
            break;
        case 34: // ", escaped value
        case 39: // ', escaped value
            eatEscaped(state, c);
            state.currentTokenType = CifTokenType.Value;
            break;
        case 59: // ;, possible multiline value
            // multiline value must start at the beginning of the line.
            if (prev === 10 || prev === 13) { // \n or \r
                eatMultiline(state);
            } else {
                eatValue(state);
            }
            state.currentTokenType = CifTokenType.Value;
            break;
        default:
            eatValue(state);
            // Note: isEscaped was reset above and eatValue() never sets it,
            // so an unquoted token needs no "escaped" check here.
            if (state.data.charCodeAt(state.currentTokenStart) === 95) { // _
                // _ always means column name
                state.currentTokenType = CifTokenType.ColumnName;
            } else if (state.currentTokenEnd - state.currentTokenStart >= 5 && state.data.charCodeAt(state.currentTokenStart + 4) === 95) {
                // 5th char needs to be _ for data_, save_ or loop_
                if (isData(state)) state.currentTokenType = CifTokenType.Data;
                else if (isSave(state)) state.currentTokenType = CifTokenType.Save;
                else if (isLoop(state)) state.currentTokenType = CifTokenType.Loop;
                else state.currentTokenType = CifTokenType.Value;
            } else {
                // all other tests failed, we are at Value token.
                state.currentTokenType = CifTokenType.Value;
            }
            break;
    }
}
/**
 * Advances the tokenizer to the next token that is not a comment.
 */
function moveNext(state: TokenizerState) {
    do {
        moveNextInternal(state);
    } while (state.currentTokenType === CifTokenType.Comment);
}
/**
 * Creates the initial tokenizer state for `data`: positioned at index 0,
 * on line 1, with an empty current token of type End.
 */
function createTokenizer(data: string): TokenizerState {
    const initialState: TokenizerState = {
        data: data,
        length: data.length,
        position: 0,
        currentTokenStart: 0,
        currentTokenEnd: 0,
        // no token has been read yet
        currentTokenType: CifTokenType.End,
        currentLineNumber: 1,
        isEscaped: false
    };
    return initialState;
}
/**
 * Helper shape of the category result.
 *
 * Returned by handleSingle() and handleLoop(); the parsed category itself is
 * stored into the `categories` map passed to those functions.
 */
interface CifCategoryResult {
// True when the category could not be read.
hasError: boolean;
// Line number reported for the error (0 on success).
errorLine: number;
// Error description ('' on success).
errorMessage: string;
}
/**
 * Reads a category containing a single row, i.e. a run of
 * `_namespace.field value` pairs that share the namespace of the current
 * (column name) token. The resulting one-row category is stored in
 * `categories` under the namespace name.
 *
 * Fails with 'Expected value.' when a column name is not followed by a value.
 */
function handleSingle(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): CifCategoryResult {
    const nsStart = tokenizer.currentTokenStart;
    const nsEnd = getNamespaceEnd(tokenizer);
    const name = getNamespace(tokenizer, nsEnd);
    const fields = Object.create(null);

    // keep going while the tokens are column names from the same namespace
    while (tokenizer.currentTokenType === CifTokenType.ColumnName && isNamespace(tokenizer, nsStart, nsEnd)) {
        // strip "<namespace>." from the column name
        const fieldName = getTokenString(tokenizer).substring(name.length + 1);
        moveNext(tokenizer);
        if (tokenizer.currentTokenType as any !== CifTokenType.Value) {
            return {
                hasError: true,
                errorLine: tokenizer.currentLineNumber,
                errorMessage: 'Expected value.'
            };
        }
        const valueBounds = [tokenizer.currentTokenStart, tokenizer.currentTokenEnd];
        fields[fieldName] = Field(tokenizer.data, valueBounds, 1);
        moveNext(tokenizer);
    }

    categories[name] = Data.Category(1, fields);
    return {
        hasError: false,
        errorLine: 0,
        errorMessage: ''
    };
}
/**
 * Reads a loop: the `loop_` keyword (current token on entry), followed by the
 * column names and then the values in row-major order. The resulting category
 * is stored in `categories` under the namespace of the first column name.
 *
 * Fails when
 * - no column name follows `loop_`, or
 * - the number of values is not a multiple of the number of columns.
 */
function handleLoop(tokenizer: TokenizerState, categories: { [name: string]: Data.Category }): CifCategoryResult {
    const loopLine = tokenizer.currentLineNumber;
    moveNext(tokenizer);
    const name = getNamespace(tokenizer, getNamespaceEnd(tokenizer));

    const fieldNames: string[] = [];
    while (tokenizer.currentTokenType === CifTokenType.ColumnName) {
        // strip "<namespace>." from the column name
        fieldNames.push(getTokenString(tokenizer).substring(name.length + 1));
        moveNext(tokenizer);
    }

    const fieldCount = fieldNames.length;
    if (fieldCount === 0) {
        // Without this guard the modulo arithmetic below would operate on
        // NaN (x % 0) and values would be added to an undefined column.
        return {
            hasError: true,
            errorLine: tokenizer.currentLineNumber,
            errorMessage: 'Expected at least one column name for loop starting at line ' + loopLine + '.'
        };
    }

    // _atom_site is by far the largest category; size its token buffers from the input length.
    const rowCountEstimate = name === '_atom_site' ? (tokenizer.data.length / 100) | 0 : 32;
    const tokens: Tokens[] = [];
    for (let i = 0; i < fieldCount; i++) tokens[i] = Tokens.create(rowCountEstimate);

    // values come row by row, so they are dealt round-robin to the columns
    let tokenCount = 0;
    while (tokenizer.currentTokenType === CifTokenType.Value) {
        Tokens.add(tokens[(tokenCount++) % fieldCount], tokenizer.currentTokenStart, tokenizer.currentTokenEnd);
        moveNext(tokenizer);
    }

    if (tokenCount % fieldCount !== 0) {
        return {
            hasError: true,
            errorLine: tokenizer.currentLineNumber,
            errorMessage: 'The number of values for loop starting at line ' + loopLine + ' is not a multiple of the number of columns.'
        };
    }

    const rowCount = (tokenCount / fieldCount) | 0;
    const fields = Object.create(null);
    for (let i = 0; i < fieldCount; i++) {
        fields[fieldNames[i]] = Field(tokenizer.data, tokens[i].indices, rowCount);
    }

    categories[name] = Data.Category(rowCount, fields);
    return {
        hasError: false,
        errorLine: 0,
        errorMessage: ''
    };
}
/**
 * Creates an error result.
 *
 * @param line line number the error is reported for
 * @param message description of the error
 * @returns the value produced by Result.error for a Data.File result
 */
function error(line: number, message: string) {
// note the argument order: Result.error takes the message first
return Result.error<Data.File>(message, line);
}
/**
 * Creates a data result.
 *
 * @param data the parsed file
 * @returns `data` wrapped by Result.success
 */
function result(data: Data.File) {
return Result.success(data);
}
/**
 * Parses an mmCIF file.
 *
 * Walks the token stream and groups categories into data blocks. A block is
 * only emitted when it contains at least one category. Categories that appear
 * before the first data_ token end up in a block with an empty header.
 *
 * TODO: save frames (save_) are not supported yet; a save_ token is reported
 * as an unexpected token.
 *
 * @returns Result wrapping the parsed Data.File, or an error with its line number.
 */
function parseInternal(data: string): Result<Data.File> {
    const tokenizer = createTokenizer(data);
    const dataBlocks: Data.Block[] = [];
    let blockHeader: string = '';
    let blockCategories = Object.create(null);

    // Emits the block collected so far, unless it has no categories.
    const flushBlock = () => {
        if (Object.keys(blockCategories).length > 0) {
            dataBlocks.push(Data.Block(blockCategories, blockHeader));
        }
    };

    moveNext(tokenizer);
    while (tokenizer.currentTokenType !== CifTokenType.End) {
        switch (tokenizer.currentTokenType) {
            // Data block
            case CifTokenType.Data: {
                flushBlock();
                // the header is whatever follows the "data_" prefix
                blockHeader = data.substring(tokenizer.currentTokenStart + 5, tokenizer.currentTokenEnd);
                blockCategories = Object.create(null);
                moveNext(tokenizer);
                break;
            }
            // Loop
            case CifTokenType.Loop: {
                const outcome = handleLoop(tokenizer, blockCategories);
                if (outcome.hasError) {
                    return error(outcome.errorLine, outcome.errorMessage);
                }
                break;
            }
            // Single row
            case CifTokenType.ColumnName: {
                const outcome = handleSingle(tokenizer, blockCategories);
                if (outcome.hasError) {
                    return error(outcome.errorLine, outcome.errorMessage);
                }
                break;
            }
            // Out of options
            default:
                return error(tokenizer.currentLineNumber, 'Unexpected token. Expected data_, loop_, or data name.');
        }
    }

    flushBlock();
    return result(Data.File(dataBlocks));
}
/**
 * Public entry point of the text CIF parser.
 *
 * @param data the complete CIF file content
 * @returns the result of parseInternal(): the parsed file or a parse error
 */
export default function parse(data: string) {
return parseInternal(data);
}

View File

@@ -15,7 +15,7 @@ export namespace ColumnType {
}
export interface Column<T> {
readonly isColumnDefined: boolean,
readonly isDefined: boolean,
readonly rowCount: number,
value(row: number): T,
toArray(ctor?: (size: number) => ArrayType, startRow?: number, endRowExclusive?: number): ReadonlyArray<T>
@@ -24,7 +24,7 @@ export interface Column<T> {
export function UndefinedColumn<T extends ColumnType>(rowCount: number, type: T): Column<T['@type']> {
const value: Column<T['@type']>['value'] = type.kind === 'str' ? row => '' : row => 0;
return {
isColumnDefined: false,
isDefined: false,
rowCount,
value,
toArray(ctor, s, e) {

View File

@@ -21,7 +21,7 @@ export default function FixedColumnProvider(info: FixedColumnInfo) {
}
}
function getArrayValues(value: (row: number) => any, target: any[], start: number) {
function fillArrayValues(value: (row: number) => any, target: any[], start: number) {
for (let i = 0, _e = target.length; i < _e; i++) target[i] = value(start + i);
return target;
}
@@ -32,29 +32,33 @@ export function FixedColumn<T extends ColumnType>(info: FixedColumnInfo, offset:
const pool = kind === 'pooled-str' ? StringPool.create() : void 0;
const value: Column<T['@type']>['value'] = kind === 'str' ? row => {
let s = lines[2 * row] + offset, e = s + width, le = lines[2 * row + 1];
let s = lines[2 * row] + offset, le = lines[2 * row + 1];
if (s >= le) return '';
let e = s + width;
if (e > le) e = le;
return trimStr(data, s, e);
} : kind === 'pooled-str' ? row => {
let s = lines[2 * row] + offset, e = s + width, le = lines[2 * row + 1];
let s = lines[2 * row] + offset, le = lines[2 * row + 1];
if (s >= le) return '';
let e = s + width;
if (e > le) e = le;
return StringPool.get(pool!, trimStr(data, s, e));
} : kind === 'int' ? row => {
const s = lines[2 * row] + offset, e = s + width;
return parseIntSkipLeadingWhitespace(data, s, e);
const s = lines[2 * row] + offset;
if (s > lines[2 * row + 1]) return 0;
return parseIntSkipLeadingWhitespace(data, s, s + width);
} : row => {
const s = lines[2 * row] + offset, e = s + width;
return parseFloatSkipLeadingWhitespace(data, s, e);
}
const s = lines[2 * row] + offset;
if (s > lines[2 * row + 1]) return 0;
return parseFloatSkipLeadingWhitespace(data, s, s + width);
};
return {
isColumnDefined: true,
isDefined: true,
rowCount,
value,
toArray(ctor, s, e) {
const { array, start } = createArray(rowCount, ctor, s, e);
return getArrayValues(value, array, start);
return fillArrayValues(value, array, start);
}
};
}

View File

@@ -4,50 +4,24 @@
* @author David Sehnal <david.sehnal@gmail.com>
*/
import * as Data from '../cif/data'
import * as Data from '../cif/data-model'
import TextField from '../cif/text-field'
import * as Schema from '../cif/schema'
function Field(values: any[]): Data.Field {
return {
isDefined: true,
str: row => '' + values[row],
int: row => +values[row] || 0,
float: row => +values[row] || 0,
const columnData = `123abc`;
presence: row => Data.ValuePresence.Present,
areValuesEqual: (rowA, rowB) => values[rowA] === values[rowB],
stringEquals: (row, value) => '' + values[row] === value,
toStringArray: (startRow, endRowExclusive, ctor) => {
const count = endRowExclusive - startRow;
const ret = ctor(count) as any;
for (let i = 0; i < count; i++) { ret[i] = values[startRow + i]; }
return ret;
},
toIntArray: (startRow, endRowExclusive, ctor) => {
const count = endRowExclusive - startRow;
const ret = ctor(count) as any;
for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; }
return ret;
},
toFloatArray: (startRow, endRowExclusive, ctor) => {
const count = endRowExclusive - startRow;
const ret = ctor(count) as any;
for (let i = 0; i < count; i++) { ret[i] = +values[startRow + i]; }
return ret;
}
}
}
const intField = TextField(columnData, [0, 1, 1, 2, 2, 3], 3);
const strField = TextField(columnData, [3, 4, 4, 5, 5, 6], 3);
const testBlock = Data.Block({
'atoms': Data.Category(2, {
x: Field([1, 2]),
name: Field(['C', 'O'])
'atoms': Data.Category(3, {
x: intField,
name: strField
})
});
}, 'test');
namespace TestSchema {
export const atoms = { x: Schema.Field.float(), name: Schema.Field.str() }
export const atoms = { x: Schema.Field.int(), name: Schema.Field.str() }
export const schema = { atoms }
}
@@ -56,13 +30,14 @@ describe('schema', () => {
it('property access', () => {
const { x, name } = data.atoms;
expect(x.value(0)).toBe(1);
expect(name.value(1)).toBe('O');
expect(name.value(1)).toBe('b');
});
it('toArray', () => {
const ret = data.atoms.x.toArray(0, 2, (s) => new Int32Array(s))!;
expect(ret.length).toBe(2);
const ret = data.atoms.x.toArray(s => new Int32Array(s))!;
expect(ret.length).toBe(3);
expect(ret[0]).toBe(1);
expect(ret[1]).toBe(2);
expect(ret[2]).toBe(3);
})
});

View File

@@ -5,8 +5,8 @@
* @author David Sehnal <david.sehnal@gmail.com>
*/
import FixedColumn from '../text/column/fixed'
import { ColumnType } from '../../common/column'
import FixedColumn from '../common/text/column/fixed'
import { ColumnType } from '../common/column'
const lines = [
'1.123 abc',

View File

@@ -8,63 +8,92 @@
import * as fs from 'fs'
import Gro from './reader/gro/parser'
import CIF from './reader/cif/text-parser'
//const file = '1crn.gro'
// const file = '1crn.gro'
// const file = 'water.gro'
// const file = 'test.gro'
const file = 'md_1u19_trj.gro'
fs.readFile(`./examples/${file}`, 'utf8', function (err,input) {
if (err) {
return console.log(err);
}
// console.log(data);
export function _gro() {
fs.readFile(`./examples/${file}`, 'utf8', function (err, input) {
if (err) {
return console.log(err);
}
// console.log(data);
console.time('parse')
const parsed = Gro(input)
console.timeEnd('parse')
if (parsed.isError) {
console.log(parsed)
return;
}
console.time('parse')
const parsed = Gro(input)
console.timeEnd('parse')
if (parsed.isError) {
console.log(parsed)
return;
}
const groFile = parsed.result
const groFile = parsed.result
console.log('structure count: ', groFile.structures.length);
console.log('structure count: ', groFile.structures.length);
const data = groFile.structures[0];
const data = groFile.structures[0];
// const header = groFile.blocks[0].getCategory('header')
const { header, atoms } = data;
console.log(JSON.stringify(header, null, 2));
console.log('number of atoms:', atoms.count);
// const header = groFile.blocks[0].getCategory('header')
const { header, atoms } = data;
console.log(JSON.stringify(header, null, 2));
console.log('number of atoms:', atoms.count);
console.log(`'${atoms.residueNumber.value(1)}'`)
console.log(`'${atoms.residueName.value(1)}'`)
console.log(`'${atoms.atomName.value(1)}'`)
console.log(atoms.z.value(1))
console.log(`'${atoms.z.value(1)}'`)
console.log(`'${atoms.residueNumber.value(1)}'`)
console.log(`'${atoms.residueName.value(1)}'`)
console.log(`'${atoms.atomName.value(1)}'`)
console.log(atoms.z.value(1))
console.log(`'${atoms.z.value(1)}'`)
const n = atoms.count;
console.log('rowCount', n)
const n = atoms.count;
console.log('rowCount', n)
console.time('getFloatArray x')
const x = atoms.x.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray x')
console.log(x.length, x[0], x[x.length-1])
console.time('getFloatArray x')
const x = atoms.x.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray x')
console.log(x.length, x[0], x[x.length - 1])
console.time('getFloatArray y')
const y = atoms.y.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray y')
console.log(y.length, y[0], y[y.length-1])
console.time('getFloatArray y')
const y = atoms.y.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray y')
console.log(y.length, y[0], y[y.length - 1])
console.time('getFloatArray z')
const z = atoms.z.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray z')
console.log(z.length, z[0], z[z.length-1])
console.time('getFloatArray z')
const z = atoms.z.toArray(x => new Float32Array(x))!
console.timeEnd('getFloatArray z')
console.log(z.length, z[0], z[z.length - 1])
console.time('getIntArray residueNumber')
const residueNumber = atoms.residueNumber.toArray(x => new Int32Array(x))!
console.timeEnd('getIntArray residueNumber')
console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length-1])
});
console.time('getIntArray residueNumber')
const residueNumber = atoms.residueNumber.toArray(x => new Int32Array(x))!
console.timeEnd('getIntArray residueNumber')
console.log(residueNumber.length, residueNumber[0], residueNumber[residueNumber.length - 1])
});
}
/**
 * Example driver for the CIF parser.
 *
 * Reads `./examples/1cbs_updated.cif` as UTF-8 text, times the `CIF(...)` call
 * and, when parsing succeeds, prints two fields of the `_atom_site` category
 * of the first block: `Cartn_x` at index 0 (as a float) and all
 * `label_atom_id` values (as strings).
 *
 * Read errors and parse errors are logged to the console and end the run.
 */
export function _cif() {
    // Alternative (large) local test input: 'c:/test/quick/3j3q.cif'
    const path = `./examples/1cbs_updated.cif`;

    fs.readFile(path, 'utf8', (err, input) => {
        if (err) {
            console.log(err);
            return;
        }

        console.time('parseCIF');
        const parsed = CIF(input);
        console.timeEnd('parseCIF');

        if (parsed.isError) {
            console.log(parsed);
            return;
        }

        const atomSite = parsed.result.blocks[0].categories._atom_site;

        const cartnX = atomSite.getField('Cartn_x')!;
        console.log(cartnX.float(0));

        const labelAtomId = atomSite.getField('label_atom_id')!;
        console.log(labelAtomId.toStringArray());
    });
}
// Run the CIF example as soon as this module is evaluated.
_cif();

227
src/utils/msgpack/decode.ts Normal file
View File

@@ -0,0 +1,227 @@
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* Adapted from https://github.com/rcsb/mmtf-javascript
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { utf8Read } from '../utf8'
/**
 * Decode a single MessagePack value from `buffer`.
 *
 * `buffer` may be a view into a larger ArrayBuffer (for example the result of
 * `subarray()`, or a pooled Node.js `Buffer`). The DataView is therefore
 * created over exactly the same byte window as `buffer`, so that offsets used
 * for direct indexing and for DataView reads refer to the same bytes.
 *
 * @param buffer bytes of the MessagePack message
 * @returns the decoded value (object, array, string, number, boolean, Uint8Array or null)
 * @throws Error when an unsupported type byte is encountered
 */
export default function decode(buffer: Uint8Array) {
    const dataView = new DataView(buffer.buffer, buffer.byteOffset, buffer.byteLength);
    return parse({ buffer, offset: 0, dataView });
}
// Loosely based on
// The MIT License (MIT)
// Copyright (c) 2013 Tim Caswell <tim@creationix.com>
// https://github.com/creationix/msgpack-js
/** Mutable decoding cursor that is passed to every decoding helper. */
interface State {
// Bytes of the MessagePack message; type bytes and raw payloads are read from here by index.
buffer: Uint8Array,
// Index of the next byte to read; each helper advances it past what it consumed.
offset: number,
// DataView used for the multi-byte numeric reads (DataView getters default to big-endian).
dataView: DataView
}
/**
 * Decode `length` consecutive key/value pairs into a plain object.
 *
 * Each key and each value is read with `parse`, key first, so the cursor in
 * `state` ends up after the last value.
 *
 * @param state decoding cursor
 * @param length number of key-value pairs to read
 * @return object holding the decoded pairs
 */
function map(state: State, length: number) {
    const result: any = {};
    let remaining = length;
    while (remaining-- > 0) {
        const key = parse(state);
        result[key] = parse(state);
    }
    return result;
}
/**
 * Decode a binary payload of `length` bytes starting at the current offset.
 *
 * The bytes are copied into a fresh Uint8Array instead of returning a
 * `subarray` view of the input: the original author measured element access
 * on such views to be several times slower than on a standalone array, so a
 * little extra memory is traded for faster access later on.
 *
 * Positions past the end of the input are left as 0 in the result.
 *
 * @param state decoding cursor; its offset is advanced by `length`
 * @param length number of bytes to copy
 * @return newly allocated array holding the payload
 */
function bin(state: State, length: number) {
    const start = state.offset;
    const copy = new Uint8Array(length);
    copy.set(state.buffer.subarray(start, start + length));
    state.offset = start + length;
    return copy;
}
/**
 * Decode a UTF-8 string payload starting at the current offset.
 *
 * @param state decoding cursor; its offset is advanced by `length`
 * @param length size of the encoded string in bytes
 * @return decoded string
 */
function str(state: State, length: number) {
    const start = state.offset;
    const text = utf8Read(state.buffer, start, length);
    state.offset = start + length;
    return text;
}
/**
 * Decode `length` consecutive values into an array.
 *
 * Every element is read with `parse`, in order, so the cursor in `state`
 * ends up after the last element.
 *
 * @param state decoding cursor
 * @param length number of array elements to read
 * @return array of decoded elements
 */
function array(state: State, length: number) {
    const items: any[] = new Array(length);
    let index = 0;
    while (index < length) {
        items[index] = parse(state);
        index++;
    }
    return items;
}
/**
 * Recursively decode the MessagePack value that starts at `state.offset` and
 * advance `state.offset` past it.
 *
 * The first byte selects the format. The "fix" formats carry their value or
 * length inside that byte; all other handled formats are followed by a
 * length or value that is read through `state.dataView` (big-endian, the
 * DataView default).
 *
 * Type bytes that have no branch below (for example 0xcf, 0xd3 or the
 * 0xc7-0xc9 / 0xd4-0xd8 range) end in the "Unknown type" error.
 *
 * @param state decoding cursor
 * @return decoded value: object, array, string, number, boolean, Uint8Array or null
 * @throws Error when the cursor is already at or past the end of the input,
 *         or when the type byte is not handled
 */
function parse(state: State) {
    // Indexing past the end yields `undefined`, and `(undefined & 0x80) === 0`
    // is true, so without this guard truncated input would silently decode
    // to `undefined` via the positive-fixint branch.
    if (state.offset >= state.buffer.length) {
        throw new Error('Unexpected end of MessagePack data at offset ' + state.offset);
    }

    let type = state.buffer[state.offset];
    let value: any, length: number;

    // Positive FixInt
    if ((type & 0x80) === 0x00) {
        state.offset++;
        return type;
    }
    // FixMap
    if ((type & 0xf0) === 0x80) {
        length = type & 0x0f;
        state.offset++;
        return map(state, length);
    }
    // FixArray
    if ((type & 0xf0) === 0x90) {
        length = type & 0x0f;
        state.offset++;
        return array(state, length);
    }
    // FixStr
    if ((type & 0xe0) === 0xa0) {
        length = type & 0x1f;
        state.offset++;
        return str(state, length);
    }
    // Negative FixInt (the type byte itself is the signed value)
    if ((type & 0xe0) === 0xe0) {
        value = state.dataView.getInt8(state.offset);
        state.offset++;
        return value;
    }

    switch (type) {
        // nil
        case 0xc0:
            state.offset++;
            return null;
        // false
        case 0xc2:
            state.offset++;
            return false;
        // true
        case 0xc3:
            state.offset++;
            return true;
        // bin 8
        case 0xc4:
            length = state.dataView.getUint8(state.offset + 1);
            state.offset += 2;
            return bin(state, length);
        // bin 16
        case 0xc5:
            length = state.dataView.getUint16(state.offset + 1);
            state.offset += 3;
            return bin(state, length);
        // bin 32
        case 0xc6:
            length = state.dataView.getUint32(state.offset + 1);
            state.offset += 5;
            return bin(state, length);
        // float 32
        case 0xca:
            value = state.dataView.getFloat32(state.offset + 1);
            state.offset += 5;
            return value;
        // float 64
        case 0xcb:
            value = state.dataView.getFloat64(state.offset + 1);
            state.offset += 9;
            return value;
        // uint 8
        case 0xcc:
            value = state.buffer[state.offset + 1];
            state.offset += 2;
            return value;
        // uint 16
        case 0xcd:
            value = state.dataView.getUint16(state.offset + 1);
            state.offset += 3;
            return value;
        // uint 32
        case 0xce:
            value = state.dataView.getUint32(state.offset + 1);
            state.offset += 5;
            return value;
        // int 8
        case 0xd0:
            value = state.dataView.getInt8(state.offset + 1);
            state.offset += 2;
            return value;
        // int 16
        case 0xd1:
            value = state.dataView.getInt16(state.offset + 1);
            state.offset += 3;
            return value;
        // int 32
        case 0xd2:
            value = state.dataView.getInt32(state.offset + 1);
            state.offset += 5;
            return value;
        // str 8
        case 0xd9:
            length = state.dataView.getUint8(state.offset + 1);
            state.offset += 2;
            return str(state, length);
        // str 16
        case 0xda:
            length = state.dataView.getUint16(state.offset + 1);
            state.offset += 3;
            return str(state, length);
        // str 32
        case 0xdb:
            length = state.dataView.getUint32(state.offset + 1);
            state.offset += 5;
            return str(state, length);
        // array 16
        case 0xdc:
            length = state.dataView.getUint16(state.offset + 1);
            state.offset += 3;
            return array(state, length);
        // array 32
        case 0xdd:
            length = state.dataView.getUint32(state.offset + 1);
            state.offset += 5;
            return array(state, length);
        // map 16
        case 0xde:
            length = state.dataView.getUint16(state.offset + 1);
            state.offset += 3;
            return map(state, length);
        // map 32
        case 0xdf:
            length = state.dataView.getUint32(state.offset + 1);
            state.offset += 5;
            return map(state, length);
    }

    throw new Error('Unknown type 0x' + type.toString(16));
}

296
src/utils/msgpack/encode.ts Normal file
View File

@@ -0,0 +1,296 @@
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* Adapted from https://github.com/rcsb/mmtf-javascript
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author David Sehnal <david.sehnal@gmail.com>
*/
import { utf8ByteCount, utf8Write } from '../utf8'
/**
 * Encode `value` as a MessagePack message.
 *
 * The exact output size is computed first with `encodedSize`, then a buffer
 * of that size is allocated and filled by `encodeInternal`.
 *
 * @param value value to encode
 * @returns bytes of the encoded message
 */
export default function encode(value: any) {
    const size = encodedSize(value);
    const bytes = new Uint8Array(new ArrayBuffer(size));
    const view = new DataView(bytes.buffer);
    encodeInternal(value, view, bytes, 0);
    return bytes;
}
/**
 * Compute how many bytes `value` occupies once encoded as MessagePack.
 *
 * The format choice mirrors `encodeInternal`: the smallest str/bin/int
 * format that fits is assumed, non-integer numbers are sized as a float 64,
 * and containers add their header size to the sizes of their members.
 *
 * @param value value to measure
 * @returns encoded size in bytes
 * @throws Error for non-finite numbers, integers outside the 32-bit
 *         unsigned / signed range, over-long containers and unsupported types
 */
function encodedSize(value: any): number {
    let type = typeof value;

    // Strings
    if (type === 'string') {
        let length = utf8ByteCount(value);
        if (length < 0x20) {
            return 1 + length;
        }
        if (length < 0x100) {
            return 2 + length;
        }
        if (length < 0x10000) {
            return 3 + length;
        }
        if (length < 0x100000000) {
            return 5 + length;
        }
    }

    // Raw bytes
    if (value instanceof Uint8Array) {
        let length = value.byteLength;
        if (length < 0x100) {
            return 2 + length;
        }
        if (length < 0x10000) {
            return 3 + length;
        }
        if (length < 0x100000000) {
            return 5 + length;
        }
    }

    if (type === 'number') {
        // The encoder rejects NaN and +/-Infinity; reject them here with the
        // same message instead of sizing NaN as a double or reporting
        // Infinity as "too big".
        if (!isFinite(value)) {
            throw new Error('Number not finite: ' + value);
        }

        // Floating point: always stored as a float 64
        if (Math.floor(value) !== value) return 9;

        // Integers
        if (value >= 0) {
            // positive fixnum
            if (value < 0x80) return 1;
            // uint 8
            if (value < 0x100) return 2;
            // uint 16
            if (value < 0x10000) return 3;
            // uint 32
            if (value < 0x100000000) return 5;
            throw new Error('Number too big 0x' + value.toString(16));
        }
        // negative fixnum
        if (value >= -0x20) return 1;
        // int 8
        if (value >= -0x80) return 2;
        // int 16
        if (value >= -0x8000) return 3;
        // int 32
        if (value >= -0x80000000) return 5;
        throw new Error('Number too small -0x' + (-value).toString(16));
    }

    // Boolean, null, undefined
    if (type === 'boolean' || value === null || value === void 0) return 1;

    // Container types
    if (type === 'object') {
        let length: number, size = 0;
        if (Array.isArray(value)) {
            length = value.length;
            for (let i = 0; i < length; i++) {
                size += encodedSize(value[i]);
            }
        }
        else {
            let keys = Object.keys(value);
            length = keys.length;
            for (let i = 0; i < length; i++) {
                let key = keys[i];
                size += encodedSize(key) + encodedSize(value[key]);
            }
        }
        if (length < 0x10) {
            return 1 + size;
        }
        if (length < 0x10000) {
            return 3 + size;
        }
        if (length < 0x100000000) {
            return 5 + size;
        }
        throw new Error('Array or object too long 0x' + length.toString(16));
    }

    throw new Error('Unknown type ' + type);
}
/**
 * Write the MessagePack encoding of `value` at `offset` and return the
 * number of bytes written.
 *
 * Headers and numbers are written through `view` (big-endian, the DataView
 * default); string and binary payloads are written through `bytes`. Both are
 * expected to address the same memory, as set up by `encode`.
 *
 * @param value value to encode
 * @param view DataView over the output buffer
 * @param bytes byte view over the output buffer
 * @param offset position at which to start writing
 * @returns number of bytes written
 * @throws Error for non-finite numbers, integers outside the 32-bit
 *         unsigned / signed range, over-long containers and unsupported types
 */
function encodeInternal(value: any, view: DataView, bytes: Uint8Array, offset: number): number {
    let type = typeof value;

    // Strings
    if (type === 'string') {
        let length = utf8ByteCount(value);
        // fix str (length lives in the low 5 bits of the type byte)
        if (length < 0x20) {
            view.setUint8(offset, length | 0xa0);
            utf8Write(bytes, offset + 1, value);
            return 1 + length;
        }
        // str 8
        if (length < 0x100) {
            view.setUint8(offset, 0xd9);
            view.setUint8(offset + 1, length);
            utf8Write(bytes, offset + 2, value);
            return 2 + length;
        }
        // str 16
        if (length < 0x10000) {
            view.setUint8(offset, 0xda);
            view.setUint16(offset + 1, length);
            utf8Write(bytes, offset + 3, value);
            return 3 + length;
        }
        // str 32
        if (length < 0x100000000) {
            view.setUint8(offset, 0xdb);
            view.setUint32(offset + 1, length);
            utf8Write(bytes, offset + 5, value);
            return 5 + length;
        }
    }

    // Raw bytes: copied through the `bytes` parameter, the same view the
    // string branch writes through (no extra Uint8Array is allocated).
    if (value instanceof Uint8Array) {
        let length = value.byteLength;
        // bin 8
        if (length < 0x100) {
            view.setUint8(offset, 0xc4);
            view.setUint8(offset + 1, length);
            bytes.set(value, offset + 2);
            return 2 + length;
        }
        // bin 16
        if (length < 0x10000) {
            view.setUint8(offset, 0xc5);
            view.setUint16(offset + 1, length);
            bytes.set(value, offset + 3);
            return 3 + length;
        }
        // bin 32
        if (length < 0x100000000) {
            view.setUint8(offset, 0xc6);
            view.setUint32(offset + 1, length);
            bytes.set(value, offset + 5);
            return 5 + length;
        }
    }

    if (type === 'number') {
        if (!isFinite(value)) {
            throw new Error('Number not finite: ' + value);
        }

        // Floating point: always stored as a float 64
        if (Math.floor(value) !== value) {
            view.setUint8(offset, 0xcb);
            view.setFloat64(offset + 1, value);
            return 9;
        }

        // Integers
        if (value >= 0) {
            // positive fixnum
            if (value < 0x80) {
                view.setUint8(offset, value);
                return 1;
            }
            // uint 8
            if (value < 0x100) {
                view.setUint8(offset, 0xcc);
                view.setUint8(offset + 1, value);
                return 2;
            }
            // uint 16
            if (value < 0x10000) {
                view.setUint8(offset, 0xcd);
                view.setUint16(offset + 1, value);
                return 3;
            }
            // uint 32
            if (value < 0x100000000) {
                view.setUint8(offset, 0xce);
                view.setUint32(offset + 1, value);
                return 5;
            }
            throw new Error('Number too big 0x' + value.toString(16));
        }
        // negative fixnum (the signed byte itself is the encoding)
        if (value >= -0x20) {
            view.setInt8(offset, value);
            return 1;
        }
        // int 8
        if (value >= -0x80) {
            view.setUint8(offset, 0xd0);
            view.setInt8(offset + 1, value);
            return 2;
        }
        // int 16
        if (value >= -0x8000) {
            view.setUint8(offset, 0xd1);
            view.setInt16(offset + 1, value);
            return 3;
        }
        // int 32
        if (value >= -0x80000000) {
            view.setUint8(offset, 0xd2);
            view.setInt32(offset + 1, value);
            return 5;
        }
        // `-value` is positive here, so its hex form has no sign to strip.
        throw new Error('Number too small -0x' + (-value).toString(16));
    }

    // null / undefined are both written as nil
    if (value === null || value === undefined) {
        view.setUint8(offset, 0xc0);
        return 1;
    }

    // Boolean
    if (type === 'boolean') {
        view.setUint8(offset, value ? 0xc3 : 0xc2);
        return 1;
    }

    // Container types
    if (type === 'object') {
        let length: number, size = 0;
        let isArray = Array.isArray(value);
        let keys: string[] | undefined;
        if (isArray) {
            length = value.length;
        }
        else {
            keys = Object.keys(value);
            length = keys.length;
        }

        if (length < 0x10) {
            // fix array / fix map (length lives in the low 4 bits)
            view.setUint8(offset, length | (isArray ? 0x90 : 0x80));
            size = 1;
        }
        else if (length < 0x10000) {
            view.setUint8(offset, isArray ? 0xdc : 0xde);
            view.setUint16(offset + 1, length);
            size = 3;
        }
        else if (length < 0x100000000) {
            view.setUint8(offset, isArray ? 0xdd : 0xdf);
            view.setUint32(offset + 1, length);
            size = 5;
        }
        else {
            // Without a header the members would be written headerless.
            throw new Error('Array or object too long 0x' + length.toString(16));
        }

        if (isArray) {
            for (let i = 0; i < length; i++) {
                size += encodeInternal(value[i], view, bytes, offset + size);
            }
        }
        else {
            for (let key of keys!) {
                size += encodeInternal(key, view, bytes, offset + size);
                size += encodeInternal(value[key], view, bytes, offset + size);
            }
        }

        return size;
    }

    throw new Error('Unknown type ' + type);
}

124
src/utils/utf8.ts Normal file
View File

@@ -0,0 +1,124 @@
/*
* Copyright (c) 2017 molio contributors, licensed under MIT, See LICENSE file for more info.
*
* Adapted from https://github.com/rcsb/mmtf-javascript
* @author Alexander Rose <alexander.rose@weirdbyte.de>
* @author David Sehnal <david.sehnal@gmail.com>
*/
/**
 * Write `str` into `data` starting at `offset`, using 1 to 3 bytes per
 * UTF-16 code unit.
 *
 * The string is walked with `charCodeAt`, which yields code units in the
 * range 0..0xFFFF. As a consequence the four-byte branch and the final error
 * are never reached, and each half of a surrogate pair is written as its own
 * three-byte sequence.
 *
 * @param data destination bytes
 * @param offset index in `data` of the first byte to write
 * @param str text to write
 */
export function utf8Write(data: Uint8Array, offset: number, str: string) {
    const unitCount = str.length;
    let pos = offset;

    for (let index = 0; index < unitCount; index++) {
        const unit = str.charCodeAt(index);

        if (unit < 0x80) {
            // One byte: 0xxxxxxx
            data[pos++] = unit & 0x7f;
        } else if (unit < 0x800) {
            // Two bytes: 110xxxxx 10xxxxxx
            data[pos++] = ((unit >>> 6) & 0x1f) | 0xc0;
            data[pos++] = (unit & 0x3f) | 0x80;
        } else if (unit < 0x10000) {
            // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            data[pos++] = ((unit >>> 12) & 0x0f) | 0xe0;
            data[pos++] = ((unit >>> 6) & 0x3f) | 0x80;
            data[pos++] = (unit & 0x3f) | 0x80;
        } else if (unit < 0x110000) {
            // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            data[pos++] = ((unit >>> 18) & 0x07) | 0xf0;
            data[pos++] = ((unit >>> 12) & 0x3f) | 0x80;
            data[pos++] = ((unit >>> 6) & 0x3f) | 0x80;
            data[pos++] = (unit & 0x3f) | 0x80;
        } else {
            throw new Error('bad codepoint ' + unit);
        }
    }
}
/**
 * Pre-built single-character strings for every code point below 0x800,
 * i.e. everything a one- or two-byte UTF-8 sequence can encode. Used by
 * `utf8Read` to avoid a `String.fromCharCode` call per character.
 */
const __chars = function () {
    let data: string[] = [];
    for (let i = 0; i < 0x800; i++) data[i] = String.fromCharCode(i);
    return data;
}();

/** Throw an Error with the given message (usable in expression position). */
function throwError(err: string): never {
    throw new Error(err);
}

/**
 * Decode `length` bytes of UTF-8 from `data`, starting at `offset`.
 *
 * Characters are collected in chunks of 512 entries that are joined as they
 * fill up, and the joined chunks are concatenated at the end.
 *
 * @param data source bytes
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @returns the decoded string
 * @throws Error when a byte that cannot start a sequence is found in lead
 *         position, or when a four-byte sequence encodes a value above 0x10FFFF
 */
export function utf8Read(data: Uint8Array, offset: number, length: number) {
    let chars = __chars;
    let str: string[] | undefined = void 0, chunk: string[] = [], chunkSize = 512, chunkOffset = 0;

    for (let i = offset, end = offset + length; i < end; i++) {
        let byte = data[i];
        // One byte character: 0xxxxxxx
        if ((byte & 0x80) === 0x00) {
            chunk[chunkOffset++] = chars[byte];
        }
        // Two byte character: 110xxxxx 10xxxxxx (five payload bits in the lead byte)
        else if ((byte & 0xe0) === 0xc0) {
            chunk[chunkOffset++] = chars[((byte & 0x1f) << 6) | (data[++i] & 0x3f)];
        }
        // Three byte character: 1110xxxx 10xxxxxx 10xxxxxx
        else if ((byte & 0xf0) === 0xe0) {
            chunk[chunkOffset++] = String.fromCharCode(
                ((byte & 0x0f) << 12) |
                ((data[++i] & 0x3f) << 6) |
                ((data[++i] & 0x3f) << 0)
            );
        }
        // Four byte character: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        else if ((byte & 0xf8) === 0xf0) {
            let codePoint =
                ((byte & 0x07) << 18) |
                ((data[++i] & 0x3f) << 12) |
                ((data[++i] & 0x3f) << 6) |
                ((data[++i] & 0x3f) << 0);
            if (codePoint > 0x10ffff) throwError('Invalid code point ' + codePoint.toString(16));
            // Values above 0xFFFF need a surrogate pair; fromCharCode would
            // silently truncate them to 16 bits.
            chunk[chunkOffset++] = String.fromCodePoint(codePoint);
        } else throwError('Invalid byte ' + byte.toString(16));

        if (chunkOffset === chunkSize) {
            str = str || [];
            str[str.length] = chunk.join('');
            chunkOffset = 0;
        }
    }

    // Fast path: everything fitted into a single, partially filled chunk.
    if (!str) return chunk.slice(0, chunkOffset).join('');
    if (chunkOffset > 0) {
        str[str.length] = chunk.slice(0, chunkOffset).join('');
    }
    return str.join('');
}
/**
 * Count the bytes `utf8Write` produces for `str`.
 *
 * The string is walked with `charCodeAt`, so every UTF-16 code unit is
 * counted on its own as 1, 2 or 3 bytes; the four-byte branch and the error
 * are not reachable for code units (which are always below 0x10000).
 *
 * @param str text to measure
 * @returns number of bytes
 */
export function utf8ByteCount(str: string) {
    let total = 0;
    const unitCount = str.length;

    for (let index = 0; index < unitCount; index++) {
        const unit = str.charCodeAt(index);

        if (unit < 0x80) total += 1;
        else if (unit < 0x800) total += 2;
        else if (unit < 0x10000) total += 3;
        else if (unit < 0x110000) total += 4;
        else throwError('bad codepoint ' + unit);
    }

    return total;
}