Skip to content

Commit

Permalink
feat!: make parser async and use native zlib decompression
Browse files Browse the repository at this point in the history
  • Loading branch information
lpatiny committed Dec 5, 2024
1 parent 4273aaa commit 17f7bcd
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 28 deletions.
4 changes: 1 addition & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ import { parseMZ } from 'mzdata';

// mzData files
const mzDataFile = readFileSync(__dirname + '/tiny.mzData.xml');
var response = parseMZ(mzDataFile);
var response = await parseMZ(mzDataFile);
```

## Ontology
Expand All @@ -58,8 +58,6 @@ You can find various examples files at:

http://www.psidev.info/mzML

## [API Documentation](https://cheminfo-js.github.io/mzData/)

## License

[MIT](./LICENSE)
Expand Down
6 changes: 6 additions & 0 deletions src/Options.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
/**
* @typedef {Object} Options
* @property {import('cheminfo-types').Logger} [logger] - Logger used to report errors while decoding base64 data (the parsers fall back to `console` when omitted)
*/

export {};
11 changes: 6 additions & 5 deletions src/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@ const decoder = new TextDecoder();
/**
* Reads a mzData v1.05 file
* @param {ArrayBuffer|string} xml - ArrayBuffer or String or any Typed Array (including Node.js' Buffer from v4) with the data
* @return {{times: Array<number>, series: { ms: { data:Array<Array<number>>}}}}
* @param {import('./Options.js').Options} [options={}]
* @return {Promise<{times: Array<number>, series: { ms: { data:Array<Array<number>>}}}>}
*/
export function parseMZ(xml) {
export async function parseMZ(xml, options = {}) {
if (typeof xml === 'string') {
const encoder = new TextEncoder();
xml = encoder.encode(xml);
Expand All @@ -24,11 +25,11 @@ export function parseMZ(xml) {
: xml.substring(0, 200);

if (header.includes('mzData')) {
return parseMzData(xml);
return parseMzData(xml, options);
} else if (header.includes('mzML')) {
return parseMzML(xml);
return parseMzML(xml, options);
} else if (header.includes('mzXML')) {
return parseMzXML(xml);
return parseMzXML(xml, options);
} else {
throw new Error(`MZ parser: unknown format`);
}
Expand Down
22 changes: 18 additions & 4 deletions src/mzdata/parseMzData.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { parse } from 'arraybuffer-xml-parser';
import { recursiveResolve } from 'ml-spectra-processing';

import { decodeBase64 } from '../util/decodeBase64';

Expand All @@ -7,7 +8,14 @@ import { processSpectrumList } from './processSpectrumList';

const decoder = new TextDecoder();

export function parseMzData(arrayBuffer) {
/**
*
* @param {*} arrayBuffer
* @param {import('../Options.js').Options} [options={}]
* @returns
*/
export async function parseMzData(arrayBuffer, options = {}) {
const { logger = console } = options;
const result = {
metadata: {},
times: [],
Expand All @@ -19,14 +27,20 @@ export function parseMzData(arrayBuffer) {
};

let parsed = parse(arrayBuffer, {
attributeNamePrefix: '',
attributesNodeName: 'attributes',
attributeNameProcessor: (attributeName) => attributeName,
tagValueProcessor: (value, node) => {
if (node.tagName !== 'data') return decoder.decode(value);
return decodeBase64(node.value, node.attributes);
const promise = decodeBase64(node.bytes, node.attributes);
// avoid unhandled promise rejection and swallow the error
promise.catch((error) => {
logger.error('error decoding base64', error);
return [];
});
return promise;
},
});

await recursiveResolve(parsed);
processMetadata(parsed.mzData, result.metadata);
processSpectrumList(parsed.mzData, result.times, result.series.ms.data);

Expand Down
17 changes: 13 additions & 4 deletions src/mzml/parseMzML.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import { parse } from 'arraybuffer-xml-parser';
import { recursiveResolve } from 'ml-spectra-processing';

import { decodeBase64 } from '../util/decodeBase64';

Expand All @@ -8,7 +9,8 @@ const decoder = new TextDecoder();

// https://www.psidev.info/mzml
// CV = Controlled vocabulary
export function parseMzML(arrayBuffer) {
export async function parseMzML(arrayBuffer, options = {}) {
const { logger = console } = options;
const result = {
metadata: {},
times: [],
Expand All @@ -20,17 +22,24 @@ export function parseMzML(arrayBuffer) {
};

let parsed = parse(arrayBuffer, {
attributeNamePrefix: '',
attributesNodeName: 'attributes',
attributeNameProcessor: (attributeName) => attributeName,
tagValueProcessor: (value, node) => {
if (node.tagName !== 'binary') return decoder.decode(value);
const ontologies = node.parent.children.cvParam.map(
(entry) => entry.attributes.accession,
);

return decodeBase64(node.value, { ontologies });
const promise = decodeBase64(node.bytes, { ontologies });
// avoid unhandled promise rejection and swallow the error
promise.catch((error) => {
logger.error('error decoding base64', error);
return [];
});
return promise;
},
});
// parsed file still contains promises
await recursiveResolve(parsed);

const mzML = parsed.mzML || parsed.indexedmzML.mzML;

Expand Down
7 changes: 4 additions & 3 deletions src/mzml/utils.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import { decode } from 'uint8-base64';
import { inflate } from 'pako';

export function decoder(base64Encoded, options = {}) {
import { inflate } from '../util/inflate.js';

export async function decoder(base64Encoded, options = {}) {
const { compressionAlgorithm } = options;
let decoded;
switch (compressionAlgorithm) {
case 'zlib':
decoded = inflate(decode(base64Encoded));
decoded = await inflate(decode(base64Encoded));
break;
case undefined:
case '':
Expand Down
22 changes: 19 additions & 3 deletions src/mzxml/parseMzXML.js
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
import { parse } from 'arraybuffer-xml-parser';
import { recursiveResolve } from 'ml-spectra-processing';

import { decodeBase64 } from '../util/decodeBase64';

import { processSpectrumList } from './processSpectrumList';

const decoder = new TextDecoder();

export function parseMzXML(arrayBuffer) {
/**
*
* @param {*} arrayBuffer
* @param {import('../Options.js').Options} [options]
* @returns
*/
export async function parseMzXML(arrayBuffer, options = {}) {
const { logger = console } = options;
const result = {
metadata: {},
times: [],
Expand All @@ -17,17 +25,25 @@ export function parseMzXML(arrayBuffer) {
},
};
let parsed = parse(arrayBuffer, {
attributeNamePrefix: '',
attributesNodeName: 'attributes',
attributeNameProcessor: (attributeName) => attributeName,
tagValueProcessor: (value, node) => {
if (node.tagName !== 'peaks') return decoder.decode(value);
return decodeBase64(node.value, {

const promise = decodeBase64(node.bytes, {
precision: node.attributes.precision,
endian: node.attributes.byteOrder,
compression: node.attributes.compressionType,
});
// avoid unhandled promise rejection and swallow the error
promise.catch((error) => {
logger.error('error decoding base64', error);
return [];
});
return promise;
},
});
await recursiveResolve(parsed);

processSpectrumList(parsed.mzXML, result.times, result.series.ms);

Expand Down
6 changes: 3 additions & 3 deletions src/mzxml/utils.js
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import { decode } from 'uint8-base64';

import { inflate } from 'pako';
import { inflate } from '../util/inflate.js';

export function decoder(base64Encoded, options = {}) {
export async function decoder(base64Encoded, options = {}) {
const { compressionAlgorithm } = options;
let decoded;
switch (compressionAlgorithm) {
case 'zlib':
decoded = inflate(decode(base64Encoded));
decoded = await inflate(decode(base64Encoded));
break;
case undefined:
case '':
Expand Down
7 changes: 4 additions & 3 deletions src/util/decodeBase64.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
import { inflate } from 'pako';
import { decode } from 'uint8-base64';

export function decodeBase64(base64, options = {}) {
import { inflate } from './inflate.js';

export async function decodeBase64(base64, options = {}) {
let {
endian = 'little',
precision,
Expand All @@ -28,7 +29,7 @@ export function decodeBase64(base64, options = {}) {
let uint8Array = decode(base64);
switch (compression.toLowerCase()) {
case 'zlib':
uint8Array = inflate(uint8Array);
uint8Array = await inflate(uint8Array);
break;
case '':
case 'none':
Expand Down
37 changes: 37 additions & 0 deletions src/util/inflate.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/**
 * Inflates zlib-wrapped (RFC 1950) data with the platform's native
 * `DecompressionStream`.
 *
 * The complete zlib stream is handed to the `'deflate'` format, which is the
 * zlib container itself. The header and Adler-32 trailer are therefore parsed
 * by the decompressor instead of being sliced off by hand, and the
 * `'deflate-raw'` format (rejected by some runtimes, e.g. Node.js 18) is not
 * required.
 * @param {Uint8Array} zlibCompressedData - Complete zlib stream (header, deflate payload and Adler-32 trailer).
 * @returns {Promise<Uint8Array>} The decompressed bytes.
 */
export async function inflate(zlibCompressedData) {
  // Wrap the in-memory bytes in a one-chunk stream so they can be piped.
  const inputStream = new ReadableStream({
    start(controller) {
      controller.enqueue(zlibCompressedData);
      controller.close();
    },
  });

  const reader = inputStream
    .pipeThrough(new DecompressionStream('deflate'))
    .getReader();

  const chunks = [];
  let totalLength = 0;

  while (true) {
    // Chunks must be consumed in order, hence the sequential await.
    // eslint-disable-next-line no-await-in-loop
    const { value, done } = await reader.read();
    if (done) break;
    chunks.push(value);
    totalLength += value.length;
  }

  // Combine chunks into a single Uint8Array
  const decompressedData = new Uint8Array(totalLength);
  let offset = 0;
  for (const chunk of chunks) {
    decompressedData.set(chunk, offset);
    offset += chunk.length;
  }

  return decompressedData;
}

0 comments on commit 17f7bcd

Please sign in to comment.