Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion docs/source/status.rst
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Binary | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Large Binary | ✓ | ✓ | ✓ | | | ✓ | ✓ | |
| Large Binary | ✓ | ✓ | ✓ | | | ✓ | ✓ | |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Utf8 | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
Expand Down
4 changes: 2 additions & 2 deletions js/src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ export {
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
Binary,
Binary, LargeBinary,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
Expand Down Expand Up @@ -78,7 +78,7 @@ export {
} from './Arrow.js';

export {
BinaryBuilder,
BinaryBuilder, LargeBinaryBuilder,
BoolBuilder,
DateBuilder, DateDayBuilder, DateMillisecondBuilder,
DecimalBuilder,
Expand Down
3 changes: 2 additions & 1 deletion js/src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ export {
Int, Int8, Int16, Int32, Int64, Uint8, Uint16, Uint32, Uint64,
Float, Float16, Float32, Float64,
Utf8, LargeUtf8,
Binary,
Binary, LargeBinary,
FixedSizeBinary,
Date_, DateDay, DateMillisecond,
Timestamp, TimestampSecond, TimestampMillisecond, TimestampMicrosecond, TimestampNanosecond,
Expand Down Expand Up @@ -80,6 +80,7 @@ export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, Dur
export { Utf8Builder } from './builder/utf8.js';
export { LargeUtf8Builder } from './builder/largeutf8.js';
export { BinaryBuilder } from './builder/binary.js';
export { LargeBinaryBuilder } from './builder/largebinary.js';
export { ListBuilder } from './builder/list.js';
export { FixedSizeListBuilder } from './builder/fixedsizelist.js';
export { MapBuilder } from './builder/map.js';
Expand Down
6 changes: 3 additions & 3 deletions js/src/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval, Duration,
Utf8, LargeUtf8, Binary, List, Map_,
Utf8, LargeUtf8, Binary, LargeBinary, List, Map_,
} from './type.js';
import { createIsValidFunction } from './builder/valid.js';
import { BufferBuilder, BitmapBufferBuilder, DataBufferBuilder, OffsetsBufferBuilder } from './builder/buffer.js';
Expand Down Expand Up @@ -285,7 +285,7 @@ export abstract class Builder<T extends DataType = any, TNull = any> {

if (typeIds = _typeIds?.flush(length)) { // Unions, DenseUnions
valueOffsets = _offsets?.flush(length);
} else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8, LargeUtf8), and Lists
} else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, LargeBinary, Utf8, LargeUtf8), and Lists
data = _values?.flush(_offsets.last());
} else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, Duration and Interval)
data = _values?.flush(length);
Expand Down Expand Up @@ -352,7 +352,7 @@ export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary
}

/** @ignore */
export abstract class VariableWidthBuilder<T extends Binary | Utf8 | LargeUtf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
export abstract class VariableWidthBuilder<T extends Binary | LargeBinary | Utf8 | LargeUtf8 | List | Map_, TNull = any> extends Builder<T, TNull> {
protected _pendingLength = 0;
protected _offsets: OffsetsBufferBuilder<T>;
protected _pending: Map<number, any> | undefined;
Expand Down
54 changes: 54 additions & 0 deletions js/src/builder/largebinary.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { LargeBinary } from '../type.js';
import { toUint8Array } from '../util/buffer.js';
import { BufferBuilder } from './buffer.js';
import { VariableWidthBuilder, BuilderOptions } from '../builder.js';

/**
 * Builder for `LargeBinary` columns.
 *
 * Values are normalised to `Uint8Array` in `setValue`, and `_flushPending`
 * copies the queued values into the values buffer while recording each
 * value's byte length in the offsets builder as a `BigInt`.
 *
 * @ignore
 */
export class LargeBinaryBuilder<TNull = any> extends VariableWidthBuilder<LargeBinary, TNull> {
    constructor(opts: BuilderOptions<LargeBinary, TNull>) {
        super(opts);
        // The values buffer starts out empty.
        this._values = new BufferBuilder(new Uint8Array(0));
    }

    /**
     * Running size figure: pending bytes, plus `length * 4`, plus the byte
     * lengths of whichever of the offsets / values / nulls builders exist.
     */
    public get byteLength(): number {
        // NOTE(review): `* 4` matches a 32-bit offset width, while LargeBinary
        // declares BigInt64Array offsets - confirm whether 8 is intended here.
        let total = this._pendingLength + (this.length * 4);
        if (this._offsets) {
            total += this._offsets.byteLength;
        }
        if (this._values) {
            total += this._values.byteLength;
        }
        if (this._nulls) {
            total += this._nulls.byteLength;
        }
        return total;
    }

    /** Converts `value` with `toUint8Array` and hands it to the base `setValue`. */
    public setValue(index: number, value: Uint8Array) {
        const bytes = toUint8Array(value);
        return super.setValue(index, bytes);
    }

    /**
     * Writes every pending entry out, in the map's iteration order.
     *
     * @param pending Map from element index to its bytes (`undefined` entries record a zero length).
     * @param pendingLength Number of bytes to reserve in the values buffer before copying.
     */
    protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number) {
        const offsetsBuilder = this._offsets;
        const target = this._values.reserve(pendingLength).buffer;
        // Write position inside `target`; advances by each copied value's length.
        let cursor = 0;
        pending.forEach((bytes, slot) => {
            if (bytes === undefined) {
                offsetsBuilder.set(slot, BigInt(0));
                return;
            }
            const byteCount = bytes.length;
            target.set(bytes, cursor);
            offsetsBuilder.set(slot, BigInt(byteCount));
            cursor += byteCount;
        });
    }
}
Comment on lines +39 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you want to activate the placeholder in builder/largeutf8.ts in this PR?

Seems like you could probably do the same for the `byteLength` getter, too.

(LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending;

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, just updated the pull request.

22 changes: 4 additions & 18 deletions js/src/builder/largeutf8.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import { LargeUtf8 } from '../type.js';
import { encodeUtf8 } from '../util/utf8.js';
import { BufferBuilder } from './buffer.js';
import { VariableWidthBuilder, BuilderOptions } from '../builder.js';
import { LargeBinaryBuilder } from './largebinary.js';

/** @ignore */
export class LargeUtf8Builder<TNull = any> extends VariableWidthBuilder<LargeUtf8, TNull> {
Expand All @@ -36,24 +37,9 @@ export class LargeUtf8Builder<TNull = any> extends VariableWidthBuilder<LargeUtf
public setValue(index: number, value: string) {
return super.setValue(index, encodeUtf8(value) as any);
}

// @ts-ignore
// TODO: move to largeBinaryBuilder when implemented
// protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number): void { }
protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number) {
const offsets = this._offsets;
const data = this._values.reserve(pendingLength).buffer;
let offset = 0;
for (const [index, value] of pending) {
if (value === undefined) {
offsets.set(index, BigInt(0));
} else {
const length = value.length;
data.set(value, offset);
offsets.set(index, BigInt(length));
offset += length;
}
}
}
protected _flushPending(pending: Map<number, Uint8Array | undefined>, pendingLength: number): void { }
}

// (LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending;
(LargeUtf8Builder.prototype as any)._flushPending = (LargeBinaryBuilder.prototype as any)._flushPending;
15 changes: 13 additions & 2 deletions js/src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

import { Vector } from './vector.js';
import { BufferType, Type, UnionMode } from './enum.js';
import { DataType, LargeUtf8, strideForType } from './type.js';
import { DataType, strideForType } from './type.js';
import { popcnt_bit_range, truncateBitmap } from './util/bit.js';

// When slicing, we do not know the null count of the sliced range without
Expand Down Expand Up @@ -253,7 +253,7 @@ export class Data<T extends DataType = DataType> {

import {
Dictionary,
Bool, Null, Utf8, Binary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Bool, Null, Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary, List, FixedSizeList, Map_, Struct,
Float,
Int,
Date_,
Expand Down Expand Up @@ -324,6 +324,14 @@ class MakeDataVisitor extends Visitor {
const { ['length']: length = valueOffsets.length - 1, ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0 } = props;
return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]);
}
/**
 * Builds a `Data` instance for a LargeBinary column from loosely-typed props.
 *
 * `data` and `nullBitmap` are converted with `toUint8Array` and `valueOffsets`
 * with `toBigInt64Array`. Omitted (`undefined`) props fall back to defaults:
 * `offset` to 0, `length` to `valueOffsets.length - 1`, and `nullCount` to -1
 * when a `nullBitmap` prop was supplied, otherwise 0.
 */
public visitLargeBinary<T extends LargeBinary>(props: LargeBinaryDataProps<T>) {
    const type = props['type'];
    const rawOffset = props['offset'];
    const offset = rawOffset === undefined ? 0 : rawOffset;

    const data = toUint8Array(props['data']);
    const nullBitmap = toUint8Array(props['nullBitmap']);
    const valueOffsets = toBigInt64Array(props['valueOffsets']);

    const rawLength = props['length'];
    const length = rawLength === undefined ? valueOffsets.length - 1 : rawLength;

    const rawNullCount = props['nullCount'];
    // The default is decided by the caller-supplied prop, not the converted bitmap.
    const nullCount = rawNullCount === undefined ? (props['nullBitmap'] ? -1 : 0) : rawNullCount;

    return new Data(type, offset, length, nullCount, [valueOffsets, data, nullBitmap]);
}
public visitFixedSizeBinary<T extends FixedSizeBinary>(props: FixedSizeBinaryDataProps<T>) {
const { ['type']: type, ['offset']: offset = 0 } = props;
const nullBitmap = toUint8Array(props['nullBitmap']);
Expand Down Expand Up @@ -444,6 +452,7 @@ interface IntervalDataProps<T extends Interval> extends DataProps_<T> { data?: D
interface DurationDataProps<T extends Duration> extends DataProps_<T> { data?: DataBuffer<T> }
interface FixedSizeBinaryDataProps<T extends FixedSizeBinary> extends DataProps_<T> { data?: DataBuffer<T> }
interface BinaryDataProps<T extends Binary> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
interface LargeBinaryDataProps<T extends LargeBinary> extends DataProps_<T> { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer<T> }
interface Utf8DataProps<T extends Utf8> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
interface LargeUtf8DataProps<T extends LargeUtf8> extends DataProps_<T> { valueOffsets: LargeValueOffsetsBuffer | ValueOffsetsBuffer; data?: DataBuffer<T> }
interface ListDataProps<T extends List> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; child: Data<T['valueType']> }
Expand All @@ -468,6 +477,7 @@ export type DataProps<T extends DataType> = (
T extends Duration /* */ ? DurationDataProps<T> :
T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps<T> :
T extends Binary /* */ ? BinaryDataProps<T> :
T extends LargeBinary /* */ ? LargeBinaryDataProps<T> :
T extends Utf8 /* */ ? Utf8DataProps<T> :
T extends LargeUtf8 /* */ ? LargeUtf8DataProps<T> :
T extends List /* */ ? ListDataProps<T> :
Expand Down Expand Up @@ -495,6 +505,7 @@ export function makeData<T extends Interval>(props: IntervalDataProps<T>): Data<
export function makeData<T extends Duration>(props: DurationDataProps<T>): Data<T>;
export function makeData<T extends FixedSizeBinary>(props: FixedSizeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Binary>(props: BinaryDataProps<T>): Data<T>;
export function makeData<T extends LargeBinary>(props: LargeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Utf8>(props: Utf8DataProps<T>): Data<T>;
export function makeData<T extends LargeUtf8>(props: LargeUtf8DataProps<T>): Data<T>;
export function makeData<T extends List>(props: ListDataProps<T>): Data<T>;
Expand Down
3 changes: 2 additions & 1 deletion js/src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,8 @@ export enum Type {
FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */
FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */
Map = 17, /** Map of named logical types */
Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */
Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds */
LargeBinary = 19, /** Large variable-length bytes (no guarantee of UTF8-ness) */
LargeUtf8 = 20, /** Large variable-length string as List<Char> */

Dictionary = -1, /** Dictionary aka Category type */
Expand Down
4 changes: 4 additions & 0 deletions js/src/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import type { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder
import type { Utf8Builder } from './builder/utf8.js';
import type { LargeUtf8Builder } from './builder/largeutf8.js';
import type { BinaryBuilder } from './builder/binary.js';
import type { LargeBinaryBuilder } from './builder/largebinary.js';
import type { ListBuilder } from './builder/list.js';
import type { FixedSizeListBuilder } from './builder/fixedsizelist.js';
import type { MapBuilder } from './builder/map.js';
Expand Down Expand Up @@ -210,6 +211,7 @@ export type TypeToDataType<T extends Type> = {
[Type.Utf8]: type.Utf8;
[Type.LargeUtf8]: type.LargeUtf8;
[Type.Binary]: type.Binary;
[Type.LargeBinary]: type.LargeBinary;
[Type.FixedSizeBinary]: type.FixedSizeBinary;
[Type.Date]: type.Date_;
[Type.DateDay]: type.DateDay;
Expand Down Expand Up @@ -264,6 +266,7 @@ type TypeToBuilder<T extends Type = any, TNull = any> = {
[Type.Utf8]: Utf8Builder<TNull>;
[Type.LargeUtf8]: LargeUtf8Builder<TNull>;
[Type.Binary]: BinaryBuilder<TNull>;
[Type.LargeBinary]: LargeBinaryBuilder<TNull>;
[Type.FixedSizeBinary]: FixedSizeBinaryBuilder<TNull>;
[Type.Date]: DateBuilder<any, TNull>;
[Type.DateDay]: DateDayBuilder<TNull>;
Expand Down Expand Up @@ -318,6 +321,7 @@ type DataTypeToBuilder<T extends DataType = any, TNull = any> = {
[Type.Utf8]: T extends type.Utf8 ? Utf8Builder<TNull> : never;
[Type.LargeUtf8]: T extends type.LargeUtf8 ? LargeUtf8Builder<TNull> : never;
[Type.Binary]: T extends type.Binary ? BinaryBuilder<TNull> : never;
[Type.LargeBinary]: T extends type.LargeBinary ? LargeBinaryBuilder<TNull> : never;
[Type.FixedSizeBinary]: T extends type.FixedSizeBinary ? FixedSizeBinaryBuilder<TNull> : never;
[Type.Date]: T extends type.Date_ ? DateBuilder<T, TNull> : never;
[Type.DateDay]: T extends type.DateDay ? DateDayBuilder<TNull> : never;
Expand Down
3 changes: 2 additions & 1 deletion js/src/ipc/metadata/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
import { Schema, Field } from '../../schema.js';
import {
DataType, Dictionary, TimeBitWidth,
Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary,
Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration,
} from '../../type.js';
Expand Down Expand Up @@ -149,6 +149,7 @@ function typeFromJSON(f: any, children?: Field[]): DataType<any> {
case 'NONE': return new Null();
case 'null': return new Null();
case 'binary': return new Binary();
case 'largebinary': return new LargeBinary();
case 'utf8': return new Utf8();
case 'largeutf8': return new LargeUtf8();
case 'bool': return new Bool();
Expand Down
3 changes: 2 additions & 1 deletion js/src/ipc/metadata/message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ import ByteBuffer = flatbuffers.ByteBuffer;

import {
DataType, Dictionary, TimeBitWidth,
Utf8, LargeUtf8, Binary, Decimal, FixedSizeBinary,
Utf8, LargeUtf8, Binary, LargeBinary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration,
} from '../../type.js';
Expand Down Expand Up @@ -432,6 +432,7 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType<any> {
case Type['NONE']: return new Null();
case Type['Null']: return new Null();
case Type['Binary']: return new Binary();
case Type['LargeBinary']: return new LargeBinary();
case Type['Utf8']: return new Utf8();
case Type['LargeUtf8']: return new LargeUtf8();
case Type['Bool']: return new Bool();
Expand Down
5 changes: 2 additions & 3 deletions js/src/ipc/writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -391,7 +391,7 @@ export class RecordBatchJSONWriter<T extends TypeMap = any> extends RecordBatchW
protected _writeDictionaryBatch(dictionary: Data, id: number, isDelta = false) {
this._dictionaryDeltaOffsets.set(id, dictionary.length + (this._dictionaryDeltaOffsets.get(id) || 0));
this._write(this._dictionaryBlocks.length === 0 ? ` ` : `,\n `);
this._write(`${dictionaryBatchToJSON(dictionary, id, isDelta)}`);
this._write(dictionaryBatchToJSON(dictionary, id, isDelta));
this._dictionaryBlocks.push(new FileBlock(0, 0, 0));
return this;
}
Expand All @@ -401,7 +401,6 @@ export class RecordBatchJSONWriter<T extends TypeMap = any> extends RecordBatchW
return this;
}
public close() {

if (this._dictionaries.length > 0) {
this._write(`,\n "dictionaries": [\n`);
for (const batch of this._dictionaries) {
Expand All @@ -413,7 +412,7 @@ export class RecordBatchJSONWriter<T extends TypeMap = any> extends RecordBatchW
if (this._recordBatches.length > 0) {
for (let i = -1, n = this._recordBatches.length; ++i < n;) {
this._write(i === 0 ? `,\n "batches": [\n ` : `,\n `);
this._write(`${recordBatchToJSON(this._recordBatches[i])}`);
this._write(recordBatchToJSON(this._recordBatches[i]));
this._recordBatchBlocks.push(new FileBlock(0, 0, 0));
}
this._write(`\n ]`);
Expand Down
18 changes: 17 additions & 1 deletion js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ export abstract class DataType<TType extends Type = Type, TChildren extends Type
/** @nocollapse */ static isInt(x: any): x is Int_ { return x?.typeId === Type.Int; }
/** @nocollapse */ static isFloat(x: any): x is Float { return x?.typeId === Type.Float; }
/** @nocollapse */ static isBinary(x: any): x is Binary { return x?.typeId === Type.Binary; }
/** @nocollapse */ static isLargeBinary(x: any): x is LargeBinary { return x?.typeId === Type.LargeBinary; }
/** @nocollapse */ static isUtf8(x: any): x is Utf8 { return x?.typeId === Type.Utf8; }
/** @nocollapse */ static isLargeUtf8(x: any): x is LargeUtf8 { return x?.typeId === Type.LargeUtf8; }
/** @nocollapse */ static isBool(x: any): x is Bool { return x?.typeId === Type.Bool; }
Expand Down Expand Up @@ -250,6 +251,22 @@ export class Binary extends DataType<Type.Binary> {
})(Binary.prototype);
}

/** @ignore */
export interface LargeBinary extends DataType<Type.LargeBinary> { TArray: Uint8Array; TOffsetArray: BigInt64Array; TValue: Uint8Array; ArrayType: TypedArrayConstructor<Uint8Array>; OffsetArrayType: BigIntArrayConstructor<BigInt64Array> }
/**
 * Data type for large variable-length binary values.
 *
 * Instances report `Type.LargeBinary` as their type id. The prototype carries
 * `Uint8Array` as the value array type and `BigInt64Array` as the offset
 * array type.
 *
 * @ignore
 */
export class LargeBinary extends DataType<Type.LargeBinary> {
constructor() {
super();
}
/** Type id of this data type; always `Type.LargeBinary`. */
public get typeId() { return Type.LargeBinary as Type.LargeBinary; }
/** Returns the literal string `LargeBinary`. */
public toString() { return `LargeBinary`; }
// Static initializer, invoked immediately with LargeBinary.prototype: it
// installs the shared ArrayType / OffsetArrayType constructors on the
// prototype and sets the prototype's Symbol.toStringTag to 'LargeBinary'.
protected static [Symbol.toStringTag] = ((proto: LargeBinary) => {
(<any>proto).ArrayType = Uint8Array;
(<any>proto).OffsetArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'LargeBinary';
})(LargeBinary.prototype);
}

/** @ignore */
export interface Utf8 extends DataType<Type.Utf8> { TArray: Uint8Array; TOffsetArray: Int32Array; TValue: string; ArrayType: TypedArrayConstructor<Uint8Array>; OffsetArrayType: TypedArrayConstructor<Int32Array> }
/** @ignore */
Expand Down Expand Up @@ -601,7 +618,6 @@ export class FixedSizeBinary extends DataType<Type.FixedSizeBinary> {
protected static [Symbol.toStringTag] = ((proto: FixedSizeBinary) => {
(<any>proto).byteWidth = null;
(<any>proto).ArrayType = Uint8Array;
(<any>proto).OffsetArrayType = Int32Array;
return proto[Symbol.toStringTag] = 'FixedSizeBinary';
})(FixedSizeBinary.prototype);
}
Expand Down
Loading