Skip to content

Commit

Permalink
add getters for the dictionary and indices of chunked dictionary vectors
Browse files Browse the repository at this point in the history
  • Loading branch information
trxcllnt committed Jan 5, 2019
1 parent aaf42c8 commit f7d2b2e
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 12 deletions.
26 changes: 25 additions & 1 deletion js/src/vector/chunked.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,13 @@
import { Data } from '../data';
import { Field } from '../schema';
import { Vector } from '../vector';
import { DataType } from '../type';
import { clampRange } from '../util/vector';
import { DataType, Dictionary } from '../type';
import { Clonable, Sliceable, Applicative } from '../vector';
import { DictionaryVector } from './dictionary';

/** Resolves to `T['dictionaryVector']` when `T` is a Dictionary type, and to `null` for every other DataType. */
type ChunkedDict<T extends DataType> =
    T extends Dictionary
        ? T['dictionaryVector']
        : null;

/**
 * Resolves to the index container of a Dictionary-typed `T` — either a single
 * Vector or a Chunked over `T['indices']` — and to `null` for every other DataType.
 */
type ChunkedKeys<T extends DataType> =
    T extends Dictionary
        ? Chunked<T['indices']> | Vector<T['indices']>
        : null;

/**
 * Callback signature that receives a Chunked column together with a chunk
 * index and a value index, and may return any value.
 * NOTE(review): the call sites are outside this view — presumably `valueIndex`
 * is relative to the chunk at `chunkIndex`; confirm against the callers.
 * @ignore
 */
type SearchContinuation<T extends Chunked> = (column: T, chunkIndex: number, valueIndex: number) => any;
Expand Down Expand Up @@ -80,6 +84,26 @@ export class Chunked<T extends DataType = any>
return nullCount;
}

protected _indices?: ChunkedKeys<T>;
/**
 * The dictionary indices of this column, or `null` when the column's type is
 * not a Dictionary type.
 *
 * With exactly one chunk, that chunk's own `indices` are used; otherwise the
 * `indices` of every chunk are combined through `Chunked.concat`. The result
 * is stored in `_indices` and reused on later reads.
 */
public get indices(): ChunkedKeys<T> | null {
    if (!DataType.isDictionary(this._type)) {
        return null;
    }
    if (!this._indices) {
        const dictChunks = (<any> this._chunks) as DictionaryVector<T, any>[];
        const combined = dictChunks.length !== 1
            ? Chunked.concat(...dictChunks.map((chunk) => chunk.indices))
            : dictChunks[0].indices;
        this._indices = combined as ChunkedKeys<T>;
    }
    return this._indices;
}
/**
 * The dictionary vector read from this column's type (`_type.dictionaryVector`),
 * or `null` when the column's type is not a Dictionary type.
 */
public get dictionary(): ChunkedDict<T> | null {
    return DataType.isDictionary(this._type)
        ? (<any> this._type.dictionaryVector) as ChunkedDict<T>
        : null;
}

public *[Symbol.iterator](): IterableIterator<T['TValue'] | null> {
for (const chunk of this._chunks) {
yield* chunk;
Expand Down
16 changes: 11 additions & 5 deletions js/test/generate-test-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -129,34 +129,40 @@ const defaultUnionChildren = [
new Field('union[2]', new Map_(defaultStructChildren))
];

type GeneratedTestData<T extends DataType> = {
/** Result of generating test data for one vector of type `T`. */
export type GeneratedTestData<T extends DataType> = {
    /** Thunk returning an array of values (or `null`s) typed after `T['TValue']`. */
    values: () => Array<T['TValue'] | null>;
    /** The generated vector. */
    vector: VType<T>;
    /** Optional array of numeric keys; in this view only `generateDictionary` returns it. */
    keys?: ArrayLike<number>;
};

/**
 * Builds a Table for tests: one generated record batch per entry of `lengths`,
 * all created with the same `schema`.
 *
 * Besides the Table, the result carries per-batch thunks (`rowBatches`,
 * `colBatches`, `keyBatches`) and whole-table thunks (`rows`, `cols`, `keys`)
 * that combine the per-batch results; the whole-table thunks are wrapped in `memoize`.
 *
 * @param lengths Row count of each record batch to generate (default: a single batch of 100).
 * @param schema Schema shared by every batch (default: built from a copy of `defaultRecordBatchChildren`).
 */
export const table = (lengths = [100], schema: Schema = new Schema(defaultRecordBatchChildren.slice())) => {
const generated = lengths.map((length) => recordBatch(length, schema));
const rowBatches = generated.map(({ rows }) => rows);
const colBatches = generated.map(({ cols }) => cols);
const keyBatches = generated.map(({ keys }) => keys);
// All rows of every batch, appended in batch order.
const rows = memoize(() => rowBatches.reduce((rows: any[][], batch) => [...rows, ...batch()], []));
// Per-column keys: while the accumulator is empty it is seeded with the batch's result;
// afterwards each batch's keys are appended column by column. A missing entry on either
// side is treated as an empty array (`|| []`).
const keys = memoize(() => keyBatches.reduce((keys: any[][], batch) => (
!keys.length ? batch() : keys.map((idxs, i) => [...(idxs || []), ...(batch()[i] || [])])
), []));
// Per-column values, combined the same way as the keys but without the empty-array fallback.
const cols = memoize(() => colBatches.reduce((cols: any[][], batch) => (
!cols.length ? batch() : cols.map((vals, i) => [...vals, ...batch()[i]])
), []));

// NOTE(review): two consecutive `return` statements follow; as written the second one
// (which additionally exposes `keys` and `keyBatches`) is unreachable. This looks like the
// removed and added sides of the same diff line — confirm which one the file should keep.
return { rows, cols, rowBatches, colBatches, table: new Table(schema, generated.map(({ recordBatch }) => recordBatch)) };
return { rows, cols, keys, rowBatches, colBatches, keyBatches, table: new Table(schema, generated.map(({ recordBatch }) => recordBatch)) };
};
/**
 * Builds a RecordBatch for tests by generating one vector per schema field
 * (via `vectorGenerator.visit(field.type, length)`).
 *
 * Besides the RecordBatch, the result carries thunks wrapped in `memoize`:
 * `cols` (the generated values of each column), `rows` (the same values
 * regrouped by row) and `keys` (each generator's `keys` entry, which may be
 * undefined for a column).
 *
 * @param length Number of rows to generate (default 100).
 * @param schema Schema whose fields drive generation (default: built from a copy of `defaultRecordBatchChildren`).
 */
export const recordBatch = (length = 100, schema: Schema = new Schema(defaultRecordBatchChildren.slice())) => {

const generated = schema.fields.map((f) => vectorGenerator.visit(f.type, length));
const vecs = generated.map(({ vector }) => vector);

const keys = memoize(() => generated.map(({ keys }) => keys));
const cols = memoize(() => generated.map(({ values }) => values()));
// The IIFE receives the `cols` thunk as `_cols` so the inner function can use it as the
// default for its own `cols` parameter. The loop does its work in the update clause:
// rows[i] becomes the i-th value of every column.
const rows = ((_cols: () => any[][]) => memoize((rows: any[][] = [], cols: any[][] = _cols()) => {
for (let i = -1; ++i < length; rows[i] = cols.map((vals) => vals[i]));
return rows;
}))(cols);

// NOTE(review): two consecutive `return` statements follow; as written the second one
// (which additionally exposes `keys`) is unreachable. This looks like the removed and
// added sides of the same diff line — confirm which one the file should keep.
return { rows, cols, recordBatch: new RecordBatch(schema, length, vecs) };
return { rows, cols, keys, recordBatch: new RecordBatch(schema, length, vecs) };
};
/** Generates test data for a `Null`-typed vector of `length` elements (default 100) through `vectorGenerator`. */
export const null_ = (length = 100) => {
    const type = new Null();
    return vectorGenerator.visit(type, length);
};
/**
 * Generates test data for a `Bool`-typed vector through `vectorGenerator`.
 *
 * @param length Number of elements (default 100).
 * @param nullCount Forwarded to the generator; defaults to 20% of `length`, truncated to an integer.
 */
export const bool = (length = 100, nullCount = length * 0.2 | 0) => {
    const type = new Bool();
    return vectorGenerator.visit(type, length, nullCount);
};
Expand Down Expand Up @@ -410,7 +416,7 @@ function generateDictionary<T extends Dictionary>(this: TestDataVectorGenerator,
type.dictionaryVector = dict;
(<any> type).dictVals = vals;

return { values, vector: Vector.new(Data.Dictionary(type, 0, length, nullCount, nullBitmap, keys)) };
return { values, keys, vector: Vector.new(Data.Dictionary(type, 0, length, nullCount, nullBitmap, keys)) };
}

function generateUnion<T extends Union>(this: TestDataVectorGenerator, type: T, length = 100, nullCount = length * 0.2 | 0, children?: GeneratedTestData<any>[]): GeneratedTestData<T> {
Expand Down Expand Up @@ -519,7 +525,7 @@ const randomString = ((opts) =>
(length: number) => randomatic('?', length, opts)
)({ chars: `abcdefghijklmnopqrstuvwxyz0123456789_` });

// NOTE(review): this form does not cache anything. The inner `(x?: any) => ...` function is
// created and invoked with no argument on every call of the returned thunk, so `x` always
// starts out undefined and `fn()` runs each time. It shares its name with the declaration on
// the following line and appears to be the pre-change side of the diff.
const memoize = (fn: () => any) => () => ((x?: any) => x || (x = fn()))();
/**
 * Wraps `fn` in a thunk that invokes it at most once and returns the stored
 * result on every later call.
 *
 * An explicit flag records that the value has been computed, so falsy results
 * (`0`, `''`, `false`, `null`, `undefined`) are cached as well instead of
 * triggering another call to `fn`. If `fn` throws, nothing is stored and the
 * next call tries again.
 *
 * @param fn Zero-argument producer of the value to cache.
 * @returns A zero-argument function returning the cached value.
 */
const memoize = (fn: () => any) => {
    let computed = false;
    let cached: any;
    return () => {
        if (!computed) {
            cached = fn();
            // Set only after `fn` returned, so a throwing `fn` is retried on the next call.
            computed = true;
        }
        return cached;
    };
};

const encodeUtf8 = ((encoder) =>
encoder.encode.bind(encoder) as (input?: string, options?: { stream?: boolean }) => Uint8Array
Expand Down
58 changes: 52 additions & 6 deletions js/test/unit/generated-data-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,31 +61,62 @@ describe('Generated Test Data', () => {
describe('MapVector', () => validateVector(generate.map()));
});

function validateTable({ rows, cols, rowBatches, colBatches, table }: { rows: () => any[][], cols: () => any[][], rowBatches: (() => any[][])[], colBatches: (() => any[][])[], table: Table }) {
/** Argument shape of `validateTable`: a Table plus thunks describing its generated contents. */
interface GeneratedTable {
    /** The Table being validated. */
    table: Table;
    /** Whole-table thunks: rows, per-column values, and per-column keys. */
    rows: () => Array<any[]>;
    cols: () => Array<any[]>;
    keys: () => Array<ArrayLike<number>>;
    /** One thunk per record batch, indexed like `table.chunks`. */
    rowBatches: Array<() => Array<any[]>>;
    colBatches: Array<() => Array<any[]>>;
    keyBatches: Array<() => Array<ArrayLike<number>>>;
}

/** Argument shape of `validateRecordBatch`: a RecordBatch plus thunks describing its generated contents. */
interface GeneratedRecordBatch {
    /** The RecordBatch being validated. */
    recordBatch: RecordBatch;
    /** Thunk returning the values grouped by row. */
    rows: () => any[][];
    /** Thunk returning the values grouped by column (indexed like the schema's fields). */
    cols: () => any[][];
    /** Thunk returning one key array per column (indexed like the schema's fields). */
    keys: () => ArrayLike<number>[];
}

/** Argument shape of `validateVector`. */
interface GeneratedVector {
    /** Thunk producing the values the vector is checked against. */
    values: () => Array<any>;
    /** The vector being validated. */
    vector: Vector;
    /** Optional keys; when present and non-empty, the vector's `indices` are compared against them. */
    keys?: ArrayLike<number>;
}

/**
 * Registers the validation tests for a generated Table:
 *  - the Table itself, validated as a vector of rows;
 *  - every record batch in `table.chunks`, each inside its own `describe`;
 *  - every column (one per schema field), each inside its own `describe`.
 */
function validateTable({ keys, rows, cols, rowBatches, colBatches, keyBatches, table }: GeneratedTable) {

validateVector({ values: rows, vector: table });

table.chunks.forEach((recordBatch, i) => {
describe(`recordBatch ${i}`, () => {
// NOTE(review): the next two calls differ only in the `keys` argument and both run as
// written; the first lacks the `keys` property that GeneratedRecordBatch declares. They
// look like the removed and added sides of one diff line — confirm which should remain.
validateRecordBatch({ rows: rowBatches[i], cols: colBatches[i], recordBatch });
validateRecordBatch({ keys: keyBatches[i], rows: rowBatches[i], cols: colBatches[i], recordBatch });
});
});

table.schema.fields.forEach((field, i) => {
// NOTE(review): the same column is described twice below, once without and once with
// `keys`; this also looks like both sides of one diff line.
describe(`column ${i}: ${field}`, () => validateVector({ values: () => cols()[i], vector: table.getColumnAt(i)! }));
describe(`column ${i}: ${field}`, () => validateVector({
keys: keys()[i],
values: () => cols()[i],
vector: table.getColumnAt(i)!
}));
});
}

function validateRecordBatch({ rows, cols, recordBatch }: { rows: () => any[][], cols: () => any[][], recordBatch: RecordBatch }) {
/**
 * Registers the validation tests for a generated RecordBatch: the batch itself,
 * validated as a vector of rows, followed by each child vector (one per schema
 * field) inside its own `describe`.
 */
function validateRecordBatch({ rows, cols, keys, recordBatch }: GeneratedRecordBatch) {

validateVector({ values: rows, vector: recordBatch });

recordBatch.schema.fields.forEach((field, i) => {
// NOTE(review): the same child is described twice below, once without and once with
// `keys`; both run as written. They look like the removed and added sides of one diff
// line — confirm which should remain.
describe(`${field}`, () => validateVector({ values: () => cols()[i], vector: recordBatch.getChildAt(i)! }));
describe(`${field}`, () => validateVector({
keys: keys()[i],
values: () => cols()[i],
vector: recordBatch.getChildAt(i)!
}));
});
}

function validateVector({ values: createValues, vector }: { values: () => any[], vector: Vector }) {
function validateVector({ values: createValues, vector, keys }: GeneratedVector) {

const values = createValues();

Expand All @@ -101,6 +132,21 @@ function validateVector({ values: createValues, vector }: { values: () => any[],
} catch (e) { throw new Error(`${vector}[${i}]: ${e}`); }
});

if (keys && keys.length > 0) {
test(`dictionary indices should match`, () => {
expect.hasAssertions();
let indices = (vector as any).indices;
let i = -1, n = indices.length;
try {
while (++i < n) {
indices.isValid(i)
? expect(indices.get(i)).toBe(keys[i])
: expect(indices.get(i)).toBe(null);
}
} catch (e) { throw new Error(`${indices}[${i}]: ${e}`); }
});
}

test(`sets expected values`, () => {
expect.hasAssertions();
let i = -1, n = vector.length, actual, expected;
Expand Down

0 comments on commit f7d2b2e

Please sign in to comment.