Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ export {
tableFromIPC, tableToIPC,
MessageReader, AsyncMessageReader, JSONMessageReader,
Message,
RecordBatch,
RecordBatch, recordBatchFromArrays,
util,
Builder, makeBuilder, builderThroughIterable, builderThroughAsyncIterable,
compressionRegistry, CompressionType,
Expand Down
2 changes: 1 addition & 1 deletion src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ export { compressionRegistry } from './ipc/compression/registry.js';
export type { Codec } from './ipc/compression/registry.js';
export { MessageReader, AsyncMessageReader, JSONMessageReader } from './ipc/message.js';
export { Message } from './ipc/metadata/message.js';
export { RecordBatch } from './recordbatch.js';
export { RecordBatch, recordBatchFromArrays } from './recordbatch.js';
export type { ArrowJSONLike, FileHandle, Readable, Writable, ReadableWritable, ReadableDOMStreamOptions } from './io/interfaces.js';

export {
Expand Down
28 changes: 27 additions & 1 deletion src/factories.ts
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,35 @@ export function vectorFromArray<T extends dtypes.DataType>(data: DataProps<T>):
export function vectorFromArray<T extends TypedArray | BigIntArray | readonly unknown[]>(data: T): Vector<ArrayDataType<T>>;

export function vectorFromArray(init: any, type?: dtypes.DataType) {
if (init instanceof Data || init instanceof Vector || init.type instanceof dtypes.DataType || ArrayBuffer.isView(init)) {
if (init instanceof Data || init instanceof Vector || init.type instanceof dtypes.DataType) {
return makeVector(init as any);
}
if (ArrayBuffer.isView(init) && !type) {
return makeVector(init as any);
}
if (ArrayBuffer.isView(init) && type) {
// Validate BigInt/number boundary
const isBigIntInput = init instanceof BigInt64Array || init instanceof BigUint64Array;
const isBigIntTarget = type.ArrayType === BigInt64Array || type.ArrayType === BigUint64Array;
if (isBigIntInput && !isBigIntTarget) {
throw new TypeError(
`Cannot convert BigInt input to ${type}. BigInt arrays can only target BigInt-based types (e.g. Int64, Uint64).`
);
}
if (!isBigIntInput && isBigIntTarget) {
throw new TypeError(
`Cannot convert non-BigInt input to ${type}. ${type} requires BigInt values.`
);
}

// Fast path: direct TypedArray conversion for Int and Float types
if (dtypes.DataType.isInt(type) || dtypes.DataType.isFloat(type)) {
const data = init.constructor === type.ArrayType
? init // zero-copy, same TypedArray type
: new (type.ArrayType as any)(init); // standard JS TypedArray conversion
return makeVector({ type, data, offset: 0, length: data.length, nullCount: 0 } as any);
}
}
const options: IterableBuilderOptions = { type: type ?? inferType(init), nullValues: [null] };
const chunks = [...builderThroughIterable(options)(init)];
const vector = chunks.length === 1 ? chunks[0] : chunks.reduce((a, b) => a.concat(b));
Expand Down
57 changes: 57 additions & 0 deletions src/recordbatch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ import { Vector } from './vector.js';
import { Schema, Field } from './schema.js';
import { DataType, Struct, Null, TypeMap } from './type.js';
import { wrapIndex } from './util/vector.js';
import { vectorFromArray } from './factories.js';
import { ArrayDataType, BigIntArray, TypedArray } from './interfaces.js';

import { instance as getVisitor } from './visitor/get.js';
import { instance as setVisitor } from './visitor/set.js';
Expand Down Expand Up @@ -306,6 +308,61 @@ Object.defineProperty(RecordBatch, Symbol.hasInstance, {
},
});

/**
 * Creates a new RecordBatch from an object of typed arrays or JavaScript arrays.
 *
 * With a `schema`, one column is built per schema field, in schema field order,
 * by converting `input[field.name]` with `vectorFromArray(col, field.type)`.
 * Input keys that the schema does not name are never read. Without a `schema`,
 * one column is built per own enumerable key of `input` and `vectorFromArray`
 * picks each column's type.
 *
 * @example
 * ```ts
 * const batch = recordBatchFromArrays({
 *     a: [1, 2, 3],
 *     b: new Int8Array([1, 2, 3]),
 * });
 * ```
 *
 * @example
 * ```ts
 * const schema = new Schema([
 *     new Field('a', new Int32),
 *     new Field('b', new Utf8),
 * ]);
 * const batch = recordBatchFromArrays({ a: [1, 2, 3], b: ['x', 'y', 'z'] }, schema);
 * ```
 *
 * @param input An object mapping column names to typed arrays or JavaScript arrays.
 * @param schema Optional schema to control column types, ordering, nullability, and metadata.
 * @returns A new RecordBatch.
 * @throws {TypeError} If `schema` names a field that is not an own property of
 *   `input`, or whose value is `null` or `undefined`.
 */
export function recordBatchFromArrays<T extends TypeMap>(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema: Schema<T>
): RecordBatch<T>;
export function recordBatchFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(
    input: I
): RecordBatch<{ [P in keyof I]: ArrayDataType<I[P]> }>;
export function recordBatchFromArrays(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema?: Schema
): RecordBatch {
    if (schema) {
        const children: Data[] = [];
        for (const field of schema.fields) {
            // Only own properties count as columns. A plain `input[field.name]`
            // would resolve inherited members such as `constructor` or
            // `toString` for fields with those names.
            const col = Object.prototype.hasOwnProperty.call(input, field.name)
                ? input[field.name]
                : undefined;
            // `== null` also rejects an explicit `null` column, which would
            // otherwise fail inside `vectorFromArray` with an unrelated message.
            if (col == null) {
                throw new TypeError(
                    `Schema field "${field.name}" not found in input. ` +
                    `Available keys: [${Object.keys(input).join(', ')}]`
                );
            }
            // NOTE(review): only the first Data chunk of the vector is used;
            // this presumes `vectorFromArray` yields a single chunk here — confirm.
            children.push(vectorFromArray(col as any, field.type).data[0]);
        }
        return new RecordBatch(schema, makeData({ type: new Struct(schema.fields), children }));
    }
    const dataMap: Record<string, Data> = {};
    for (const [key, col] of Object.entries(input)) {
        // Same single-chunk presumption as in the schema branch.
        dataMap[key] = vectorFromArray(col).data[0];
    }
    return new RecordBatch(dataMap as any);
}

/** @ignore */
function ensureSameLengthData<T extends TypeMap = any>(
Expand Down
47 changes: 40 additions & 7 deletions src/table.ts
Original file line number Diff line number Diff line change
Expand Up @@ -454,15 +454,48 @@ export function makeTable<I extends Record<string | number | symbol, TypedArray>
* })
* ```
*
* @param input Input an object of typed arrays or JavaScript arrays.
* @example
* ```ts
* const schema = new Schema([
* new Field('a', new Int32),
* new Field('b', new Utf8),
* ]);
* const table = tableFromArrays({ a: [1, 2, 3], b: ['x', 'y', 'z'] }, schema);
* ```
*
* @param input An object mapping column names to typed arrays or JavaScript arrays.
* @param schema Optional schema to control column types, ordering, nullability, and metadata.
* @returns A new Table.
*/
export function tableFromArrays<T extends TypeMap>(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema: Schema<T>
): Table<T>;
export function tableFromArrays<I extends Record<string | number | symbol, TypedArray | BigIntArray | readonly unknown[]>>(
    input: I
): Table<{ [P in keyof I]: ArrayDataType<I[P]> }>;
export function tableFromArrays(
    input: Record<string, TypedArray | BigIntArray | readonly unknown[]>,
    schema?: Schema
): Table {
    if (schema) {
        // Schema-driven path: columns are taken in schema field order and each
        // one is converted to the type declared by its field.
        const vecs: Vector[] = [];
        for (const field of schema.fields) {
            // Only own properties count as columns. A plain `input[field.name]`
            // would resolve inherited members such as `constructor` or
            // `toString` for fields with those names.
            const col = Object.prototype.hasOwnProperty.call(input, field.name)
                ? input[field.name]
                : undefined;
            // `== null` also rejects an explicit `null` column, which would
            // otherwise fail inside `vectorFromArray` with an unrelated message.
            if (col == null) {
                throw new TypeError(
                    `Schema field "${field.name}" not found in input. ` +
                    `Available keys: [${Object.keys(input).join(', ')}]`
                );
            }
            vecs.push(vectorFromArray(col as any, field.type));
        }
        // The schema returned by the helper is the one handed to the Table,
        // not the caller's original `schema` object.
        const [adjustedSchema, batches] = distributeVectorsIntoRecordBatches(schema, vecs);
        return new Table(adjustedSchema, batches);
    }
    // Inference path: one vector per own enumerable key, with the type chosen
    // by `vectorFromArray`.
    const vecs = {} as Record<string, Vector>;
    for (const [key, col] of Object.entries(input)) {
        vecs[key] = vectorFromArray(col);
    }
    return new Table(vecs);
}
97 changes: 96 additions & 1 deletion test/unit/recordbatch/record-batch-tests.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import '../../jest-extensions.js';
import { arange } from '../utils.js';

import { RecordBatch, makeVector } from 'apache-arrow';
import { RecordBatch, makeVector, recordBatchFromArrays, Schema, Field, Int32, Float32, Float64, Utf8, Dictionary } from 'apache-arrow';

function numsRecordBatch(i32Len: number, f32Len: number) {
return new RecordBatch({
Expand Down Expand Up @@ -130,3 +130,98 @@ describe(`RecordBatch`, () => {
});
});
});

// Unit tests for recordBatchFromArrays(): construction with inferred column
// types, and construction driven by an explicit Schema.
describe(`recordBatchFromArrays()`, () => {
    test(`creates a RecordBatch from typed arrays and JavaScript arrays`, () => {
        const columns = {
            a: new Float32Array([1, 2, 3]),
            b: [4, 5, 6],
            c: ['x', 'y', 'z'],
        };
        const result = recordBatchFromArrays(columns);

        // Shape first, then the type chosen for each column.
        expect(result.numRows).toBe(3);
        expect(result.numCols).toBe(3);
        expect(result.getChild('a')!.type).toBeInstanceOf(Float32);
        expect(result.getChild('b')!.type).toBeInstanceOf(Float64);
        expect(result.getChild('c')!.type).toBeInstanceOf(Dictionary);
    });

    test(`schema overrides type inference`, () => {
        const fieldA = new Field('a', new Int32);
        const fieldB = new Field('b', new Utf8);
        const result = recordBatchFromArrays(
            { a: [1, 2, 3], b: ['x', 'y', 'z'] },
            new Schema([fieldA, fieldB])
        );

        expect(result.numRows).toBe(3);
        expect(result.getChild('a')!.type).toBeInstanceOf(Int32);
        expect(result.getChild('b')!.type).toBeInstanceOf(Utf8);
        expect(result.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2, 3]));
    });

    test(`schema coerces TypedArray type`, () => {
        // Float32 input against an Int32 field.
        const int32Only = new Schema([new Field('a', new Int32)]);
        const result = recordBatchFromArrays({ a: new Float32Array([1, 2, 3]) }, int32Only);

        const column = result.getChild('a')!;
        expect(column.type).toBeInstanceOf(Int32);
        expect(result.getChild('a')!.toArray()).toEqual(new Int32Array([1, 2, 3]));
    });

    test(`preserves schema metadata`, () => {
        const metadata = new Map([['source', 'test']]);
        const withMetadata = new Schema([new Field('a', new Int32)], metadata);

        const result = recordBatchFromArrays({ a: [1, 2, 3] }, withMetadata);

        expect(result.schema.metadata.get('source')).toBe('test');
    });

    test(`throws on missing schema field`, () => {
        const wantsC = new Schema([new Field('c', new Int32)]);
        // The input only provides `a`, so building must fail on field `c`.
        const build = () => recordBatchFromArrays({ a: [1] }, wantsC);

        expect(build).toThrow(TypeError);
        expect(build).toThrow(/Schema field "c" not found in input/);
    });

    test(`handles different length columns via ensureSameLengthData`, () => {
        const twoInts = new Schema([
            new Field('a', new Int32),
            new Field('b', new Int32),
        ]);
        // `b` is one element shorter than `a`.
        const result = recordBatchFromArrays({ a: [1, 2, 3], b: [4, 5] }, twoInts);

        expect(result.numRows).toBe(3);
        expect(result.getChild('a')!).toHaveLength(3);
        expect(result.getChild('b')!).toHaveLength(3);
        expect(result.getChild('b')!.nullCount).toBe(1);
    });

    test(`preserves field ordering from schema`, () => {
        // The schema lists `b` before `a`; the input lists them the other way round.
        const reversed = new Schema([
            new Field('b', new Float64),
            new Field('a', new Int32),
        ]);
        const result = recordBatchFromArrays({ a: [1, 2, 3], b: [4.0, 5.0, 6.0] }, reversed);

        const [first, second] = result.schema.fields;
        expect(first.name).toBe('b');
        expect(second.name).toBe('a');
        expect(result.getChild('b')!.type).toBeInstanceOf(Float64);
        expect(result.getChild('a')!.type).toBeInstanceOf(Int32);
    });

    test(`handles empty arrays`, () => {
        const int32Only = new Schema([new Field('a', new Int32)]);
        const result = recordBatchFromArrays({ a: new Int32Array(0) }, int32Only);

        expect(result.numRows).toBe(0);
        expect(result.numCols).toBe(1);
        expect(result.getChild('a')!.type).toBeInstanceOf(Int32);
    });

    test(`basic creation without schema infers types`, () => {
        const columns = {
            f32: new Float32Array([1, 2]),
            nums: [1, 2, 3],
            strs: ['a', 'b'],
        };
        const result = recordBatchFromArrays(columns);

        expect(result.getChild('f32')!.type).toBeInstanceOf(Float32);
        expect(result.getChild('nums')!.type).toBeInstanceOf(Float64);
        expect(result.getChild('strs')!.type).toBeInstanceOf(Dictionary);
    });
});
Loading