diff --git a/src/binary.ts b/src/binary.ts index 1fe098058..f7cd61b1f 100644 --- a/src/binary.ts +++ b/src/binary.ts @@ -341,6 +341,8 @@ export class Binary extends BSONValue { throw new BSONError('Binary datatype field is not Int8'); } + validateBinaryVector(this); + return new Int8Array( this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) ); @@ -361,6 +363,8 @@ export class Binary extends BSONValue { throw new BSONError('Binary datatype field is not Float32'); } + validateBinaryVector(this); + const floatBytes = new Uint8Array( this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) ); @@ -387,6 +391,8 @@ export class Binary extends BSONValue { throw new BSONError('Binary datatype field is not packed bit'); } + validateBinaryVector(this); + return new Uint8Array( this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position) ); @@ -409,6 +415,8 @@ export class Binary extends BSONValue { throw new BSONError('Binary datatype field is not packed bit'); } + validateBinaryVector(this); + const byteCount = this.length() - 2; const bitCount = byteCount * 8 - this.buffer[1]; const bits = new Int8Array(bitCount); @@ -434,7 +442,9 @@ export class Binary extends BSONValue { buffer[1] = 0; const intBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength); buffer.set(intBytes, 2); - return new this(buffer, this.SUBTYPE_VECTOR); + const bin = new this(buffer, this.SUBTYPE_VECTOR); + validateBinaryVector(bin); + return bin; } /** Constructs a Binary representing an Float32 Vector. */ @@ -448,7 +458,9 @@ export class Binary extends BSONValue { if (NumberUtils.isBigEndian) ByteUtils.swap32(new Uint8Array(binaryBytes.buffer, 2)); - return new this(binaryBytes, this.SUBTYPE_VECTOR); + const bin = new this(binaryBytes, this.SUBTYPE_VECTOR); + validateBinaryVector(bin); + return bin; } /** @@ -461,7 +473,9 @@ export class Binary extends BSONValue { buffer[0] = Binary.VECTOR_TYPE.PackedBit; buffer[1] = padding; buffer.set(array, 2); - return new this(buffer, this.SUBTYPE_VECTOR); + const bin = new this(buffer, this.SUBTYPE_VECTOR); + validateBinaryVector(bin); + return bin; } /** @@ -517,6 +531,12 @@ export function validateBinaryVector(vector: Binary): void { throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors'); } + if (datatype === Binary.VECTOR_TYPE.Float32) { + if (size !== 0 && size - 2 !== 0 && (size - 2) % 4 !== 0) { + throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes'); + } + } + if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) { throw new BSONError( 'Invalid Vector: padding must be zero for packed bit vectors that are empty' diff --git a/test/node/bson_binary_vector.spec.test.ts b/test/node/bson_binary_vector.spec.test.ts index 87f573abe..50475d94d 100644 --- a/test/node/bson_binary_vector.spec.test.ts +++ b/test/node/bson_binary_vector.spec.test.ts @@ -1,6 +1,7 @@ +import * as util from 'util'; import * as fs from 'fs'; import * as path from 'path'; -import { BSON, BSONError, Binary } from '../register-bson'; +import { BSON, BSONError, Binary, EJSON } from '../register-bson'; import { expect } from 'chai'; const { toHex, fromHex } = BSON.onDemand.ByteUtils; @@ -8,7 +9,7 @@ const { toHex, fromHex } = BSON.onDemand.ByteUtils; type VectorHexType = '0x03' | '0x27' | '0x10'; type VectorTest = { description: string; - vector: (number | string)[]; + vector?: number[]; valid: boolean; dtype_hex: VectorHexType; padding?: number; @@ -17,15 +18,11 @@ type VectorTest = { type VectorSuite = { description: string; test_key: string; tests: VectorTest[] }; function fixFloats(f: string | number): number { + // Should be nothing to "fix" but validates we didn't get + // an unexpected type so we don't silently fail on it during the test if (typeof f === 'number') { return f; } - if (f === 'inf') { - return Infinity; - } - if (f === '-inf') { - return -Infinity; - } throw new Error(`test format error: unknown float value: ${f}`); } @@ -49,7 +46,20 @@ function fixBits(f: number | string): number { return f; } -function make(vector: (number | string)[], dtype_hex: VectorHexType, padding?: number): Binary { +function dtypeToHelper(dtype_hex: string) { + switch (dtype_hex) { + case '0x10' /* packed_bit */: + return 'fromPackedBits'; + case '0x03' /* int8 */: + return 'fromInt8Array'; + case '0x27' /* float32 */: + return 'fromFloat32Array'; + default: + throw new Error(`Unknown dtype_hex: ${dtype_hex}`); + } +} + +function make(vector: number[], dtype_hex: VectorHexType, padding?: number): Binary { let binary: Binary; switch (dtype_hex) { case '0x10' /* packed_bit */: @@ -87,21 +97,152 @@ const invalidTestExpectedError = new Map() 'Invalid Vector: padding must be a value between 0 and 7' ) .set('Negative padding PACKED_BIT', 'Invalid Vector: padding must be a value between 0 and 7') - // skipped - .set('Overflow Vector PACKED_BIT', false) - .set('Underflow Vector PACKED_BIT', false) - .set('Overflow Vector INT8', false) - .set('Underflow Vector INT8', false) - .set('INT8 with float inputs', false) - // duplicate test! but also skipped. - .set('Vector with float values PACKED_BIT', false) - .set('Vector with float values PACKED_BIT', false); + .set( + 'Insufficient vector data FLOAT32', + 'Invalid Vector: Float32 vector must contain a multiple of 4 bytes' + ) + // These are not possible given the constraints of the input types allowed: + // our helpers will throw an "unsupported_error" for these + .set('Overflow Vector PACKED_BIT', 'unsupported_error') + .set('Underflow Vector PACKED_BIT', 'unsupported_error') + .set('Overflow Vector INT8', 'unsupported_error') + .set('Underflow Vector INT8', 'unsupported_error') + .set('INT8 with float inputs', 'unsupported_error') + .set('Vector with float values PACKED_BIT', 'unsupported_error'); + +function catchError( + fn: () => T +): { status: 'returned'; result: T } | { status: 'thrown'; result: Error } { + try { + return { status: 'returned', result: fn() }; + } catch (error) { + return { status: 'thrown', result: error }; + } +} + +function testVectorInvalidInputValues(test: VectorTest, expectedErrorMessage: string) { + const binaryCreation = catchError(make.bind(null, test.vector!, test.dtype_hex, test.padding)); + const bsonBytesCreation = + binaryCreation.status !== 'thrown' + ? catchError(BSON.serialize.bind(null, { bin: binaryCreation.result })) + : undefined; + const ejsonStringCreation = + binaryCreation.status !== 'thrown' + ? catchError(BSON.EJSON.stringify.bind(null, { bin: binaryCreation.result })) + : undefined; + + const binaryHelperValidations = [ + 'Padding specified with no vector data PACKED_BIT', + 'Exceeding maximum padding PACKED_BIT', + 'Negative padding PACKED_BIT', + ...Array.from(invalidTestExpectedError.entries()) + .filter(([, v]) => v === 'unsupported_error') + .map(([k]) => k) + ]; + + const errorType = expectedErrorMessage === 'unsupported_error' ? Error : BSONError; + const errorName = expectedErrorMessage === 'unsupported_error' ? 'Error' : 'BSONError'; + + const check = outcome => { + expect(outcome).to.exist; + expect(outcome.status).to.equal('thrown'); + expect(outcome.result).to.be.instanceOf(errorType); + expect(outcome.result).to.match(new RegExp(expectedErrorMessage)); + }; + + if (binaryHelperValidations.includes(test.description)) { + describe('when creating a BSON Vector given invalid input values', () => { + it(`Binary.${dtypeToHelper(test.dtype_hex)}() throws a ${errorName}`, function () { + check(binaryCreation); + }); + }); + } else { + expect(errorName).to.equal('BSONError'); // unsupported_error are only when making vectors + + describe('when encoding a BSON Vector given invalid input values', () => { + it(`Binary.${dtypeToHelper(test.dtype_hex)}() does not throw`, function () { + expect(binaryCreation).to.have.property('status', 'returned'); + }); + + it(`BSON.serialize() throws a BSONError`, function () { + check(bsonBytesCreation); + }); + + it(`EJSON.stringify() throws a BSONError`, function () { + check(ejsonStringCreation); + }); + }); + } +} + +function testVectorInvalidBSONBytes(test: VectorTest, expectedErrorMessage: string) { + describe('when encoding a Binary Vector made from invalid bytes', () => { + it(`BSON.serialize() throw a BSONError`, function () { + let thrownError: Error | undefined; + const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex')); + + try { + BSON.serialize(bin); + } catch (error) { + thrownError = error; + } + + expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError); + expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); + }); + + const toHelper = dtypeToHelper(test.dtype_hex).replace('from', 'to'); + it(`Binary.${toHelper}() throw a BSONError`, function () { + let thrownError: Error | undefined; + const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex')); + + try { + bin.vector[toHelper](); + } catch (error) { + thrownError = error; + } + + expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError); + expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); + }); + + if (toHelper === 'toPackedBits') { + it(`Binary.toBits() throw a BSONError`, function () { + let thrownError: Error | undefined; + const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex')); + + try { + bin.vector.toBits(); + } catch (error) { + thrownError = error; + } + + expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError); + expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); + }); + } + + it(`EJSON.stringify() throw a BSONError`, function () { + let thrownError: Error | undefined; + const bin = BSON.deserialize(Buffer.from(test.canonical_bson!, 'hex')); + + try { + EJSON.stringify(bin); + } catch (error) { + thrownError = error; + } + + expect(thrownError, thrownError?.stack).to.be.instanceOf(BSONError); + expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); + }); + }); +} describe('BSON Binary Vector spec tests', () => { const tests: Record = Object.create(null); for (const file of fs.readdirSync(path.join(__dirname, 'specs/bson-binary-vector'))) { - tests[path.basename(file, '.json')] = JSON.parse( + tests[path.basename(file, '.json')] = EJSON.parse( fs.readFileSync(path.join(__dirname, 'specs/bson-binary-vector', file), 'utf8') ); } @@ -120,20 +261,22 @@ describe('BSON Binary Vector spec tests', () => { * > MUST assert that the input float array is the same after encoding and decoding. */ for (const test of valid) { - it(`encode ${test.description}`, function () { - const bin = make(test.vector, test.dtype_hex, test.padding); + describe(test.description, () => { + it(`calling Binary.${dtypeToHelper(test.dtype_hex)}() with input numbers and serializing it does not throw`, function () { + const bin = make(test.vector!, test.dtype_hex, test.padding); - const buffer = BSON.serialize({ [suite.test_key]: bin }); - expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase()); - }); + const buffer = BSON.serialize({ [suite.test_key]: bin }); + expect(toHex(buffer)).to.equal(test.canonical_bson!.toLowerCase()); + }); - it(`decode ${test.description}`, function () { - const canonical_bson = fromHex(test.canonical_bson!.toLowerCase()); - const doc = BSON.deserialize(canonical_bson); + it(`creating a Binary instance from BSON bytes does not throw`, function () { + const canonical_bson = fromHex(test.canonical_bson!.toLowerCase()); + const doc = BSON.deserialize(canonical_bson); - expect(doc[suite.test_key].sub_type).to.equal(0x09); - expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex); - expect(doc[suite.test_key].buffer[1]).to.equal(test.padding); + expect(doc[suite.test_key].sub_type).to.equal(0x09); + expect(doc[suite.test_key].buffer[0]).to.equal(+test.dtype_hex); + expect(doc[suite.test_key].buffer[1]).to.equal(test.padding); + }); }); } }); @@ -147,46 +290,18 @@ describe('BSON Binary Vector spec tests', () => { for (const test of invalid) { const expectedErrorMessage = invalidTestExpectedError.get(test.description); - it(`bson: ${test.description}`, function () { - let thrownError: Error | undefined; - try { - const bin = make(test.vector, test.dtype_hex, test.padding); - BSON.serialize({ bin }); - } catch (error) { - thrownError = error; + describe(test.description, () => { + if (test.canonical_bson != null) { + testVectorInvalidBSONBytes(test, expectedErrorMessage); } - if (thrownError?.message.startsWith('unsupported_error')) { - expect( - expectedErrorMessage, - 'We expect a certain error message but got an unsupported error' - ).to.be.false; - this.skip(); + if (test.vector != null) { + testVectorInvalidInputValues(test, expectedErrorMessage); } - expect(thrownError).to.be.instanceOf(BSONError); - expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); - }); - - it(`extended json: ${test.description}`, function () { - let thrownError: Error | undefined; - try { - const bin = make(test.vector, test.dtype_hex, test.padding); - BSON.EJSON.stringify({ bin }); - } catch (error) { - thrownError = error; + if (test.vector == null && test.canonical_bson == null) { + throw new Error('not testing anything for: ' + util.inspect(test)); } - - if (thrownError?.message.startsWith('unsupported_error')) { - expect( - expectedErrorMessage, - 'We expect a certain error message but got an unsupported error' - ).to.be.false; - this.skip(); - } - - expect(thrownError).to.be.instanceOf(BSONError); - expect(thrownError?.message).to.match(new RegExp(expectedErrorMessage)); }); } }); diff --git a/test/node/specs/bson-binary-vector/float32.json b/test/node/specs/bson-binary-vector/float32.json index 872c43532..72dafce10 100644 --- a/test/node/specs/bson-binary-vector/float32.json +++ b/test/node/specs/bson-binary-vector/float32.json @@ -32,7 +32,7 @@ { "description": "Infinity Vector FLOAT32", "valid": true, - "vector": ["-inf", 0.0, "inf"], + "vector": [{"$numberDouble": "-Infinity"}, 0.0, {"$numberDouble": "Infinity"} ], "dtype_hex": "0x27", "dtype_alias": "FLOAT32", "padding": 0, @@ -44,8 +44,22 @@ "vector": [127.0, 7.0], "dtype_hex": "0x27", "dtype_alias": "FLOAT32", - "padding": 3 + "padding": 3, + "canonical_bson": "1C00000005766563746F72000A0000000927030000FE420000E04000" + }, + { + "description": "Insufficient vector data with 3 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1700000005766563746F7200050000000927002A2A2A00" + }, + { + "description": "Insufficient vector data with 5 bytes FLOAT32", + "valid": false, + "dtype_hex": "0x27", + "dtype_alias": "FLOAT32", + "canonical_bson": "1900000005766563746F7200070000000927002A2A2A2A2A00" } ] } - diff --git a/test/node/specs/bson-binary-vector/int8.json b/test/node/specs/bson-binary-vector/int8.json index 7529721e5..29524fb61 100644 --- a/test/node/specs/bson-binary-vector/int8.json +++ b/test/node/specs/bson-binary-vector/int8.json @@ -42,7 +42,8 @@ "vector": [127, 7], "dtype_hex": "0x03", "dtype_alias": "INT8", - "padding": 3 + "padding": 3, + "canonical_bson": "1600000005766563746F7200040000000903037F0700" }, { "description": "INT8 with float inputs", @@ -54,4 +55,3 @@ } ] } - diff --git a/test/node/specs/bson-binary-vector/packed_bit.json b/test/node/specs/bson-binary-vector/packed_bit.json index 035776e87..a220e7e31 100644 --- a/test/node/specs/bson-binary-vector/packed_bit.json +++ b/test/node/specs/bson-binary-vector/packed_bit.json @@ -8,7 +8,8 @@ "vector": [], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 1 + "padding": 1, + "canonical_bson": "1400000005766563746F72000200000009100100" }, { "description": "Simple Vector PACKED_BIT", @@ -61,21 +62,14 @@ "dtype_alias": "PACKED_BIT", "padding": 0 }, - { - "description": "Padding specified with no vector data PACKED_BIT", - "valid": false, - "vector": [], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 1 - }, { "description": "Exceeding maximum padding PACKED_BIT", "valid": false, "vector": [1], "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", - "padding": 8 + "padding": 8, + "canonical_bson": "1500000005766563746F7200030000000910080100" }, { "description": "Negative padding PACKED_BIT", @@ -84,15 +78,6 @@ "dtype_hex": "0x10", "dtype_alias": "PACKED_BIT", "padding": -1 - }, - { - "description": "Vector with float values PACKED_BIT", - "valid": false, - "vector": [127.5], - "dtype_hex": "0x10", - "dtype_alias": "PACKED_BIT", - "padding": 0 } ] } -