@@ -4,6 +4,7 @@ import { BSONError } from './error';
44import { BSON_BINARY_SUBTYPE_UUID_NEW } from './constants' ;
55import { ByteUtils } from './utils/byte_utils' ;
66import { BSONValue } from './bson_value' ;
7+ import { NumberUtils } from './utils/number_utils' ;
78
89/** @public */
910export type BinarySequence = Uint8Array | number [ ] ;
@@ -58,9 +59,18 @@ export class Binary extends BSONValue {
5859 static readonly SUBTYPE_COLUMN = 7 ;
5960 /** Sensitive BSON type */
6061 static readonly SUBTYPE_SENSITIVE = 8 ;
62+ /** Vector BSON type */
63+ static readonly SUBTYPE_VECTOR = 9 ;
6164 /** User BSON type */
6265 static readonly SUBTYPE_USER_DEFINED = 128 ;
6366
/**
 * Datatype tags of a Binary Vector (subtype: 9).
 *
 * The first byte of a vector's buffer (`binary.buffer[0]`) holds one of these values.
 */
static readonly VECTOR_TYPE = Object.freeze({
  /** Elements are read back as an `Int8Array` (see `toInt8Array`). */
  Int8: 0x03,
  /** Elements are read back as a `Float32Array` (see `toFloat32Array`). */
  Float32: 0x27,
  /** Bits packed eight per byte, most significant bit first (see `toBits`). */
  PackedBit: 0x10
} as const);
73+
6474 /**
6575 * The bytes of the Binary value.
6676 *
@@ -238,6 +248,11 @@ export class Binary extends BSONValue {
238248 /** @internal */
239249 toExtendedJSON ( options ?: EJSONOptions ) : BinaryExtendedLegacy | BinaryExtended {
240250 options = options || { } ;
251+
252+ if ( this . sub_type === Binary . SUBTYPE_VECTOR ) {
253+ validateBinaryVector ( this ) ;
254+ }
255+
241256 const base64String = ByteUtils . toBase64 ( this . buffer ) ;
242257
243258 const subType = Number ( this . sub_type ) . toString ( 16 ) ;
@@ -310,6 +325,209 @@ export class Binary extends BSONValue {
310325 const subTypeArg = inspect ( this . sub_type , options ) ;
311326 return `Binary.createFromBase64(${ base64Arg } , ${ subTypeArg } )` ;
312327 }
328+
/**
 * Returns the elements of an Int8 Vector as a freshly allocated Int8Array.
 *
 * The Binary must have the Vector sub_type and its datatype byte
 * (`binary.buffer[0]`) must equal `Binary.VECTOR_TYPE.Int8`.
 * The returned array is a copy; it does not share memory with this Binary.
 *
 * @returns a new Int8Array holding the bytes that follow the two header bytes
 * @throws BSONError if the sub_type is not Vector or the datatype is not Int8
 */
public toInt8Array(): Int8Array {
  const isVector = this.sub_type === Binary.SUBTYPE_VECTOR;
  if (!isVector) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const datatype = this.buffer[0];
  if (datatype !== Binary.VECTOR_TYPE.Int8) {
    throw new BSONError('Binary datatype field is not Int8');
  }

  // Skip the two header bytes (datatype, padding) and stop at the written length.
  const { byteOffset } = this.buffer;
  const copied = this.buffer.buffer.slice(byteOffset + 2, byteOffset + this.position);
  return new Int8Array(copied);
}
348+
/**
 * If this Binary represents a Float32 Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.Float32`),
 * returns a copy of the bytes in a new Float32Array.
 *
 * The payload bytes are byte-swapped in groups of four on big-endian hosts
 * before being reinterpreted as floats.
 *
 * @returns a new Float32Array that does not share memory with this Binary
 * @throws BSONError if the Binary is not a Vector, the datatype is not Float32,
 * or the payload length is not a multiple of 4 bytes
 */
public toFloat32Array(): Float32Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.Float32) {
    throw new BSONError('Binary datatype field is not Float32');
  }

  // Copy everything after the two header bytes (datatype, padding).
  const floatBytes = new Uint8Array(
    this.buffer.buffer.slice(this.buffer.byteOffset + 2, this.buffer.byteOffset + this.position)
  );

  // `new Float32Array(arrayBuffer)` throws a RangeError for lengths that are not
  // a multiple of 4; report malformed vectors with the library's own error type.
  if (floatBytes.byteLength % 4 !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (NumberUtils.isBigEndian) ByteUtils.swap32(floatBytes);

  return new Float32Array(floatBytes.buffer);
}
372+
/**
 * Returns the still-packed bytes of a packed bit Vector as a new Uint8Array.
 *
 * The Binary must have the Vector sub_type and its datatype byte
 * (`binary.buffer[0]`) must equal `Binary.VECTOR_TYPE.PackedBit`.
 * Call `toBits` instead to obtain one array element per bit.
 *
 * @returns a copy of the bytes that follow the two header bytes
 * @throws BSONError if the sub_type is not Vector or the datatype is not PackedBit
 */
public toPackedBits(): Uint8Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  const datatype = this.buffer[0];
  if (datatype !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary datatype field is not packed bit');
  }

  // Payload spans from just past the header up to the written length.
  const first = this.buffer.byteOffset + 2;
  const last = this.buffer.byteOffset + this.position;
  const packed = this.buffer.buffer.slice(first, last);
  return new Uint8Array(packed);
}
394+
/**
 * If this Binary represents a Packed bit Vector (`binary.buffer[0] === Binary.VECTOR_TYPE.PackedBit`),
 * returns a copy of the bits unpacked into a new Int8Array, one element (0 or 1) per bit.
 *
 * Bits are read most significant bit first within each byte. The padding byte
 * (`binary.buffer[1]`) gives the number of trailing bits of the last byte that are ignored.
 *
 * Use `toPackedBits` to get the bits still in packed form.
 *
 * @returns a new Int8Array of 0s and 1s
 * @throws BSONError if the Binary is not a Vector, the datatype is not PackedBit,
 * or the padding is invalid (greater than 7, or non-zero for an empty vector)
 */
public toBits(): Int8Array {
  if (this.sub_type !== Binary.SUBTYPE_VECTOR) {
    throw new BSONError('Binary sub_type is not Vector');
  }

  if (this.buffer[0] !== Binary.VECTOR_TYPE.PackedBit) {
    throw new BSONError('Binary datatype field is not packed bit');
  }

  // Reject bad padding up front: otherwise padding > 7 would silently drop whole
  // data bytes and a padded empty vector would produce a negative array length.
  validateBinaryVector(this);

  const byteCount = this.length() - 2;
  const bitCount = byteCount * 8 - this.buffer[1];
  const bits = new Int8Array(bitCount);

  for (let bitOffset = 0; bitOffset < bits.length; bitOffset++) {
    const byteOffset = (bitOffset / 8) | 0;
    const byte = this.buffer[byteOffset + 2];
    // Bit 0 of the vector is the most significant bit of the first data byte.
    const shift = 7 - (bitOffset % 8);
    const bit = (byte >> shift) & 1;
    bits[bitOffset] = bit;
  }

  return bits;
}
426+
/**
 * Builds a Binary holding an Int8 Vector.
 *
 * The result starts with a two byte header (datatype `Binary.VECTOR_TYPE.Int8`,
 * padding `0`) followed by a copy of the array's bytes.
 *
 * @param array - The Int8 values to copy into the new Binary
 * @returns a Binary with sub_type `SUBTYPE_VECTOR`
 */
public static fromInt8Array(array: Int8Array): Binary {
  const headerSize = 2;
  const bytes = ByteUtils.allocate(headerSize + array.byteLength);

  bytes[0] = Binary.VECTOR_TYPE.Int8;
  bytes[1] = 0;

  // View the same memory as unsigned bytes so it can be copied verbatim.
  const payload = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
  bytes.set(payload, headerSize);

  return new this(bytes, this.SUBTYPE_VECTOR);
}
439+
/**
 * Constructs a Binary representing a Float32 Vector.
 *
 * The result starts with a two byte header (datatype `Binary.VECTOR_TYPE.Float32`,
 * padding `0`) followed by a copy of the array's bytes. On big-endian hosts the
 * copied payload is byte-swapped in groups of four.
 *
 * @param array - The Float32 values to copy into the new Binary
 * @returns a Binary with sub_type `SUBTYPE_VECTOR`
 */
public static fromFloat32Array(array: Float32Array): Binary {
  const binaryBytes = ByteUtils.allocate(array.byteLength + 2);
  binaryBytes[0] = Binary.VECTOR_TYPE.Float32;
  binaryBytes[1] = 0;

  const floatBytes = new Uint8Array(array.buffer, array.byteOffset, array.byteLength);
  binaryBytes.set(floatBytes, 2);

  if (NumberUtils.isBigEndian) {
    // Bound the view to exactly the float payload: honour the allocation's own
    // byteOffset and length so the swap never touches bytes outside binaryBytes,
    // even if the allocator hands back a view into a larger ArrayBuffer.
    ByteUtils.swap32(
      new Uint8Array(binaryBytes.buffer, binaryBytes.byteOffset + 2, array.byteLength)
    );
  }

  return new this(binaryBytes, this.SUBTYPE_VECTOR);
}
453+
/**
 * Constructs a Binary representing a packed bit Vector.
 *
 * The result starts with a two byte header (datatype `Binary.VECTOR_TYPE.PackedBit`,
 * then `padding`) followed by a copy of `array`.
 *
 * Use `fromBits` to pack an array of 1s and 0s.
 *
 * @param array - The already packed bytes to copy into the new Binary
 * @param padding - Number of trailing bits of the last byte to ignore (0 to 7); defaults to 0
 * @returns a Binary with sub_type `SUBTYPE_VECTOR`
 * @throws BSONError if `padding` is not an integer between 0 and 7, or is non-zero for an empty `array`
 */
public static fromPackedBits(array: Uint8Array, padding = 0): Binary {
  // Storing into a Uint8Array would silently truncate out-of-range or fractional
  // values, so reject them before they are written to the header.
  if (!Number.isInteger(padding) || padding < 0 || padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }

  if (array.byteLength === 0 && padding !== 0) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  const buffer = ByteUtils.allocate(array.byteLength + 2);
  buffer[0] = Binary.VECTOR_TYPE.PackedBit;
  buffer[1] = padding;
  buffer.set(array, 2);
  return new this(buffer, this.SUBTYPE_VECTOR);
}
466+
/**
 * Builds a Binary holding a packed bit Vector from individual bit values.
 *
 * Bits are packed eight per byte, most significant bit first. The padding byte
 * of the header records how many trailing bits of the last byte are unused.
 *
 * @param bits - The sequence of 1s and 0s to pack into the Binary instance
 * @returns a Binary with sub_type `SUBTYPE_VECTOR`
 * @throws BSONError if any element is neither 0 nor 1
 */
public static fromBits(bits: ArrayLike<number>): Binary {
  const headerSize = 2;
  const payloadSize = (bits.length + 7) >>> 3; // ceil(bits.length / 8)
  const packed = new Uint8Array(payloadSize + headerSize);

  packed[0] = Binary.VECTOR_TYPE.PackedBit;

  const usedBitsInLastByte = bits.length % 8;
  if (usedBitsInLastByte === 0) {
    packed[1] = 0;
  } else {
    packed[1] = 8 - usedBitsInLastByte;
  }

  for (let index = 0; index < bits.length; index++) {
    const value = bits[index];

    if (value !== 0 && value !== 1) {
      throw new BSONError(
        `Invalid bit value at ${index}: must be 0 or 1, found ${bits[index]}`
      );
    }

    if (value === 1) {
      const target = (index >>> 3) + headerSize; // floor(index / 8), past the header
      const shift = 7 - (index % 8);
      packed[target] |= value << shift;
    }
  }

  return new this(packed, Binary.SUBTYPE_VECTOR);
}
497+ }
498+
/**
 * Validates the header and payload length of a Binary Vector.
 *
 * Binaries whose sub_type is not `Binary.SUBTYPE_VECTOR` are ignored.
 * Only the known datatypes (Int8, Float32, PackedBit) are checked.
 *
 * @param vector - The Binary to validate
 * @throws BSONError if padding is non-zero for an Int8 or Float32 vector,
 * if a Float32 payload is not a multiple of 4 bytes,
 * if padding is non-zero for an empty PackedBit vector,
 * or if PackedBit padding is greater than 7
 */
export function validateBinaryVector(vector: Binary): void {
  if (vector.sub_type !== Binary.SUBTYPE_VECTOR) return;

  const size = vector.position;

  // NOTE: Validation is only applied to **KNOWN** vector types
  // If a new datatype is introduced, a future version of the library will need to add validation
  const datatype = vector.buffer[0];

  // NOTE: We do not enable noUncheckedIndexedAccess so TS believes this is always number
  // a Binary vector may be empty, in which case the padding is undefined
  // this possible value is tolerable for our validation checks
  const padding: number | undefined = vector.buffer[1];

  if (
    (datatype === Binary.VECTOR_TYPE.Float32 || datatype === Binary.VECTOR_TYPE.Int8) &&
    padding !== 0
  ) {
    throw new BSONError('Invalid Vector: padding must be zero for int8 and float32 vectors');
  }

  // Each float32 element occupies 4 bytes, so the payload after the two header
  // bytes must divide evenly; a ragged tail cannot be decoded into floats.
  if (datatype === Binary.VECTOR_TYPE.Float32 && size > 2 && (size - 2) % 4 !== 0) {
    throw new BSONError('Invalid Vector: Float32 vector must contain a multiple of 4 bytes');
  }

  if (datatype === Binary.VECTOR_TYPE.PackedBit && padding !== 0 && size === 2) {
    throw new BSONError(
      'Invalid Vector: padding must be zero for packed bit vectors that are empty'
    );
  }

  if (datatype === Binary.VECTOR_TYPE.PackedBit && padding > 7) {
    throw new BSONError(
      `Invalid Vector: padding must be a value between 0 and 7. found: ${padding}`
    );
  }
}
314532
315533/** @public */
0 commit comments