@@ -8,6 +8,7 @@ import { webByteUtils } from '../../src/utils/web_byte_utils';
88import * as sinon from 'sinon' ;
99import { loadCJSModuleBSON , loadReactNativeCJSModuleBSON , loadESModuleBSON } from '../load_bson' ;
1010import * as crypto from 'node:crypto' ;
11+ import { BSONError , BSONUTF8Error } from '../../src/error' ;
1112
1213type ByteUtilTest < K extends keyof ByteUtils > = {
1314 name : string ;
@@ -399,6 +400,8 @@ const fromUTF8Tests: ByteUtilTest<'encodeUTF8Into'>[] = [
399400 }
400401 }
401402] ;
403+
404+
402405const toUTF8Tests : ByteUtilTest < 'toUTF8' > [ ] = [
403406 {
404407 name : 'should create utf8 string from buffer input' ,
@@ -416,6 +419,14 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
416419 expect ( output ) . to . be . a ( 'string' ) . with . lengthOf ( 0 ) ;
417420 }
418421 } ,
422+ {
423+ name : 'should insert replacement character fatal is false and string is invalid' ,
424+ inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
425+ expectation ( { error, output } ) {
426+ expect ( error ) . to . not . exist ;
427+ expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
428+ }
429+ } ,
419430 {
420431 name : 'should throw an error if fatal is set and string is invalid' ,
421432 inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , true ] ,
@@ -424,14 +435,168 @@ const toUTF8Tests: ByteUtilTest<'toUTF8'>[] = [
424435 }
425436 } ,
426437 {
427- name : 'should insert replacement character fatal is false and string is invalid' ,
428- inputs : [ Buffer . from ( '616263f09fa4' , 'hex' ) , 0 , 7 , false ] ,
429- expectation ( { error, output } ) {
430- expect ( error ) . to . not . exist ;
431- expect ( output ) . to . equal ( 'abc\uFFFD' ) ;
438+ name : 'throw an error if fatal is set and string contains overlong encoding' ,
439+ inputs : [ Buffer . from ( '11000000025f0005000000f08282ac0000' , 'hex' ) , 0 , 18 , true ] ,
440+ expectation ( { error } ) {
441+ expect ( error ) . to . match ( / I n v a l i d U T F - 8 s t r i n g i n B S O N d o c u m e n t / i) ;
442+ }
443+ } ,
{
  // Buffer.from(..., 'hex') silently drops a trailing odd nibble, so the
  // literal is written with an even number of digits: the two bytes that
  // are actually decoded are 0xAB and 0xCF.
  name: 'throw an error if fatal is set and string contains invalid bytes',
  inputs: [Buffer.from('abcf', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
{
  // 0x7F is plain ASCII; the 0x80 that follows is a continuation byte with
  // no lead byte before it.
  name: 'throw an error if fatal is set and string contains an unexpected continuation byte',
  inputs: [Buffer.from('7F80', 'hex'), 0, 2, true],
  expectation({ error }) {
    expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
  }
},
// Malformed UTF-8 sequences that must be rejected when `fatal` is true.
// Each row is [hex bytes, label]; the label is appended to
// 'throws when provided with ' to form the test name.
//
// NOTE: the hex strings deliberately carry no '0x' prefix. Node's hex decoder
// stops at the first pair that is not valid hex, so Buffer.from('0xFF', 'hex')
// yields an EMPTY buffer and the test would never see the intended bytes.
...(
  [
    ['FF', 'invalid code'],
    ['C0', 'ends early'],
    ['E0', 'ends early 2'],
    ['C000', 'invalid trail'],
    ['C0C0', 'invalid trail 2'],
    ['E000', 'invalid trail 3'],
    ['E0C0', 'invalid trail 4'],
    ['E08000', 'invalid trail 5'],
    ['E080C0', 'invalid trail 6'],
    ['FC8080808080', '> 0x10FFFF'],
    ['FE8080808080', 'obsolete lead byte'],

    // Overlong encodings
    ['C080', 'overlong U+0000 - 2 bytes'],
    ['E08080', 'overlong U+0000 - 3 bytes'],
    ['F0808080', 'overlong U+0000 - 4 bytes'],
    ['F880808080', 'overlong U+0000 - 5 bytes'],
    ['FC8080808080', 'overlong U+0000 - 6 bytes'],

    ['C1BF', 'overlong U+007F - 2 bytes'],
    ['E081BF', 'overlong U+007F - 3 bytes'],
    ['F08081BF', 'overlong U+007F - 4 bytes'],
    ['F8808081BF', 'overlong U+007F - 5 bytes'],
    ['FC80808081BF', 'overlong U+007F - 6 bytes'],

    ['E09FBF', 'overlong U+07FF - 3 bytes'],
    ['F0809FBF', 'overlong U+07FF - 4 bytes'],
    ['F880809FBF', 'overlong U+07FF - 5 bytes'],
    ['FC8080809FBF', 'overlong U+07FF - 6 bytes'],

    ['F08FBFBF', 'overlong U+FFFF - 4 bytes'],
    ['F8808FBFBF', 'overlong U+FFFF - 5 bytes'],
    ['FC80808FBFBF', 'overlong U+FFFF - 6 bytes'],

    ['F8848FBFBF', 'overlong U+10FFFF - 5 bytes'],
    ['FC80848FBFBF', 'overlong U+10FFFF - 6 bytes'],

    // UTF-16 surrogates encoded as code points in UTF-8
    ['EDA080', 'lead surrogate'],
    ['EDB080', 'trail surrogate'],
    ['EDA080EDB080', 'surrogate pair']
  ] as const
).map(
  ([hex, label]): ByteUtilTest<'toUTF8'> => ({
    name: `throws when provided with ${label}`,
    // Two hex digits per byte, so the decoded range is [0, hex.length / 2).
    inputs: [Buffer.from(hex, 'hex'), 0, hex.length / 2, true],
    expectation({ error }) {
      expect(error).to.match(/Invalid UTF-8 string in BSON document/i);
    }
  })
)
434598] ;
599+
435600const utf8ByteLengthTests : ByteUtilTest < 'utf8ByteLength' > [ ] = [
436601 {
437602 name : 'should return zero for empty string' ,
@@ -801,4 +966,54 @@ describe('ByteUtils', () => {
801966 } ) ;
802967 }
803968 }
969+
// Byte sequences that are not well-formed UTF-8. Each one must make
// nodeJsByteUtils.toUTF8 throw a BSONError when called with fatal = true.
const bad = [
  { encoding: 'utf-8', input: [0xFF], name: 'invalid code' },
  { encoding: 'utf-8', input: [0xC0], name: 'ends early' },
  { encoding: 'utf-8', input: [0xE0], name: 'ends early 2' },
  { encoding: 'utf-8', input: [0xC0, 0x00], name: 'invalid trail' },
  { encoding: 'utf-8', input: [0xC0, 0xC0], name: 'invalid trail 2' },
  { encoding: 'utf-8', input: [0xE0, 0x00], name: 'invalid trail 3' },
  { encoding: 'utf-8', input: [0xE0, 0xC0], name: 'invalid trail 4' },
  { encoding: 'utf-8', input: [0xE0, 0x80, 0x00], name: 'invalid trail 5' },
  { encoding: 'utf-8', input: [0xE0, 0x80, 0xC0], name: 'invalid trail 6' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: '> 0x10FFFF' },
  { encoding: 'utf-8', input: [0xFE, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'obsolete lead byte' },

  // Overlong encodings
  { encoding: 'utf-8', input: [0xC0, 0x80], name: 'overlong U+0000 - 2 bytes' },
  { encoding: 'utf-8', input: [0xE0, 0x80, 0x80], name: 'overlong U+0000 - 3 bytes' },
  { encoding: 'utf-8', input: [0xF0, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 4 bytes' },
  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 5 bytes' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x80, 0x80], name: 'overlong U+0000 - 6 bytes' },

  { encoding: 'utf-8', input: [0xC1, 0xBF], name: 'overlong U+007F - 2 bytes' },
  { encoding: 'utf-8', input: [0xE0, 0x81, 0xBF], name: 'overlong U+007F - 3 bytes' },
  { encoding: 'utf-8', input: [0xF0, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 4 bytes' },
  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 5 bytes' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x81, 0xBF], name: 'overlong U+007F - 6 bytes' },

  { encoding: 'utf-8', input: [0xE0, 0x9F, 0xBF], name: 'overlong U+07FF - 3 bytes' },
  { encoding: 'utf-8', input: [0xF0, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 4 bytes' },
  { encoding: 'utf-8', input: [0xF8, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 5 bytes' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x80, 0x9F, 0xBF], name: 'overlong U+07FF - 6 bytes' },

  { encoding: 'utf-8', input: [0xF0, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 4 bytes' },
  { encoding: 'utf-8', input: [0xF8, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 5 bytes' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x80, 0x8F, 0xBF, 0xBF], name: 'overlong U+FFFF - 6 bytes' },

  { encoding: 'utf-8', input: [0xF8, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 5 bytes' },
  { encoding: 'utf-8', input: [0xFC, 0x80, 0x84, 0x8F, 0xBF, 0xBF], name: 'overlong U+10FFFF - 6 bytes' },

  // UTF-16 surrogates encoded as code points in UTF-8
  { encoding: 'utf-8', input: [0xED, 0xA0, 0x80], name: 'lead surrogate' },
  { encoding: 'utf-8', input: [0xED, 0xB0, 0x80], name: 'trail surrogate' },
  { encoding: 'utf-8', input: [0xED, 0xA0, 0x80, 0xED, 0xB0, 0x80], name: 'surrogate pair' }
];

for (const test of bad) {
  // Plain `it`, not `it.only`: an exclusive test here would silently disable
  // every other test in the run.
  it(test.name, () => {
    expect(() =>
      nodeJsByteUtils.toUTF8(Uint8Array.from(test.input), 0, test.input.length, true)
    ).to.throw(BSONError);
  });
}
8041019} ) ;
0 commit comments