@@ -429,42 +429,6 @@ const mbfl_encoding *mbfl_identify_encoding(mbfl_string *string, const mbfl_enco
429
429
return enc ;
430
430
}
431
431
432
- /*
433
- * strlen
434
- */
435
- size_t mbfl_strlen (const mbfl_string * string )
436
- {
437
- size_t len = 0 ;
438
- const mbfl_encoding * encoding = string -> encoding ;
439
-
440
- if (encoding -> flag & MBFL_ENCTYPE_SBCS ) {
441
- len = string -> len ;
442
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS2 ) {
443
- len = string -> len /2 ;
444
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS4 ) {
445
- len = string -> len /4 ;
446
- } else if (encoding -> mblen_table ) {
447
- const unsigned char * mbtab = encoding -> mblen_table ;
448
- unsigned char * p = string -> val , * e = p + string -> len ;
449
- while (p < e ) {
450
- p += mbtab [* p ];
451
- len ++ ;
452
- }
453
- } else {
454
- uint32_t wchar_buf [128 ];
455
- unsigned char * in = string -> val ;
456
- size_t in_len = string -> len ;
457
- unsigned int state = 0 ;
458
-
459
- while (in_len ) {
460
- len += encoding -> to_wchar (& in , & in_len , wchar_buf , 128 , & state );
461
- }
462
- }
463
-
464
- return len ;
465
- }
466
-
467
-
468
432
/*
469
433
* strpos
470
434
*/
@@ -528,136 +492,6 @@ collector_strpos(int c, void* data)
528
492
return 0 ;
529
493
}
530
494
531
- static const unsigned char * mbfl_find_offset_utf8 (
532
- const unsigned char * str , const unsigned char * end , ssize_t offset ) {
533
- if (offset < 0 ) {
534
- const unsigned char * pos = end ;
535
- while (offset < 0 ) {
536
- if (pos <= str ) {
537
- return NULL ;
538
- }
539
-
540
- unsigned char c = * (-- pos );
541
- if (c < 0x80 ) {
542
- ++ offset ;
543
- } else if ((c & 0xc0 ) != 0x80 ) {
544
- ++ offset ;
545
- }
546
- }
547
- return pos ;
548
- } else {
549
- const unsigned char * u8_tbl = mbfl_encoding_utf8 .mblen_table ;
550
- const unsigned char * pos = str ;
551
- while (offset -- > 0 ) {
552
- if (pos >= end ) {
553
- return NULL ;
554
- }
555
- pos += u8_tbl [* pos ];
556
- }
557
- return pos ;
558
- }
559
- }
560
-
561
/*
 * Convert a byte position inside a UTF-8 buffer into a character offset.
 *
 * Counts the bytes in [start, pos) that are not UTF-8 continuation bytes
 * (10xxxxxx); each such byte begins one character.
 */
static size_t mbfl_pointer_to_offset_utf8(const unsigned char *start, const unsigned char *pos) {
	size_t chars = 0;
	const unsigned char *cur;

	for (cur = start; cur < pos; cur++) {
		if ((*cur & 0xc0) != 0x80) {
			chars++;
		}
	}
	return chars;
}
573
-
574
- size_t
575
- mbfl_strpos (
576
- mbfl_string * haystack ,
577
- mbfl_string * needle ,
578
- ssize_t offset ,
579
- int reverse )
580
- {
581
- size_t result ;
582
- mbfl_string _haystack_u8 , _needle_u8 ;
583
- const mbfl_string * haystack_u8 , * needle_u8 = NULL ;
584
- const unsigned char * offset_pointer ;
585
-
586
- if (haystack -> encoding -> no_encoding != mbfl_no_encoding_utf8 ) {
587
- mbfl_string_init (& _haystack_u8 );
588
- haystack_u8 = mbfl_convert_encoding (haystack , & _haystack_u8 , & mbfl_encoding_utf8 );
589
- if (haystack_u8 == NULL ) {
590
- result = MBFL_ERROR_ENCODING ;
591
- goto out ;
592
- }
593
- } else {
594
- haystack_u8 = haystack ;
595
- }
596
-
597
- if (needle -> encoding -> no_encoding != mbfl_no_encoding_utf8 ) {
598
- mbfl_string_init (& _needle_u8 );
599
- needle_u8 = mbfl_convert_encoding (needle , & _needle_u8 , & mbfl_encoding_utf8 );
600
- if (needle_u8 == NULL ) {
601
- result = MBFL_ERROR_ENCODING ;
602
- goto out ;
603
- }
604
- } else {
605
- needle_u8 = needle ;
606
- }
607
-
608
- offset_pointer = mbfl_find_offset_utf8 (
609
- haystack_u8 -> val , haystack_u8 -> val + haystack_u8 -> len , offset );
610
- if (!offset_pointer ) {
611
- result = MBFL_ERROR_OFFSET ;
612
- goto out ;
613
- }
614
-
615
- result = MBFL_ERROR_NOT_FOUND ;
616
- if (haystack_u8 -> len < needle_u8 -> len ) {
617
- goto out ;
618
- }
619
-
620
- const char * found_pos ;
621
- if (!reverse ) {
622
- found_pos = zend_memnstr (
623
- (const char * ) offset_pointer ,
624
- (const char * ) needle_u8 -> val , needle_u8 -> len ,
625
- (const char * ) haystack_u8 -> val + haystack_u8 -> len );
626
- } else {
627
- if (offset >= 0 ) {
628
- found_pos = zend_memnrstr (
629
- (const char * ) offset_pointer ,
630
- (const char * ) needle_u8 -> val , needle_u8 -> len ,
631
- (const char * ) haystack_u8 -> val + haystack_u8 -> len );
632
- } else {
633
- size_t needle_len = mbfl_strlen (needle_u8 );
634
- offset_pointer = mbfl_find_offset_utf8 (
635
- offset_pointer , haystack_u8 -> val + haystack_u8 -> len , needle_len );
636
- if (!offset_pointer ) {
637
- offset_pointer = haystack_u8 -> val + haystack_u8 -> len ;
638
- }
639
-
640
- found_pos = zend_memnrstr (
641
- (const char * ) haystack_u8 -> val ,
642
- (const char * ) needle_u8 -> val , needle_u8 -> len ,
643
- (const char * ) offset_pointer );
644
- }
645
- }
646
-
647
- if (found_pos ) {
648
- result = mbfl_pointer_to_offset_utf8 (haystack_u8 -> val , (const unsigned char * ) found_pos );
649
- }
650
-
651
- out :
652
- if (haystack_u8 == & _haystack_u8 ) {
653
- mbfl_string_clear (& _haystack_u8 );
654
- }
655
- if (needle_u8 == & _needle_u8 ) {
656
- mbfl_string_clear (& _needle_u8 );
657
- }
658
- return result ;
659
- }
660
-
661
495
/*
662
496
* substr_count
663
497
*/
@@ -727,176 +561,6 @@ mbfl_substr_count(
727
561
return result ;
728
562
}
729
563
730
- /*
731
- * substr
732
- */
733
- struct collector_substr_data {
734
- mbfl_convert_filter * next_filter ;
735
- size_t start ;
736
- size_t stop ;
737
- size_t output ;
738
- };
739
-
740
- static int
741
- collector_substr (int c , void * data )
742
- {
743
- struct collector_substr_data * pc = (struct collector_substr_data * )data ;
744
-
745
- if (pc -> output >= pc -> stop ) {
746
- return -1 ;
747
- }
748
-
749
- if (pc -> output >= pc -> start ) {
750
- (* pc -> next_filter -> filter_function )(c , pc -> next_filter );
751
- }
752
-
753
- pc -> output ++ ;
754
-
755
- return 0 ;
756
- }
757
-
758
- mbfl_string *
759
- mbfl_substr (
760
- mbfl_string * string ,
761
- mbfl_string * result ,
762
- size_t from ,
763
- size_t length )
764
- {
765
- const mbfl_encoding * encoding = string -> encoding ;
766
- size_t n , k , len , start , end ;
767
- unsigned m ;
768
- unsigned char * p , * w ;
769
-
770
- mbfl_string_init (result );
771
- result -> encoding = string -> encoding ;
772
-
773
- if ((encoding -> flag & (MBFL_ENCTYPE_SBCS | MBFL_ENCTYPE_WCS2 | MBFL_ENCTYPE_WCS4 )) ||
774
- encoding -> mblen_table != NULL ) {
775
- len = string -> len ;
776
- if (encoding -> flag & MBFL_ENCTYPE_SBCS ) {
777
- start = from ;
778
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS2 ) {
779
- start = from * 2 ;
780
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS4 ) {
781
- start = from * 4 ;
782
- } else {
783
- const unsigned char * mbtab = encoding -> mblen_table ;
784
- start = 0 ;
785
- n = 0 ;
786
- k = 0 ;
787
- p = string -> val ;
788
- /* search start position */
789
- while (k <= from ) {
790
- start = n ;
791
- if (n >= len ) {
792
- break ;
793
- }
794
- m = mbtab [* p ];
795
- n += m ;
796
- p += m ;
797
- k ++ ;
798
- }
799
- }
800
-
801
- if (length == MBFL_SUBSTR_UNTIL_END ) {
802
- end = len ;
803
- } else if (encoding -> flag & MBFL_ENCTYPE_SBCS ) {
804
- end = start + length ;
805
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS2 ) {
806
- end = start + length * 2 ;
807
- } else if (encoding -> flag & MBFL_ENCTYPE_WCS4 ) {
808
- end = start + length * 4 ;
809
- } else {
810
- const unsigned char * mbtab = encoding -> mblen_table ;
811
- end = start ;
812
- n = start ;
813
- k = 0 ;
814
- p = string -> val + start ;
815
- /* detect end position */
816
- while (k <= length ) {
817
- end = n ;
818
- if (n >= len ) {
819
- break ;
820
- }
821
- m = mbtab [* p ];
822
- n += m ;
823
- p += m ;
824
- k ++ ;
825
- }
826
- }
827
-
828
- if (start > len ) {
829
- start = len ;
830
- }
831
- if (end > len ) {
832
- end = len ;
833
- }
834
- if (start > end ) {
835
- start = end ;
836
- }
837
-
838
- /* allocate memory and copy */
839
- n = end - start ;
840
- result -> len = 0 ;
841
- result -> val = w = (unsigned char * )emalloc (n + 1 );
842
- result -> len = n ;
843
- memcpy (w , string -> val + start , n );
844
- w [n ] = '\0' ;
845
- } else {
846
- mbfl_memory_device device ;
847
- struct collector_substr_data pc ;
848
- mbfl_convert_filter * decoder ;
849
- mbfl_convert_filter * encoder ;
850
-
851
- if (length == MBFL_SUBSTR_UNTIL_END ) {
852
- length = mbfl_strlen (string ) - from ;
853
- }
854
-
855
- mbfl_memory_device_init (& device , length + 1 , 0 );
856
- mbfl_string_init (result );
857
- result -> encoding = string -> encoding ;
858
- /* output code filter */
859
- decoder = mbfl_convert_filter_new (
860
- & mbfl_encoding_wchar ,
861
- string -> encoding ,
862
- mbfl_memory_device_output , 0 , & device );
863
- /* wchar filter */
864
- encoder = mbfl_convert_filter_new (
865
- string -> encoding ,
866
- & mbfl_encoding_wchar ,
867
- collector_substr , 0 , & pc );
868
- if (decoder == NULL || encoder == NULL ) {
869
- mbfl_convert_filter_delete (encoder );
870
- mbfl_convert_filter_delete (decoder );
871
- return NULL ;
872
- }
873
- pc .next_filter = decoder ;
874
- pc .start = from ;
875
- pc .stop = from + length ;
876
- pc .output = 0 ;
877
-
878
- /* feed data */
879
- p = string -> val ;
880
- n = string -> len ;
881
- if (p != NULL ) {
882
- while (n > 0 ) {
883
- if ((* encoder -> filter_function )(* p ++ , encoder ) < 0 ) {
884
- break ;
885
- }
886
- n -- ;
887
- }
888
- }
889
-
890
- mbfl_convert_filter_flush (encoder );
891
- mbfl_convert_filter_flush (decoder );
892
- result = mbfl_memory_device_result (& device , result );
893
- mbfl_convert_filter_delete (encoder );
894
- mbfl_convert_filter_delete (decoder );
895
- }
896
-
897
- return result ;
898
- }
899
-
900
564
/*
901
565
* strcut
902
566
*/
0 commit comments