@@ -364,91 +364,9 @@ pub trait DoubleEndedSearcher<'a>: ReverseSearcher<'a> {}
364364// Impl for char
365365/////////////////////////////////////////////////////////////////////////////
366366
#[derive(Clone, Debug)]
/// Searcher for a needle that is compared against the haystack as a single byte.
///
/// The unsearched part of the haystack is the byte range `finger..finger_back`;
/// forward searches consume it from the front, backward searches from the back.
pub struct AsciiCharSearcher<'a> {
    /// The string being searched.
    haystack: &'a str,
    /// The byte every haystack byte is compared against.
    // NOTE(review): presumably always an ASCII byte (< 0x80) — confirm at the construction site.
    needle: u8,
    /// Forward cursor: byte index of the next byte a forward search will examine.
    finger: usize,
    /// Backward cursor: exclusive end (byte index) of the region a backward search will examine.
    finger_back: usize,
}
375-
376- unsafe impl < ' a > Searcher < ' a > for AsciiCharSearcher < ' a > {
377- fn haystack ( & self ) -> & ' a str {
378- self . haystack
379- }
380-
381- #[ inline]
382- fn next ( & mut self ) -> SearchStep {
383- let byte = self . haystack . as_bytes ( ) . get ( self . finger ) ;
384- if let Some ( & byte) = byte {
385- self . finger += 1 ;
386- if byte == self . needle {
387- SearchStep :: Match ( self . finger - 1 , self . finger )
388- } else {
389- SearchStep :: Reject ( self . finger - 1 , self . finger )
390- }
391- } else {
392- SearchStep :: Done
393- }
394- }
395-
396- #[ inline( always) ]
397- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
398- match unsafe { self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back ) }
399- . iter ( )
400- . position ( |x| * x == self . needle )
401- {
402- Some ( x) => {
403- self . finger += x + 1 ;
404- Some ( ( self . finger - 1 , self . finger ) )
405- }
406- None => None ,
407- }
408- }
409-
410- // let next_reject use the default implementation from the Searcher trait
411- }
412-
413- unsafe impl < ' a > ReverseSearcher < ' a > for AsciiCharSearcher < ' a > {
414- #[ inline]
415- fn next_back ( & mut self ) -> SearchStep {
416- let old_finger = self . finger_back ;
417- let slice = unsafe { self . haystack . get_unchecked ( self . finger ..old_finger) } ;
418-
419- let mut iter = slice. as_bytes ( ) . iter ( ) ;
420- let old_len = iter. len ( ) ;
421- if let Some ( ch) = iter. next_back ( ) {
422- self . finger_back -= old_len - iter. len ( ) ;
423- if * ch == self . needle {
424- SearchStep :: Match ( self . finger_back , old_finger)
425- } else {
426- SearchStep :: Reject ( self . finger_back , old_finger)
427- }
428- } else {
429- SearchStep :: Done
430- }
431- }
432-
433- #[ inline]
434- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
435- match memchr:: memrchr ( self . needle , self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) )
436- {
437- Some ( x) => {
438- let index = self . finger + x;
439- self . finger_back = index;
440- Some ( ( self . finger_back , self . finger_back + 1 ) )
441- }
442- None => None ,
443- }
444- }
445-
446- // let next_reject use the default implementation from the Searcher trait
447- }
448-
449367/// Associated type for `<char as Pattern>::Searcher<'a>`.
450368#[ derive( Clone , Debug ) ]
451- pub struct UnicodeCharSearcher < ' a > {
369+ pub struct CharSearcher < ' a > {
452370 haystack : & ' a str ,
453371 // safety invariant: `finger`/`finger_back` must be a valid utf8 byte index of `haystack`
454372 // This invariant can be broken *within* next_match and next_match_back, however
@@ -471,15 +389,17 @@ pub struct UnicodeCharSearcher<'a> {
471389 utf8_size : u8 ,
472390 /// A utf8 encoded copy of the `needle`
473391 utf8_encoded : [ u8 ; 4 ] ,
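392+ /// `true` when the needle encodes to a single UTF-8 byte (`utf8_size == 1`); selects the byte-wise fast paths in `next_match` and `next_match_back`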
393+ ascii : bool ,
474394}
475395
476- impl UnicodeCharSearcher < ' _ > {
396+ impl CharSearcher < ' _ > {
477397 fn utf8_size ( & self ) -> usize {
478398 self . utf8_size . into ( )
479399 }
480400}
481401
482- unsafe impl < ' a > Searcher < ' a > for UnicodeCharSearcher < ' a > {
402+ unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
483403 #[ inline]
484404 fn haystack ( & self ) -> & ' a str {
485405 self . haystack
@@ -511,8 +431,23 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
511431 SearchStep :: Done
512432 }
513433 }
514- #[ inline]
434+ #[ inline( always ) ]
515435 fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
436+ if self . ascii {
437+ // SAFETY: invariant
438+ return match unsafe {
439+ self . haystack . as_bytes ( ) . get_unchecked ( self . finger ..self . finger_back )
440+ }
441+ . iter ( )
442+ . position ( |x| * x == self . utf8_encoded [ 0 ] )
443+ {
444+ Some ( x) => {
445+ self . finger += x + 1 ;
446+ Some ( ( self . finger - 1 , self . finger ) )
447+ }
448+ None => None ,
449+ } ;
450+ }
516451 loop {
517452 // get the haystack after the last character found
518453 let bytes = self . haystack . as_bytes ( ) . get ( self . finger ..self . finger_back ) ?;
@@ -532,7 +467,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
532467 //
533468 // However, this is totally okay. While we have the invariant that
534469 // self.finger is on a UTF8 boundary, this invariant is not relied upon
535- // within this method (it is relied upon in UnicodeCharSearcher ::next()).
470+ // within this method (it is relied upon in CharSearcher ::next()).
536471 //
537472 // We only exit this method when we reach the end of the string, or if we
538473 // find something. When we find something the `finger` will be set
@@ -557,7 +492,7 @@ unsafe impl<'a> Searcher<'a> for UnicodeCharSearcher<'a> {
557492 // let next_reject use the default implementation from the Searcher trait
558493}
559494
560- unsafe impl < ' a > ReverseSearcher < ' a > for UnicodeCharSearcher < ' a > {
495+ unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
561496 #[ inline]
562497 fn next_back ( & mut self ) -> SearchStep {
563498 let old_finger = self . finger_back ;
@@ -580,6 +515,20 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
580515 }
581516 #[ inline]
582517 fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
518+ if self . ascii {
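519+ // ASCII fast path: the needle is the single byte `utf8_encoded[0]`, so a reverse byte search is enough (nothing here is `unsafe`; the slice below is bounds- and boundary-checked)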
520+ return match memchr:: memrchr (
521+ self . utf8_encoded [ 0 ] ,
522+ self . haystack [ self . finger ..self . finger_back ] . as_bytes ( ) ,
523+ ) {
524+ Some ( x) => {
525+ let index = self . finger + x;
526+ self . finger_back = index;
527+ Some ( ( self . finger_back , self . finger_back + 1 ) )
528+ }
529+ None => None ,
530+ } ;
531+ }
583532 let haystack = self . haystack . as_bytes ( ) ;
584533 loop {
585534 // get the haystack up to but not including the last character searched
@@ -632,57 +581,6 @@ unsafe impl<'a> ReverseSearcher<'a> for UnicodeCharSearcher<'a> {
632581}
633582
// `DoubleEndedSearcher` declares no methods of its own (it only requires
// `ReverseSearcher`), so this marker impl has an empty body.
impl<'a> DoubleEndedSearcher<'a> for CharSearcher<'a> {}
#[derive(Clone, Debug)]
/// Searcher for a `char` needle that dispatches to one of two concrete searchers.
pub enum CharSearcher<'a> {
    /// Byte-wise searcher, used for needles whose UTF-8 encoding is a single byte.
    AsciiCharSearcher(AsciiCharSearcher<'a>),
    /// General searcher, used for needles whose UTF-8 encoding is longer than one byte.
    UnicodeCharSearcher(UnicodeCharSearcher<'a>),
}
643- unsafe impl < ' a > Searcher < ' a > for CharSearcher < ' a > {
644- #[ inline]
645-
646- fn haystack ( & self ) -> & ' a str {
647- let ( Self :: UnicodeCharSearcher ( UnicodeCharSearcher { haystack, .. } )
648- | Self :: AsciiCharSearcher ( AsciiCharSearcher { haystack, .. } ) ) = self ;
649- haystack
650- }
651- #[ inline( always) ]
652-
653- fn next_match ( & mut self ) -> Option < ( usize , usize ) > {
654- match self {
655- CharSearcher :: AsciiCharSearcher ( x) => x. next_match ( ) ,
656- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match ( ) ,
657- }
658- }
659- #[ inline]
660-
661- fn next ( & mut self ) -> SearchStep {
662- match self {
663- CharSearcher :: AsciiCharSearcher ( x) => x. next ( ) ,
664- CharSearcher :: UnicodeCharSearcher ( x) => x. next ( ) ,
665- }
666- }
667- }
668- unsafe impl < ' a > ReverseSearcher < ' a > for CharSearcher < ' a > {
669- #[ inline]
670-
671- fn next_back ( & mut self ) -> SearchStep {
672- match self {
673- CharSearcher :: AsciiCharSearcher ( x) => x. next_back ( ) ,
674- CharSearcher :: UnicodeCharSearcher ( x) => x. next_back ( ) ,
675- }
676- }
677- #[ inline]
678-
679- fn next_match_back ( & mut self ) -> Option < ( usize , usize ) > {
680- match self {
681- CharSearcher :: AsciiCharSearcher ( x) => x. next_match_back ( ) ,
682- CharSearcher :: UnicodeCharSearcher ( x) => x. next_match_back ( ) ,
683- }
684- }
685- }
686584
687585/// Searches for chars that are equal to a given [`char`].
688586///
@@ -696,31 +594,21 @@ impl Pattern for char {
696594
697595 #[ inline]
698596 fn into_searcher < ' a > ( self , haystack : & ' a str ) -> Self :: Searcher < ' a > {
699- if ( self as u32 ) < 128 { }
700597 let mut utf8_encoded = [ 0 ; MAX_LEN_UTF8 ] ;
701598 let utf8_size = self
702599 . encode_utf8 ( & mut utf8_encoded)
703600 . len ( )
704601 . try_into ( )
705602 . expect ( "char len should be less than 255" ) ;
706- if utf8_size == 1 {
707- CharSearcher :: AsciiCharSearcher ( AsciiCharSearcher {
708- haystack,
709- needle : utf8_encoded[ 0 ] ,
710- finger : 0 ,
711- finger_back : haystack. len ( ) ,
712- // available: None,
713- // available_back: None,
714- } )
715- } else {
716- CharSearcher :: UnicodeCharSearcher ( UnicodeCharSearcher {
717- haystack,
718- finger : 0 ,
719- finger_back : haystack. len ( ) ,
720- needle : self ,
721- utf8_size,
722- utf8_encoded,
723- } )
603+
604+ CharSearcher {
605+ haystack,
606+ finger : 0 ,
607+ finger_back : haystack. len ( ) ,
608+ needle : self ,
609+ utf8_size,
610+ utf8_encoded,
611+ ascii : utf8_size == 1 ,
724612 }
725613 }
726614
0 commit comments