@@ -368,18 +368,6 @@ static int is_fixed(const char *s, size_t len)
368
368
return 1 ;
369
369
}
370
370
371
- static int has_null (const char * s , size_t len )
372
- {
373
- /*
374
- * regcomp cannot accept patterns with NULs so when using it
375
- * we consider any pattern containing a NUL fixed.
376
- */
377
- if (memchr (s , 0 , len ))
378
- return 1 ;
379
-
380
- return 0 ;
381
- }
382
-
383
371
#ifdef USE_LIBPCRE1
384
372
static void compile_pcre1_regexp (struct grep_pat * p , const struct grep_opt * opt )
385
373
{
@@ -388,11 +376,11 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
388
376
int options = PCRE_MULTILINE ;
389
377
390
378
if (opt -> ignore_case ) {
391
- if (has_non_ascii (p -> pattern ))
379
+ if (! opt -> ignore_locale && has_non_ascii (p -> pattern ))
392
380
p -> pcre1_tables = pcre_maketables ();
393
381
options |= PCRE_CASELESS ;
394
382
}
395
- if (is_utf8_locale () && has_non_ascii (p -> pattern ))
383
+ if (! opt -> ignore_locale && is_utf8_locale () && has_non_ascii (p -> pattern ))
396
384
options |= PCRE_UTF8 ;
397
385
398
386
p -> pcre1_regexp = pcre_compile (p -> pattern , options , & error , & erroffset ,
@@ -406,15 +394,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
406
394
407
395
#ifdef GIT_PCRE1_USE_JIT
408
396
pcre_config (PCRE_CONFIG_JIT , & p -> pcre1_jit_on );
409
- if (p -> pcre1_jit_on == 1 ) {
410
- p -> pcre1_jit_stack = pcre_jit_stack_alloc (1 , 1024 * 1024 );
411
- if (!p -> pcre1_jit_stack )
412
- die ("Couldn't allocate PCRE JIT stack" );
413
- pcre_assign_jit_stack (p -> pcre1_extra_info , NULL , p -> pcre1_jit_stack );
414
- } else if (p -> pcre1_jit_on != 0 ) {
415
- BUG ("The pcre1_jit_on variable should be 0 or 1, not %d" ,
416
- p -> pcre1_jit_on );
417
- }
397
+ if (opt -> debug )
398
+ fprintf (stderr , "pcre1_jit_on=%d\n" , p -> pcre1_jit_on );
418
399
#endif
419
400
}
420
401
@@ -426,18 +407,9 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
426
407
if (eflags & REG_NOTBOL )
427
408
flags |= PCRE_NOTBOL ;
428
409
429
- #ifdef GIT_PCRE1_USE_JIT
430
- if (p -> pcre1_jit_on ) {
431
- ret = pcre_jit_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
432
- eol - line , 0 , flags , ovector ,
433
- ARRAY_SIZE (ovector ), p -> pcre1_jit_stack );
434
- } else
435
- #endif
436
- {
437
- ret = pcre_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
438
- eol - line , 0 , flags , ovector ,
439
- ARRAY_SIZE (ovector ));
440
- }
410
+ ret = pcre_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
411
+ eol - line , 0 , flags , ovector ,
412
+ ARRAY_SIZE (ovector ));
441
413
442
414
if (ret < 0 && ret != PCRE_ERROR_NOMATCH )
443
415
die ("pcre_exec failed with error code %d" , ret );
@@ -454,14 +426,11 @@ static void free_pcre1_regexp(struct grep_pat *p)
454
426
{
455
427
pcre_free (p -> pcre1_regexp );
456
428
#ifdef GIT_PCRE1_USE_JIT
457
- if (p -> pcre1_jit_on ) {
429
+ if (p -> pcre1_jit_on )
458
430
pcre_free_study (p -> pcre1_extra_info );
459
- pcre_jit_stack_free (p -> pcre1_jit_stack );
460
- } else
431
+ else
461
432
#endif
462
- {
463
433
pcre_free (p -> pcre1_extra_info );
464
- }
465
434
pcre_free ((void * )p -> pcre1_tables );
466
435
}
467
436
#else /* !USE_LIBPCRE1 */
@@ -498,14 +467,15 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
498
467
p -> pcre2_compile_context = NULL ;
499
468
500
469
if (opt -> ignore_case ) {
501
- if (has_non_ascii (p -> pattern )) {
470
+ if (! opt -> ignore_locale && has_non_ascii (p -> pattern )) {
502
471
character_tables = pcre2_maketables (NULL );
503
472
p -> pcre2_compile_context = pcre2_compile_context_create (NULL );
504
473
pcre2_set_character_tables (p -> pcre2_compile_context , character_tables );
505
474
}
506
475
options |= PCRE2_CASELESS ;
507
476
}
508
- if (is_utf8_locale () && has_non_ascii (p -> pattern ))
477
+ if (!opt -> ignore_locale && is_utf8_locale () && has_non_ascii (p -> pattern ) &&
478
+ !(!opt -> ignore_case && (p -> fixed || p -> is_fixed )))
509
479
options |= PCRE2_UTF ;
510
480
511
481
p -> pcre2_pattern = pcre2_compile ((PCRE2_SPTR )p -> pattern ,
@@ -522,7 +492,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
522
492
}
523
493
524
494
pcre2_config (PCRE2_CONFIG_JIT , & p -> pcre2_jit_on );
525
- if (p -> pcre2_jit_on == 1 ) {
495
+ if (opt -> debug )
496
+ fprintf (stderr , "pcre2_jit_on=%d\n" , p -> pcre2_jit_on );
497
+ if (p -> pcre2_jit_on ) {
526
498
jitret = pcre2_jit_compile (p -> pcre2_pattern , PCRE2_JIT_COMPLETE );
527
499
if (jitret )
528
500
die ("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n" , p -> pattern , jitret );
@@ -547,19 +519,11 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
547
519
BUG ("pcre2_pattern_info() failed: %d" , patinforet );
548
520
if (jitsizearg == 0 ) {
549
521
p -> pcre2_jit_on = 0 ;
522
+ if (opt -> debug )
523
+ fprintf (stderr , "pcre2_jit_on=%d: (*NO_JIT) in regex\n" ,
524
+ p -> pcre2_jit_on );
550
525
return ;
551
526
}
552
-
553
- p -> pcre2_jit_stack = pcre2_jit_stack_create (1 , 1024 * 1024 , NULL );
554
- if (!p -> pcre2_jit_stack )
555
- die ("Couldn't allocate PCRE2 JIT stack" );
556
- p -> pcre2_match_context = pcre2_match_context_create (NULL );
557
- if (!p -> pcre2_match_context )
558
- die ("Couldn't allocate PCRE2 match context" );
559
- pcre2_jit_stack_assign (p -> pcre2_match_context , NULL , p -> pcre2_jit_stack );
560
- } else if (p -> pcre2_jit_on != 0 ) {
561
- BUG ("The pcre2_jit_on variable should be 0 or 1, not %d" ,
562
- p -> pcre2_jit_on );
563
527
}
564
528
}
565
529
@@ -603,8 +567,6 @@ static void free_pcre2_pattern(struct grep_pat *p)
603
567
pcre2_compile_context_free (p -> pcre2_compile_context );
604
568
pcre2_code_free (p -> pcre2_pattern );
605
569
pcre2_match_data_free (p -> pcre2_match_data );
606
- pcre2_jit_stack_free (p -> pcre2_jit_stack );
607
- pcre2_match_context_free (p -> pcre2_match_context );
608
570
}
609
571
#else /* !USE_LIBPCRE2 */
610
572
static void compile_pcre2_pattern (struct grep_pat * p , const struct grep_opt * opt )
@@ -626,7 +588,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
626
588
static void free_pcre2_pattern (struct grep_pat * p )
627
589
{
628
590
}
629
- #endif /* !USE_LIBPCRE2 */
630
591
631
592
static void compile_fixed_regexp (struct grep_pat * p , struct grep_opt * opt )
632
593
{
@@ -647,46 +608,66 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
647
608
compile_regexp_failed (p , errbuf );
648
609
}
649
610
}
611
+ #endif /* !USE_LIBPCRE2 */
650
612
651
613
static void compile_regexp (struct grep_pat * p , struct grep_opt * opt )
652
614
{
653
- int ascii_only ;
654
615
int err ;
655
616
int regflags = REG_NEWLINE ;
656
617
657
618
p -> word_regexp = opt -> word_regexp ;
658
619
p -> ignore_case = opt -> ignore_case ;
659
- ascii_only = ! has_non_ascii ( p -> pattern ) ;
620
+ p -> fixed = opt -> fixed ;
660
621
661
- /*
662
- * Even when -F (fixed) asks us to do a non-regexp search, we
663
- * may not be able to correctly case-fold when -i
664
- * (ignore-case) is asked (in which case, we'll synthesize a
665
- * regexp to match the pattern that matches regexp special
666
- * characters literally, while ignoring case differences). On
667
- * the other hand, even without -F, if the pattern does not
668
- * have any regexp special characters and there is no need for
669
- * case-folding search, we can internally turn it into a
670
- * simple string match using kws. p->fixed tells us if we
671
- * want to use kws.
672
- */
673
- if (opt -> fixed ||
674
- has_null (p -> pattern , p -> patternlen ) ||
675
- is_fixed (p -> pattern , p -> patternlen ))
676
- p -> fixed = !p -> ignore_case || ascii_only ;
677
-
678
- if (p -> fixed ) {
679
- p -> kws = kwsalloc (p -> ignore_case ? tolower_trans_tbl : NULL );
680
- kwsincr (p -> kws , p -> pattern , p -> patternlen );
681
- kwsprep (p -> kws );
682
- return ;
683
- } else if (opt -> fixed ) {
684
- /*
685
- * We come here when the pattern has the non-ascii
686
- * characters we cannot case-fold, and asked to
687
- * ignore-case.
688
- */
622
+ if (memchr (p -> pattern , 0 , p -> patternlen ) && !opt -> pcre2 )
623
+ die (_ ("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2" ));
624
+
625
+ p -> is_fixed = is_fixed (p -> pattern , p -> patternlen );
626
+ #ifdef USE_LIBPCRE2
627
+ if (!p -> fixed && !p -> is_fixed ) {
628
+ const char * no_jit = "(*NO_JIT)" ;
629
+ const int no_jit_len = strlen (no_jit );
630
+ if (starts_with (p -> pattern , no_jit ) &&
631
+ is_fixed (p -> pattern + no_jit_len ,
632
+ p -> patternlen - no_jit_len ))
633
+ p -> is_fixed = 1 ;
634
+ }
635
+ #endif
636
+ if (p -> fixed || p -> is_fixed ) {
637
+ #ifdef USE_LIBPCRE2
638
+ opt -> pcre2 = 1 ;
639
+ if (p -> is_fixed ) {
640
+ compile_pcre2_pattern (p , opt );
641
+ } else {
642
+ /*
643
+ * E.g. t7811-grep-open.sh relies on the
644
+ * pattern being restored.
645
+ */
646
+ char * old_pattern = p -> pattern ;
647
+ size_t old_patternlen = p -> patternlen ;
648
+ struct strbuf sb = STRBUF_INIT ;
649
+
650
+ /*
651
+ * There is the PCRE2_LITERAL flag, but it's
652
+ * only in PCRE v2 10.30 and later. Needing to
653
+ * ifdef our way around that and dealing with
654
+ * it + PCRE2_MULTILINE being an error is more
655
+ * complex than just quoting this ourselves.
656
+ */
657
+ strbuf_add (& sb , "\\Q" , 2 );
658
+ strbuf_add (& sb , p -> pattern , p -> patternlen );
659
+ strbuf_add (& sb , "\\E" , 2 );
660
+
661
+ p -> pattern = sb .buf ;
662
+ p -> patternlen = sb .len ;
663
+ compile_pcre2_pattern (p , opt );
664
+ p -> pattern = old_pattern ;
665
+ p -> patternlen = old_patternlen ;
666
+ strbuf_release (& sb );
667
+ }
668
+ #else /* !USE_LIBPCRE2 */
689
669
compile_fixed_regexp (p , opt );
670
+ #endif /* !USE_LIBPCRE2 */
690
671
return ;
691
672
}
692
673
@@ -1053,9 +1034,7 @@ void free_grep_patterns(struct grep_opt *opt)
1053
1034
case GREP_PATTERN : /* atom */
1054
1035
case GREP_PATTERN_HEAD :
1055
1036
case GREP_PATTERN_BODY :
1056
- if (p -> kws )
1057
- kwsfree (p -> kws );
1058
- else if (p -> pcre1_regexp )
1037
+ if (p -> pcre1_regexp )
1059
1038
free_pcre1_regexp (p );
1060
1039
else if (p -> pcre2_pattern )
1061
1040
free_pcre2_pattern (p );
@@ -1115,29 +1094,12 @@ static void show_name(struct grep_opt *opt, const char *name)
1115
1094
opt -> output (opt , opt -> null_following_name ? "\0" : "\n" , 1 );
1116
1095
}
1117
1096
1118
- static int fixmatch (struct grep_pat * p , char * line , char * eol ,
1119
- regmatch_t * match )
1120
- {
1121
- struct kwsmatch kwsm ;
1122
- size_t offset = kwsexec (p -> kws , line , eol - line , & kwsm );
1123
- if (offset == -1 ) {
1124
- match -> rm_so = match -> rm_eo = -1 ;
1125
- return REG_NOMATCH ;
1126
- } else {
1127
- match -> rm_so = offset ;
1128
- match -> rm_eo = match -> rm_so + kwsm .size [0 ];
1129
- return 0 ;
1130
- }
1131
- }
1132
-
1133
1097
static int patmatch (struct grep_pat * p , char * line , char * eol ,
1134
1098
regmatch_t * match , int eflags )
1135
1099
{
1136
1100
int hit ;
1137
1101
1138
- if (p -> fixed )
1139
- hit = !fixmatch (p , line , eol , match );
1140
- else if (p -> pcre1_regexp )
1102
+ if (p -> pcre1_regexp )
1141
1103
hit = !pcre1match (p , line , eol , match , eflags );
1142
1104
else if (p -> pcre2_pattern )
1143
1105
hit = !pcre2match (p , line , eol , match , eflags );
0 commit comments