@@ -368,18 +368,6 @@ static int is_fixed(const char *s, size_t len)
368368 return 1 ;
369369}
370370
371- static int has_null (const char * s , size_t len )
372- {
373- /*
374- * regcomp cannot accept patterns with NULs so when using it
375- * we consider any pattern containing a NUL fixed.
376- */
377- if (memchr (s , 0 , len ))
378- return 1 ;
379-
380- return 0 ;
381- }
382-
383371#ifdef USE_LIBPCRE1
384372static void compile_pcre1_regexp (struct grep_pat * p , const struct grep_opt * opt )
385373{
@@ -388,11 +376,11 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
388376 int options = PCRE_MULTILINE ;
389377
390378 if (opt -> ignore_case ) {
391- if (has_non_ascii (p -> pattern ))
379+ if (! opt -> ignore_locale && has_non_ascii (p -> pattern ))
392380 p -> pcre1_tables = pcre_maketables ();
393381 options |= PCRE_CASELESS ;
394382 }
395- if (is_utf8_locale () && has_non_ascii (p -> pattern ))
383+ if (! opt -> ignore_locale && is_utf8_locale () && has_non_ascii (p -> pattern ))
396384 options |= PCRE_UTF8 ;
397385
398386 p -> pcre1_regexp = pcre_compile (p -> pattern , options , & error , & erroffset ,
@@ -406,15 +394,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt *opt)
406394
407395#ifdef GIT_PCRE1_USE_JIT
408396 pcre_config (PCRE_CONFIG_JIT , & p -> pcre1_jit_on );
409- if (p -> pcre1_jit_on == 1 ) {
410- p -> pcre1_jit_stack = pcre_jit_stack_alloc (1 , 1024 * 1024 );
411- if (!p -> pcre1_jit_stack )
412- die ("Couldn't allocate PCRE JIT stack" );
413- pcre_assign_jit_stack (p -> pcre1_extra_info , NULL , p -> pcre1_jit_stack );
414- } else if (p -> pcre1_jit_on != 0 ) {
415- BUG ("The pcre1_jit_on variable should be 0 or 1, not %d" ,
416- p -> pcre1_jit_on );
417- }
397+ if (opt -> debug )
398+ fprintf (stderr , "pcre1_jit_on=%d\n" , p -> pcre1_jit_on );
418399#endif
419400}
420401
@@ -426,18 +407,9 @@ static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
426407 if (eflags & REG_NOTBOL )
427408 flags |= PCRE_NOTBOL ;
428409
429- #ifdef GIT_PCRE1_USE_JIT
430- if (p -> pcre1_jit_on ) {
431- ret = pcre_jit_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
432- eol - line , 0 , flags , ovector ,
433- ARRAY_SIZE (ovector ), p -> pcre1_jit_stack );
434- } else
435- #endif
436- {
437- ret = pcre_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
438- eol - line , 0 , flags , ovector ,
439- ARRAY_SIZE (ovector ));
440- }
410+ ret = pcre_exec (p -> pcre1_regexp , p -> pcre1_extra_info , line ,
411+ eol - line , 0 , flags , ovector ,
412+ ARRAY_SIZE (ovector ));
441413
442414 if (ret < 0 && ret != PCRE_ERROR_NOMATCH )
443415 die ("pcre_exec failed with error code %d" , ret );
@@ -454,14 +426,11 @@ static void free_pcre1_regexp(struct grep_pat *p)
454426{
455427 pcre_free (p -> pcre1_regexp );
456428#ifdef GIT_PCRE1_USE_JIT
457- if (p -> pcre1_jit_on ) {
429+ if (p -> pcre1_jit_on )
458430 pcre_free_study (p -> pcre1_extra_info );
459- pcre_jit_stack_free (p -> pcre1_jit_stack );
460- } else
431+ else
461432#endif
462- {
463433 pcre_free (p -> pcre1_extra_info );
464- }
465434 pcre_free ((void * )p -> pcre1_tables );
466435}
467436#else /* !USE_LIBPCRE1 */
@@ -498,14 +467,15 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
498467 p -> pcre2_compile_context = NULL ;
499468
500469 if (opt -> ignore_case ) {
501- if (has_non_ascii (p -> pattern )) {
470+ if (! opt -> ignore_locale && has_non_ascii (p -> pattern )) {
502471 character_tables = pcre2_maketables (NULL );
503472 p -> pcre2_compile_context = pcre2_compile_context_create (NULL );
504473 pcre2_set_character_tables (p -> pcre2_compile_context , character_tables );
505474 }
506475 options |= PCRE2_CASELESS ;
507476 }
508- if (is_utf8_locale () && has_non_ascii (p -> pattern ))
477+ if (!opt -> ignore_locale && is_utf8_locale () && has_non_ascii (p -> pattern ) &&
478+ !(!opt -> ignore_case && (p -> fixed || p -> is_fixed )))
509479 options |= PCRE2_UTF ;
510480
511481 p -> pcre2_pattern = pcre2_compile ((PCRE2_SPTR )p -> pattern ,
@@ -522,7 +492,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
522492 }
523493
524494 pcre2_config (PCRE2_CONFIG_JIT , & p -> pcre2_jit_on );
525- if (p -> pcre2_jit_on == 1 ) {
495+ if (opt -> debug )
496+ fprintf (stderr , "pcre2_jit_on=%d\n" , p -> pcre2_jit_on );
497+ if (p -> pcre2_jit_on ) {
526498 jitret = pcre2_jit_compile (p -> pcre2_pattern , PCRE2_JIT_COMPLETE );
527499 if (jitret )
528500 die ("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n" , p -> pattern , jitret );
@@ -547,19 +519,11 @@ static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt *opt
547519 BUG ("pcre2_pattern_info() failed: %d" , patinforet );
548520 if (jitsizearg == 0 ) {
549521 p -> pcre2_jit_on = 0 ;
522+ if (opt -> debug )
523+ fprintf (stderr , "pcre2_jit_on=%d: (*NO_JIT) in regex\n" ,
524+ p -> pcre2_jit_on );
550525 return ;
551526 }
552-
553- p -> pcre2_jit_stack = pcre2_jit_stack_create (1 , 1024 * 1024 , NULL );
554- if (!p -> pcre2_jit_stack )
555- die ("Couldn't allocate PCRE2 JIT stack" );
556- p -> pcre2_match_context = pcre2_match_context_create (NULL );
557- if (!p -> pcre2_match_context )
558- die ("Couldn't allocate PCRE2 match context" );
559- pcre2_jit_stack_assign (p -> pcre2_match_context , NULL , p -> pcre2_jit_stack );
560- } else if (p -> pcre2_jit_on != 0 ) {
561- BUG ("The pcre2_jit_on variable should be 0 or 1, not %d" ,
562- p -> pcre2_jit_on );
563527 }
564528}
565529
@@ -603,8 +567,6 @@ static void free_pcre2_pattern(struct grep_pat *p)
603567 pcre2_compile_context_free (p -> pcre2_compile_context );
604568 pcre2_code_free (p -> pcre2_pattern );
605569 pcre2_match_data_free (p -> pcre2_match_data );
606- pcre2_jit_stack_free (p -> pcre2_jit_stack );
607- pcre2_match_context_free (p -> pcre2_match_context );
608570}
609571#else /* !USE_LIBPCRE2 */
610572static void compile_pcre2_pattern (struct grep_pat * p , const struct grep_opt * opt )
@@ -626,7 +588,6 @@ static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
626588static void free_pcre2_pattern (struct grep_pat * p )
627589{
628590}
629- #endif /* !USE_LIBPCRE2 */
630591
631592static void compile_fixed_regexp (struct grep_pat * p , struct grep_opt * opt )
632593{
@@ -647,46 +608,66 @@ static void compile_fixed_regexp(struct grep_pat *p, struct grep_opt *opt)
647608 compile_regexp_failed (p , errbuf );
648609 }
649610}
611+ #endif /* !USE_LIBPCRE2 */
650612
651613static void compile_regexp (struct grep_pat * p , struct grep_opt * opt )
652614{
653- int ascii_only ;
654615 int err ;
655616 int regflags = REG_NEWLINE ;
656617
657618 p -> word_regexp = opt -> word_regexp ;
658619 p -> ignore_case = opt -> ignore_case ;
659- ascii_only = ! has_non_ascii ( p -> pattern ) ;
620+ p -> fixed = opt -> fixed ;
660621
661- /*
662- * Even when -F (fixed) asks us to do a non-regexp search, we
663- * may not be able to correctly case-fold when -i
664- * (ignore-case) is asked (in which case, we'll synthesize a
665- * regexp to match the pattern that matches regexp special
666- * characters literally, while ignoring case differences). On
667- * the other hand, even without -F, if the pattern does not
668- * have any regexp special characters and there is no need for
669- * case-folding search, we can internally turn it into a
670- * simple string match using kws. p->fixed tells us if we
671- * want to use kws.
672- */
673- if (opt -> fixed ||
674- has_null (p -> pattern , p -> patternlen ) ||
675- is_fixed (p -> pattern , p -> patternlen ))
676- p -> fixed = !p -> ignore_case || ascii_only ;
677-
678- if (p -> fixed ) {
679- p -> kws = kwsalloc (p -> ignore_case ? tolower_trans_tbl : NULL );
680- kwsincr (p -> kws , p -> pattern , p -> patternlen );
681- kwsprep (p -> kws );
682- return ;
683- } else if (opt -> fixed ) {
684- /*
685- * We come here when the pattern has the non-ascii
686- * characters we cannot case-fold, and asked to
687- * ignore-case.
688- */
622+ if (memchr (p -> pattern , 0 , p -> patternlen ) && !opt -> pcre2 )
623+ die (_ ("given pattern contains NULL byte (via -f <file>). This is only supported with -P under PCRE v2" ));
624+
625+ p -> is_fixed = is_fixed (p -> pattern , p -> patternlen );
626+ #ifdef USE_LIBPCRE2
627+ if (!p -> fixed && !p -> is_fixed ) {
628+ const char * no_jit = "(*NO_JIT)" ;
629+ const int no_jit_len = strlen (no_jit );
630+ if (starts_with (p -> pattern , no_jit ) &&
631+ is_fixed (p -> pattern + no_jit_len ,
632+ p -> patternlen - no_jit_len ))
633+ p -> is_fixed = 1 ;
634+ }
635+ #endif
636+ if (p -> fixed || p -> is_fixed ) {
637+ #ifdef USE_LIBPCRE2
638+ opt -> pcre2 = 1 ;
639+ if (p -> is_fixed ) {
640+ compile_pcre2_pattern (p , opt );
641+ } else {
642+ /*
643+ * E.g. t7811-grep-open.sh relies on the
644+ * pattern being restored.
645+ */
646+ char * old_pattern = p -> pattern ;
647+ size_t old_patternlen = p -> patternlen ;
648+ struct strbuf sb = STRBUF_INIT ;
649+
650+ /*
651+ * There is the PCRE2_LITERAL flag, but it's
652+ * only in PCRE v2 10.30 and later. Needing to
653+ * ifdef our way around that and dealing with
654+ * it + PCRE2_MULTILINE being an error is more
655+ * complex than just quoting this ourselves.
656+ */
657+ strbuf_add (& sb , "\\Q" , 2 );
658+ strbuf_add (& sb , p -> pattern , p -> patternlen );
659+ strbuf_add (& sb , "\\E" , 2 );
660+
661+ p -> pattern = sb .buf ;
662+ p -> patternlen = sb .len ;
663+ compile_pcre2_pattern (p , opt );
664+ p -> pattern = old_pattern ;
665+ p -> patternlen = old_patternlen ;
666+ strbuf_release (& sb );
667+ }
668+ #else /* !USE_LIBPCRE2 */
689669 compile_fixed_regexp (p , opt );
670+ #endif /* !USE_LIBPCRE2 */
690671 return ;
691672 }
692673
@@ -1053,9 +1034,7 @@ void free_grep_patterns(struct grep_opt *opt)
10531034 case GREP_PATTERN : /* atom */
10541035 case GREP_PATTERN_HEAD :
10551036 case GREP_PATTERN_BODY :
1056- if (p -> kws )
1057- kwsfree (p -> kws );
1058- else if (p -> pcre1_regexp )
1037+ if (p -> pcre1_regexp )
10591038 free_pcre1_regexp (p );
10601039 else if (p -> pcre2_pattern )
10611040 free_pcre2_pattern (p );
@@ -1115,29 +1094,12 @@ static void show_name(struct grep_opt *opt, const char *name)
11151094 opt -> output (opt , opt -> null_following_name ? "\0" : "\n" , 1 );
11161095}
11171096
1118- static int fixmatch (struct grep_pat * p , char * line , char * eol ,
1119- regmatch_t * match )
1120- {
1121- struct kwsmatch kwsm ;
1122- size_t offset = kwsexec (p -> kws , line , eol - line , & kwsm );
1123- if (offset == -1 ) {
1124- match -> rm_so = match -> rm_eo = -1 ;
1125- return REG_NOMATCH ;
1126- } else {
1127- match -> rm_so = offset ;
1128- match -> rm_eo = match -> rm_so + kwsm .size [0 ];
1129- return 0 ;
1130- }
1131- }
1132-
11331097static int patmatch (struct grep_pat * p , char * line , char * eol ,
11341098 regmatch_t * match , int eflags )
11351099{
11361100 int hit ;
11371101
1138- if (p -> fixed )
1139- hit = !fixmatch (p , line , eol , match );
1140- else if (p -> pcre1_regexp )
1102+ if (p -> pcre1_regexp )
11411103 hit = !pcre1match (p , line , eol , match , eflags );
11421104 else if (p -> pcre2_pattern )
11431105 hit = !pcre2match (p , line , eol , match , eflags );
0 commit comments