Skip to content

Commit b2428ca

Browse files
committed
MFC r317665: bsdgrep: fix -w -v matching improperly with certain patterns
-w and -v flag matching was mostly functional but had some minor problems: 1. -w flag processing only allowed one iteration through pattern matching on a line. This was problematic if one pattern could match more than once, or if there were multiple patterns and the earliest/longest match was not the most ideal, and 2. Previous work "fixed" things to not further process a line if the first iteration through patterns produced no matches. This is clearly wrong if we're dealing with the more restrictive -w matching. #2 breakage could have also occurred before recent broad rewrites, but it would be more arbitrary based on input patterns as to whether or not it actually affected things. Fix both of these by forcing a retry of the patterns after advancing just past the start of the first match if we're doing more restrictive -w matching and we didn't get any hits to start with. Also move -v flag processing outside of the loop so that we have a greater chance to match in the more restrictive cases. This wasn't strictly wrong, but it could be a little more error prone. While here, introduce some regression tests for this behavior and fix some excessive wrapping nearby that hindered readability. GNU grep passes these new tests. PR: 218467, 218811 Approved by: emaste (mentor, blanket MFC)
1 parent db6ebb6 commit b2428ca

File tree

2 files changed

+48
-3
lines changed

2 files changed

+48
-3
lines changed

contrib/netbsd-tests/usr.bin/grep/t_grep.sh

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,12 @@ word_regexps_body()
9393
{
9494
atf_check -o file:"$(atf_get_srcdir)/d_word_regexps.out" \
9595
grep -w separated $(atf_get_srcdir)/d_input
96+
97+
# Begin FreeBSD
98+
printf "xmatch pmatch\n" > test1
99+
100+
atf_check -o inline:"pmatch\n" grep -Eow "(match )?pmatch" test1
101+
# End FreeBSD
96102
}
97103

98104
atf_test_case begin_end
@@ -439,6 +445,23 @@ grep_sanity_body()
439445

440446
atf_check -o inline:"M\n" grep -o -e "M\{1\}" test2
441447
}
448+
449+
atf_test_case wv_combo_break
450+
wv_combo_break_head()
451+
{
452+
atf_set "descr" "Check for incorrectly matching lines with both -w and -v flags (PR 218467)"
453+
}
454+
wv_combo_break_body()
455+
{
456+
printf "x xx\n" > test1
457+
printf "xx x\n" > test2
458+
459+
atf_check -o file:test1 grep -w "x" test1
460+
atf_check -o file:test2 grep -w "x" test2
461+
462+
atf_check -s exit:1 grep -v -w "x" test1
463+
atf_check -s exit:1 grep -v -w "x" test2
464+
}
442465
# End FreeBSD
443466

444467
atf_init_test_cases()
@@ -467,6 +490,7 @@ atf_init_test_cases()
467490
atf_add_test_case escmap
468491
atf_add_test_case egrep_empty_invalid
469492
atf_add_test_case zerolen
493+
atf_add_test_case wv_combo_break
470494
atf_add_test_case fgrep_sanity
471495
atf_add_test_case egrep_sanity
472496
atf_add_test_case grep_sanity

usr.bin/grep/util.c

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ procline(struct str *l, int nottext)
305305
unsigned int i;
306306
int c = 0, m = 0, r = 0, lastmatches = 0, leflags = eflags;
307307
int startm = 0;
308+
int retry;
308309

309310
/* Initialize to avoid a false positive warning from GCC. */
310311
lastmatch.rm_so = lastmatch.rm_eo = 0;
@@ -313,6 +314,7 @@ procline(struct str *l, int nottext)
313314
while (st <= l->len) {
314315
lastmatches = 0;
315316
startm = m;
317+
retry = 0;
316318
if (st > 0)
317319
leflags |= REG_NOTBOL;
318320
/* Loop to compare with all the patterns */
@@ -356,6 +358,17 @@ procline(struct str *l, int nottext)
356358
else if (iswword(wbegin) ||
357359
iswword(wend))
358360
r = REG_NOMATCH;
361+
/*
362+
* If we're doing whole word matching and we
363+
* matched once, then we should try the pattern
364+
* again after advancing just past the start of
365+
* the earliest match. This allows the pattern
366+
* to match later on in the line and possibly
367+
* still match a whole word.
368+
*/
369+
if (r == REG_NOMATCH &&
370+
(retry == 0 || pmatch.rm_so + 1 < retry))
371+
retry = pmatch.rm_so + 1;
359372
}
360373
if (r == 0) {
361374
lastmatches++;
@@ -385,9 +398,14 @@ procline(struct str *l, int nottext)
385398
}
386399
}
387400

388-
if (vflag) {
389-
c = !c;
390-
break;
401+
/*
402+
* Advance to just past the start of the earliest match, try
403+
* again just in case we still have a chance to match later in
404+
* the string.
405+
*/
406+
if (lastmatches == 0 && retry > 0) {
407+
st = retry;
408+
continue;
391409
}
392410

393411
/* One pass if we are not recording matches */
@@ -410,6 +428,9 @@ procline(struct str *l, int nottext)
410428
}
411429

412430

431+
if (vflag)
432+
c = !c;
433+
413434
/* Count the matches if we have a match limit */
414435
if (mflag)
415436
mcount -= c;

0 commit comments

Comments
 (0)