diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 4d31cf5715342c..7f010f3fdd62e5 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -894,7 +894,9 @@ private RegexNode ReduceLoops() // If the Loop or Lazyloop now only has one child node and its a Set, One, or Notone, // reduce to just Setloop/lazy, Oneloop/lazy, or Notoneloop/lazy. The parser will // generally have only produced the latter, but other reductions could have exposed - // this. + // this. We can also reduce or eliminate certain loops that are nops, e.g. + // a loop with a minimum of 0 that wraps a zero-width assertion is either asserting something + // or not, and is thus useless. if (u.ChildCount() == 1) { RegexNode child = u.Child(0); @@ -906,6 +908,17 @@ private RegexNode ReduceLoops() child.MakeRep(u.Kind == RegexNodeKind.Lazyloop ? RegexNodeKind.Onelazy : RegexNodeKind.Oneloop, u.M, u.N); u = child; break; + + case RegexNodeKind.Empty: + case RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround or + RegexNodeKind.Beginning or RegexNodeKind.Start or + RegexNodeKind.Bol or RegexNodeKind.Eol or + RegexNodeKind.End or RegexNodeKind.EndZ or + RegexNodeKind.Boundary or RegexNodeKind.ECMABoundary or + RegexNodeKind.NonBoundary or RegexNodeKind.NonECMABoundary + when u.M == 0: + u = new RegexNode(RegexNodeKind.Empty, Options); + break; } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs index c1030f86b11f1a..8501700b31c180 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexFindOptimizationsTests.cs @@ -43,9 +43,8 @@ public class RegexFindOptimizationsTests [InlineData(@"(?=\b)^abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] [InlineData(@"(?=\b)(?=^.*$)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] [InlineData(@"(?=\b)(?=\B)^abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] - // The next two could be improved slightly to be LeadingString_LeftToRight. - [InlineData(@"(?=^.*def)?abc", 0, (int)FindNextStartingPositionMode.FixedDistanceChar_LeftToRight)] - [InlineData(@"(?=^)?(?=^)abc", 0, (int)FindNextStartingPositionMode.FixedDistanceChar_LeftToRight)] + [InlineData(@"(?=^.*def)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] + [InlineData(@"(?=^)(?=^)abc", 0, (int)FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning)] [InlineData(@"^", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning)] [InlineData(@"hello^", (int)RegexOptions.RightToLeft, (int)FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning)] diff --git a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs index 0211065fbb8051..e5a6ff8e89ea2e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/UnitTests/RegexReductionTests.cs @@ -244,6 +244,19 @@ public class RegexReductionTests // Large loop patterns [InlineData("a*a*a*a*a*a*a*b*b*?a+a*", "a*b*b*?a+")] [InlineData("a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?a?aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "a{0,30}aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa")] + // Nop loops + [InlineData("(?:)*", "")] + [InlineData("a(?=abc)*b", "ab")] + [InlineData("a(?<=abc)*b", "ab")] + [InlineData("a(?a*)(?(xyz)bcd)")] // Different prefixes on alternation branches [InlineData("^abcd|$abce", "^abcd|^abce")] + // Zero-width assertions in non-removable loops + [InlineData("a(?=abc)+b", "ab")] + [InlineData("a(?<=abc)+b", "ab")] + [InlineData("a(?