From 3635df2728db717a3cc4fc8f4f2d05a5180a1d9b Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 11:48:28 +0200 Subject: [PATCH 01/11] RegexGroupParser: Ignore whitespaces on "x"-modified patterns --- src/Type/Php/RegexGroupParser.php | 45 +++++++++++++++---- .../Analyser/nsrt/preg_match_shapes.php | 11 +++++ 2 files changed, 47 insertions(+), 9 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 98a0c92c8f..488c34c23b 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -30,6 +30,7 @@ use function str_replace; use function strlen; use function substr; +use function trim; final class RegexGroupParser { @@ -126,7 +127,11 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)), + $this->createGroupType( + $ast, + $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup), + $patternModifiers, + ), ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { @@ -137,7 +142,11 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)), + $this->createGroupType( + $ast, + $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup), + $patternModifiers, + ), ); $parentGroup = $group; } elseif ($ast->getId() === '#noncapturing') { @@ -293,7 +302,7 @@ private function getQuantificationRange(TreeNode $node): array return [$min, $max]; } - private function createGroupType(TreeNode $group, bool $maybeConstant): Type + private function createGroupType(TreeNode $group, bool $maybeConstant, string $patternModifiers): Type { $isNonEmpty = TrinaryLogic::createMaybe(); $isNonFalsy = TrinaryLogic::createMaybe(); @@ -310,6 +319,7 @@ private function createGroupType(TreeNode $group, bool $maybeConstant): Type $inOptionalQuantification, $onlyLiterals, false, + $patternModifiers, ); if ($maybeConstant && $onlyLiterals !== null && $onlyLiterals !== []) { @@ -356,6 +366,7 @@ private function walkGroupAst( bool &$inOptionalQuantification, ?array &$onlyLiterals, bool $inClass, + string $patternModifiers, ): void { $children = $ast->getChildren(); @@ -364,9 +375,20 @@ private function walkGroupAst( $ast->getId() === '#concatenation' && count($children) > 0 ) { - $isNonEmpty = TrinaryLogic::createYes(); - if (!$inAlternation) { - $isNonFalsy = TrinaryLogic::createYes(); + $hasMeaningfulToken = false; + foreach ($children as $child) { + $value = $this->getLiteralValue($child, $onlyLiterals, false, $patternModifiers); + if ($value !== null) { + $hasMeaningfulToken = true; + break; + } + } + + if ($hasMeaningfulToken) { + $isNonEmpty = TrinaryLogic::createYes(); + if (!$inAlternation) { + $isNonFalsy = TrinaryLogic::createYes(); + } } } elseif ($ast->getId() === '#quantification') { [$min] = $this->getQuantificationRange($ast); @@ -391,7 +413,7 @@ private function walkGroupAst( $oldLiterals = $onlyLiterals; if ($child->getId() === 'token') { - $this->getLiteralValue($child, $oldLiterals, true); + $this->getLiteralValue($child, $oldLiterals, true, $patternModifiers); } foreach ($oldLiterals ?? [] as $oldLiteral) { @@ -400,7 +422,7 @@ private function walkGroupAst( } $onlyLiterals = $newLiterals; } elseif ($ast->getId() === 'token') { - $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass); + $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass, $patternModifiers); if ($literalValue !== null) { if (Strings::match($literalValue, '/^\d+$/') === null) { $isNumeric = TrinaryLogic::createNo(); @@ -439,6 +461,7 @@ private function walkGroupAst( $inOptionalQuantification, $onlyLiterals, $inClass, + $patternModifiers, ); } } @@ -446,7 +469,7 @@ private function walkGroupAst( /** * @param array|null $onlyLiterals */ - private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals): ?string + private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals, string $patternModifiers): ?string { if ($node->getId() !== 'token') { return null; @@ -457,6 +480,10 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap $value = $node->getValueValue(); if (in_array($token, ['literal', 'escaped_end_class'], true)) { + if (str_contains($patternModifiers, 'x') && trim($value) === '') { + return null; + } + if (strlen($value) > 1 && $value[0] === '\\') { return substr($value, 1); } elseif ( diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 31f4d83721..60bf015173 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -625,3 +625,14 @@ function (string $s): void { } }; +function (string $s): void { + if (preg_match('/( \d+ )/x', $s, $matches)) { + assertType('array{string, numeric-string}', $matches); + } +}; + +function (string $s): void { + if (preg_match('/( .? )/x', $s, $matches)) { + assertType('array{string, non-empty-string}', $matches); + } +}; From ae2e5ebc32cd8791e5c81a9db0ef1aafda90fb3b Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 12:02:36 +0200 Subject: [PATCH 02/11] fix --- src/Type/Php/RegexGroupParser.php | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 488c34c23b..6bd02c1d34 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -375,18 +375,25 @@ private function walkGroupAst( $ast->getId() === '#concatenation' && count($children) > 0 ) { - $hasMeaningfulToken = false; + $meaningfulTokens = 0; foreach ($children as $child) { + if ($child->getId() !== 'token') { + $meaningfulTokens++; + continue; + } + $value = $this->getLiteralValue($child, $onlyLiterals, false, $patternModifiers); - if ($value !== null) { - $hasMeaningfulToken = true; - break; + if ($value === null) { + continue; } + + $meaningfulTokens++; } - if ($hasMeaningfulToken) { + if ($meaningfulTokens > 0) { $isNonEmpty = TrinaryLogic::createYes(); - if (!$inAlternation) { + + if ($meaningfulTokens > 1 && !$inAlternation) { $isNonFalsy = TrinaryLogic::createYes(); } } From 0239d4a2f7bb8825c482aa395c9a9b7fb6819e7f Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 12:22:37 +0200 Subject: [PATCH 03/11] more tests --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 60bf015173..9acba34ea0 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -633,6 +633,12 @@ function (string $s): void { function (string $s): void { if (preg_match('/( .? )/x', $s, $matches)) { + assertType('array{string, string}', $matches); + } +}; + +function (string $s): void { + if (preg_match('/( .+ )/x', $s, $matches)) { assertType('array{string, non-empty-string}', $matches); } }; From f1d5886f89719d1eb77308ba22aad1f08142ed53 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 12:36:47 +0200 Subject: [PATCH 04/11] Update RegexGroupParser.php --- src/Type/Php/RegexGroupParser.php | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 6bd02c1d34..935a519a53 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -378,7 +378,6 @@ private function walkGroupAst( $meaningfulTokens = 0; foreach ($children as $child) { if ($child->getId() !== 'token') { - $meaningfulTokens++; continue; } @@ -419,15 +418,17 @@ private function walkGroupAst( foreach ($children as $child) { $oldLiterals = $onlyLiterals; - if ($child->getId() === 'token') { - $this->getLiteralValue($child, $oldLiterals, true, $patternModifiers); - } - + $this->getLiteralValue($child, $oldLiterals, true, $patternModifiers); foreach ($oldLiterals ?? [] as $oldLiteral) { $newLiterals[] = $oldLiteral; } } $onlyLiterals = $newLiterals; + + if (!$inOptionalQuantification) { + $isNonEmpty = TrinaryLogic::createYes(); + } + } elseif ($ast->getId() === 'token') { $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass, $patternModifiers); if ($literalValue !== null) { From d857b722ae8dd41c7317a08a6548ed365b6b9d9c Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:03:37 +0200 Subject: [PATCH 05/11] fix --- src/Type/Php/RegexGroupParser.php | 41 +++++++++++++++++-- tests/PHPStan/Analyser/nsrt/bug-11311.php | 2 +- .../Analyser/nsrt/preg_match_shapes.php | 4 +- 3 files changed, 40 insertions(+), 7 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 935a519a53..0ef162e928 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -377,16 +377,18 @@ private function walkGroupAst( ) { $meaningfulTokens = 0; foreach ($children as $child) { - if ($child->getId() !== 'token') { + $nonFalsy = false; + if (!$this->isNonEmptyNode($child, $patternModifiers, $nonFalsy)) { continue; } - $value = $this->getLiteralValue($child, $onlyLiterals, false, $patternModifiers); - if ($value === null) { + $meaningfulTokens++; + + if (!$nonFalsy || $inAlternation) { continue; } - $meaningfulTokens++; + $isNonFalsy = TrinaryLogic::createYes(); } if ($meaningfulTokens > 0) { @@ -474,6 +476,37 @@ private function walkGroupAst( } } + private function isNonEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool + { + if ($node->getId() === '#quantification') { + [$min] = $this->getQuantificationRange($node); + + if ($min > 0) { + return true; + } + + if ($min === 0) { + return false; + } + } + + $literal = $this->getLiteralValue($node, $onlyLiterals, false, $patternModifiers); + if ($literal !== null) { + if ($literal !== '' && $literal !== '0') { + $isNonFalsy = true; + } + return true; + } + + foreach ($node->getChildren() as $child) { + if ($this->isNonEmptyNode($child, $patternModifiers, $isNonFalsy)) { + return true; + } + } + + return false; + } + /** * @param array|null $onlyLiterals */ diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311.php b/tests/PHPStan/Analyser/nsrt/bug-11311.php index 197fe587bd..43bc3d6247 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311.php @@ -162,7 +162,7 @@ function (string $size): void { if (preg_match('/ab(\d+\d?)e?/', $size, $matches, PREG_UNMATCHED_AS_NULL) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{string, non-falsy-string&numeric-string}', $matches); + assertType('array{string, numeric-string}', $matches); }; function (string $s): void { diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 9acba34ea0..1383037847 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -328,7 +328,7 @@ function (string $size): void { function bug11277a(string $value): void { if (preg_match('/^\[(.+,?)*\]$/', $value, $matches)) { - assertType('array{0: string, 1?: non-falsy-string}', $matches); + assertType('array{0: string, 1?: non-empty-string}', $matches); if (count($matches) === 2) { assertType('array{string, string}', $matches); // could be array{string, non-empty-string} } @@ -338,7 +338,7 @@ function bug11277a(string $value): void function bug11277b(string $value): void { if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) { - assertType('array{0: string, 1?: non-falsy-string, 2?: non-empty-string}', $matches); + assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches); if (count($matches) === 2) { assertType('array{string, string}', $matches); // could be array{string, non-empty-string} } From 77c8c956055461118235687213650bbd17d7401b Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:06:17 +0200 Subject: [PATCH 06/11] remove double negation --- src/Type/Php/RegexGroupParser.php | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 0ef162e928..1a4526d53d 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -378,7 +378,7 @@ private function walkGroupAst( $meaningfulTokens = 0; foreach ($children as $child) { $nonFalsy = false; - if (!$this->isNonEmptyNode($child, $patternModifiers, $nonFalsy)) { + if ($this->isEmptyNode($child, $patternModifiers, $nonFalsy)) { continue; } @@ -426,11 +426,6 @@ private function walkGroupAst( } } $onlyLiterals = $newLiterals; - - if (!$inOptionalQuantification) { - $isNonEmpty = TrinaryLogic::createYes(); - } - } elseif ($ast->getId() === 'token') { $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass, $patternModifiers); if ($literalValue !== null) { @@ -476,7 +471,7 @@ private function walkGroupAst( } } - private function isNonEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool + private function isEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool { if ($node->getId() === '#quantification') { [$min] = $this->getQuantificationRange($node); @@ -499,7 +494,7 @@ private function isNonEmptyNode(TreeNode $node, string $patternModifiers, bool & } foreach ($node->getChildren() as $child) { - if ($this->isNonEmptyNode($child, $patternModifiers, $isNonFalsy)) { + if ($this->isEmptyNode($child, $patternModifiers, $isNonFalsy)) { return true; } } From ea9490035ed4454910c6a678ca69906c28f53750 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:12:36 +0200 Subject: [PATCH 07/11] Update RegexGroupParser.php --- src/Type/Php/RegexGroupParser.php | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 1a4526d53d..7ac763336b 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -388,12 +388,14 @@ private function walkGroupAst( continue; } + // a single token non-falsy on its own $isNonFalsy = TrinaryLogic::createYes(); } if ($meaningfulTokens > 0) { $isNonEmpty = TrinaryLogic::createYes(); + // two non-empty tokens concatenated results in a non-falsy string if ($meaningfulTokens > 1 && !$inAlternation) { $isNonFalsy = TrinaryLogic::createYes(); } @@ -477,11 +479,11 @@ private function isEmptyNode(TreeNode $node, string $patternModifiers, bool &$is [$min] = $this->getQuantificationRange($node); if ($min > 0) { - return true; + return false; } if ($min === 0) { - return false; + return true; } } @@ -490,16 +492,16 @@ private function isEmptyNode(TreeNode $node, string $patternModifiers, bool &$is if ($literal !== '' && $literal !== '0') { $isNonFalsy = true; } - return true; + return false; } foreach ($node->getChildren() as $child) { - if ($this->isEmptyNode($child, $patternModifiers, $isNonFalsy)) { - return true; + if (!$this->isEmptyNode($child, $patternModifiers, $isNonFalsy)) { + return false; } } - return false; + return true; } /** From 40dfc8fb731ad2b951b702081c247ffe76515bc8 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:15:21 +0200 Subject: [PATCH 08/11] prevent unnecessary work --- src/Type/Php/RegexGroupParser.php | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 7ac763336b..62d2f5fee8 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -390,6 +390,7 @@ private function walkGroupAst( // a single token non-falsy on its own $isNonFalsy = TrinaryLogic::createYes(); + break; } if ($meaningfulTokens > 0) { From 8dd37fba40e439d326ced7638a662bb0bd536742 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:17:03 +0200 Subject: [PATCH 09/11] Update RegexGroupParser.php --- src/Type/Php/RegexGroupParser.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 62d2f5fee8..038611e8b7 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -378,7 +378,7 @@ private function walkGroupAst( $meaningfulTokens = 0; foreach ($children as $child) { $nonFalsy = false; - if ($this->isEmptyNode($child, $patternModifiers, $nonFalsy)) { + if ($this->maybeEmptyNode($child, $patternModifiers, $nonFalsy)) { continue; } @@ -474,7 +474,7 @@ private function walkGroupAst( } } - private function isEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool + private function maybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool { if ($node->getId() === '#quantification') { [$min] = $this->getQuantificationRange($node); @@ -497,7 +497,7 @@ private function isEmptyNode(TreeNode $node, string $patternModifiers, bool &$is } foreach ($node->getChildren() as $child) { - if (!$this->isEmptyNode($child, $patternModifiers, $isNonFalsy)) { + if (!$this->maybeEmptyNode($child, $patternModifiers, $isNonFalsy)) { return false; } } From e277c1180aa216eafd8a6a68f78d4b16e3acf4a9 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:17:42 +0200 Subject: [PATCH 10/11] Update RegexGroupParser.php --- src/Type/Php/RegexGroupParser.php | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 038611e8b7..81e7fe0b55 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -378,7 +378,7 @@ private function walkGroupAst( $meaningfulTokens = 0; foreach ($children as $child) { $nonFalsy = false; - if ($this->maybeEmptyNode($child, $patternModifiers, $nonFalsy)) { + if ($this->isMaybeEmptyNode($child, $patternModifiers, $nonFalsy)) { continue; } @@ -474,7 +474,7 @@ private function walkGroupAst( } } - private function maybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool + private function isMaybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool { if ($node->getId() === '#quantification') { [$min] = $this->getQuantificationRange($node); @@ -497,7 +497,7 @@ private function maybeEmptyNode(TreeNode $node, string $patternModifiers, bool & } foreach ($node->getChildren() as $child) { - if (!$this->maybeEmptyNode($child, $patternModifiers, $isNonFalsy)) { + if (!$this->isMaybeEmptyNode($child, $patternModifiers, $isNonFalsy)) { return false; } } From f6413f3f53079c3ad8eb307e1733fc8ae36b2982 Mon Sep 17 00:00:00 2001 From: Markus Staab Date: Mon, 5 Aug 2024 13:23:02 +0200 Subject: [PATCH 11/11] Update preg_match_shapes.php --- tests/PHPStan/Analyser/nsrt/preg_match_shapes.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 1383037847..ecbc91e005 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -637,6 +637,12 @@ function (string $s): void { } }; +function (string $s): void { + if (preg_match('/( .* )/x', $s, $matches)) { + assertType('array{string, string}', $matches); + } +}; + function (string $s): void { if (preg_match('/( .+ )/x', $s, $matches)) { assertType('array{string, non-empty-string}', $matches);