diff --git a/src/Type/Php/RegexGroupParser.php b/src/Type/Php/RegexGroupParser.php index 98a0c92c8f..81e7fe0b55 100644 --- a/src/Type/Php/RegexGroupParser.php +++ b/src/Type/Php/RegexGroupParser.php @@ -30,6 +30,7 @@ use function str_replace; use function strlen; use function substr; +use function trim; final class RegexGroupParser { @@ -126,7 +127,11 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)), + $this->createGroupType( + $ast, + $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup), + $patternModifiers, + ), ); $parentGroup = $group; } elseif ($ast->getId() === '#namedcapturing') { @@ -137,7 +142,11 @@ private function walkRegexAst( $inAlternation ? $alternationId : null, $inOptionalQuantification, $parentGroup, - $this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)), + $this->createGroupType( + $ast, + $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup), + $patternModifiers, + ), ); $parentGroup = $group; } elseif ($ast->getId() === '#noncapturing') { @@ -293,7 +302,7 @@ private function getQuantificationRange(TreeNode $node): array return [$min, $max]; } - private function createGroupType(TreeNode $group, bool $maybeConstant): Type + private function createGroupType(TreeNode $group, bool $maybeConstant, string $patternModifiers): Type { $isNonEmpty = TrinaryLogic::createMaybe(); $isNonFalsy = TrinaryLogic::createMaybe(); @@ -310,6 +319,7 @@ private function createGroupType(TreeNode $group, bool $maybeConstant): Type $inOptionalQuantification, $onlyLiterals, false, + $patternModifiers, ); if ($maybeConstant && $onlyLiterals !== null && $onlyLiterals !== []) { @@ -356,6 +366,7 @@ private function walkGroupAst( bool &$inOptionalQuantification, ?array &$onlyLiterals, bool $inClass, + string $patternModifiers, ): void { $children = $ast->getChildren(); @@ -364,9 +375,31 @@ private function walkGroupAst( $ast->getId() === '#concatenation' && count($children) > 0 ) { - $isNonEmpty = TrinaryLogic::createYes(); - if (!$inAlternation) { + $meaningfulTokens = 0; + foreach ($children as $child) { + $nonFalsy = false; + if ($this->isMaybeEmptyNode($child, $patternModifiers, $nonFalsy)) { + continue; + } + + $meaningfulTokens++; + + if (!$nonFalsy || $inAlternation) { + continue; + } + + // a single token non-falsy on its own $isNonFalsy = TrinaryLogic::createYes(); + break; + } + + if ($meaningfulTokens > 0) { + $isNonEmpty = TrinaryLogic::createYes(); + + // two non-empty tokens concatenated results in a non-falsy string + if ($meaningfulTokens > 1 && !$inAlternation) { + $isNonFalsy = TrinaryLogic::createYes(); + } } } elseif ($ast->getId() === '#quantification') { [$min] = $this->getQuantificationRange($ast); @@ -390,17 +423,14 @@ private function walkGroupAst( foreach ($children as $child) { $oldLiterals = $onlyLiterals; - if ($child->getId() === 'token') { - $this->getLiteralValue($child, $oldLiterals, true); - } - + $this->getLiteralValue($child, $oldLiterals, true, $patternModifiers); foreach ($oldLiterals ?? [] as $oldLiteral) { $newLiterals[] = $oldLiteral; } } $onlyLiterals = $newLiterals; } elseif ($ast->getId() === 'token') { - $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass); + $literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass, $patternModifiers); if ($literalValue !== null) { if (Strings::match($literalValue, '/^\d+$/') === null) { $isNumeric = TrinaryLogic::createNo(); @@ -439,14 +469,46 @@ private function walkGroupAst( $inOptionalQuantification, $onlyLiterals, $inClass, + $patternModifiers, ); } } + private function isMaybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool + { + if ($node->getId() === '#quantification') { + [$min] = $this->getQuantificationRange($node); + + if ($min > 0) { + return false; + } + + if ($min === 0) { + return true; + } + } + + $literal = $this->getLiteralValue($node, $onlyLiterals, false, $patternModifiers); + if ($literal !== null) { + if ($literal !== '' && $literal !== '0') { + $isNonFalsy = true; + } + return false; + } + + foreach ($node->getChildren() as $child) { + if (!$this->isMaybeEmptyNode($child, $patternModifiers, $isNonFalsy)) { + return false; + } + } + + return true; + } + /** * @param array|null $onlyLiterals */ - private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals): ?string + private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals, string $patternModifiers): ?string { if ($node->getId() !== 'token') { return null; @@ -457,6 +519,10 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap $value = $node->getValueValue(); if (in_array($token, ['literal', 'escaped_end_class'], true)) { + if (str_contains($patternModifiers, 'x') && trim($value) === '') { + return null; + } + if (strlen($value) > 1 && $value[0] === '\\') { return substr($value, 1); } elseif ( diff --git a/tests/PHPStan/Analyser/nsrt/bug-11311.php b/tests/PHPStan/Analyser/nsrt/bug-11311.php index 197fe587bd..43bc3d6247 100644 --- a/tests/PHPStan/Analyser/nsrt/bug-11311.php +++ b/tests/PHPStan/Analyser/nsrt/bug-11311.php @@ -162,7 +162,7 @@ function (string $size): void { if (preg_match('/ab(\d+\d?)e?/', $size, $matches, PREG_UNMATCHED_AS_NULL) !== 1) { throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size)); } - assertType('array{string, non-falsy-string&numeric-string}', $matches); + assertType('array{string, numeric-string}', $matches); }; function (string $s): void { diff --git a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php index 31f4d83721..ecbc91e005 100644 --- a/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php +++ b/tests/PHPStan/Analyser/nsrt/preg_match_shapes.php @@ -328,7 +328,7 @@ function (string $size): void { function bug11277a(string $value): void { if (preg_match('/^\[(.+,?)*\]$/', $value, $matches)) { - assertType('array{0: string, 1?: non-falsy-string}', $matches); + assertType('array{0: string, 1?: non-empty-string}', $matches); if (count($matches) === 2) { assertType('array{string, string}', $matches); // could be array{string, non-empty-string} } @@ -338,7 +338,7 @@ function bug11277a(string $value): void function bug11277b(string $value): void { if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) { - assertType('array{0: string, 1?: non-falsy-string, 2?: non-empty-string}', $matches); + assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches); if (count($matches) === 2) { assertType('array{string, string}', $matches); // could be array{string, non-empty-string} } @@ -625,3 +625,26 @@ function (string $s): void { } }; +function (string $s): void { + if (preg_match('/( \d+ )/x', $s, $matches)) { + assertType('array{string, numeric-string}', $matches); + } +}; + +function (string $s): void { + if (preg_match('/( .? )/x', $s, $matches)) { + assertType('array{string, string}', $matches); + } +}; + +function (string $s): void { + if (preg_match('/( .* )/x', $s, $matches)) { + assertType('array{string, string}', $matches); + } +}; + +function (string $s): void { + if (preg_match('/( .+ )/x', $s, $matches)) { + assertType('array{string, non-empty-string}', $matches); + } +};