Skip to content

RegexGroupParser: Ignore whitespaces on "x"-modified patterns #3291

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 5, 2024
88 changes: 77 additions & 11 deletions src/Type/Php/RegexGroupParser.php
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
use function str_replace;
use function strlen;
use function substr;
use function trim;

final class RegexGroupParser
{
Expand Down Expand Up @@ -126,7 +127,11 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)),
$this->createGroupType(
$ast,
$this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup),
$patternModifiers,
),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#namedcapturing') {
Expand All @@ -137,7 +142,11 @@ private function walkRegexAst(
$inAlternation ? $alternationId : null,
$inOptionalQuantification,
$parentGroup,
$this->createGroupType($ast, $this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup)),
$this->createGroupType(
$ast,
$this->allowConstantTypes($patternModifiers, $repeatedMoreThanOnce, $parentGroup),
$patternModifiers,
),
);
$parentGroup = $group;
} elseif ($ast->getId() === '#noncapturing') {
Expand Down Expand Up @@ -293,7 +302,7 @@ private function getQuantificationRange(TreeNode $node): array
return [$min, $max];
}

private function createGroupType(TreeNode $group, bool $maybeConstant): Type
private function createGroupType(TreeNode $group, bool $maybeConstant, string $patternModifiers): Type
{
$isNonEmpty = TrinaryLogic::createMaybe();
$isNonFalsy = TrinaryLogic::createMaybe();
Expand All @@ -310,6 +319,7 @@ private function createGroupType(TreeNode $group, bool $maybeConstant): Type
$inOptionalQuantification,
$onlyLiterals,
false,
$patternModifiers,
);

if ($maybeConstant && $onlyLiterals !== null && $onlyLiterals !== []) {
Expand Down Expand Up @@ -356,6 +366,7 @@ private function walkGroupAst(
bool &$inOptionalQuantification,
?array &$onlyLiterals,
bool $inClass,
string $patternModifiers,
): void
{
$children = $ast->getChildren();
Expand All @@ -364,9 +375,31 @@ private function walkGroupAst(
$ast->getId() === '#concatenation'
&& count($children) > 0
) {
$isNonEmpty = TrinaryLogic::createYes();
if (!$inAlternation) {
$meaningfulTokens = 0;
foreach ($children as $child) {
$nonFalsy = false;
if ($this->isMaybeEmptyNode($child, $patternModifiers, $nonFalsy)) {
continue;
}

$meaningfulTokens++;

if (!$nonFalsy || $inAlternation) {
continue;
}

// a single token non-falsy on its own
$isNonFalsy = TrinaryLogic::createYes();
break;
}

if ($meaningfulTokens > 0) {
$isNonEmpty = TrinaryLogic::createYes();

// two non-empty tokens concatenated results in a non-falsy string
if ($meaningfulTokens > 1 && !$inAlternation) {
$isNonFalsy = TrinaryLogic::createYes();
}
}
} elseif ($ast->getId() === '#quantification') {
[$min] = $this->getQuantificationRange($ast);
Expand All @@ -390,17 +423,14 @@ private function walkGroupAst(
foreach ($children as $child) {
$oldLiterals = $onlyLiterals;

if ($child->getId() === 'token') {
$this->getLiteralValue($child, $oldLiterals, true);
}

$this->getLiteralValue($child, $oldLiterals, true, $patternModifiers);
foreach ($oldLiterals ?? [] as $oldLiteral) {
$newLiterals[] = $oldLiteral;
}
}
$onlyLiterals = $newLiterals;
} elseif ($ast->getId() === 'token') {
$literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass);
$literalValue = $this->getLiteralValue($ast, $onlyLiterals, !$inClass, $patternModifiers);
if ($literalValue !== null) {
if (Strings::match($literalValue, '/^\d+$/') === null) {
$isNumeric = TrinaryLogic::createNo();
Expand Down Expand Up @@ -439,14 +469,46 @@ private function walkGroupAst(
$inOptionalQuantification,
$onlyLiterals,
$inClass,
$patternModifiers,
);
}
}

private function isMaybeEmptyNode(TreeNode $node, string $patternModifiers, bool &$isNonFalsy): bool
{
if ($node->getId() === '#quantification') {
[$min] = $this->getQuantificationRange($node);

if ($min > 0) {
return false;
}

if ($min === 0) {
return true;
}
}

$literal = $this->getLiteralValue($node, $onlyLiterals, false, $patternModifiers);
if ($literal !== null) {
if ($literal !== '' && $literal !== '0') {
$isNonFalsy = true;
}
return false;
}

foreach ($node->getChildren() as $child) {
if (!$this->isMaybeEmptyNode($child, $patternModifiers, $isNonFalsy)) {
return false;
}
}

return true;
}

/**
* @param array<string>|null $onlyLiterals
*/
private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals): ?string
private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $appendLiterals, string $patternModifiers): ?string
{
if ($node->getId() !== 'token') {
return null;
Expand All @@ -457,6 +519,10 @@ private function getLiteralValue(TreeNode $node, ?array &$onlyLiterals, bool $ap
$value = $node->getValueValue();

if (in_array($token, ['literal', 'escaped_end_class'], true)) {
if (str_contains($patternModifiers, 'x') && trim($value) === '') {
return null;
}

if (strlen($value) > 1 && $value[0] === '\\') {
return substr($value, 1);
} elseif (
Expand Down
2 changes: 1 addition & 1 deletion tests/PHPStan/Analyser/nsrt/bug-11311.php
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ function (string $size): void {
if (preg_match('/ab(\d+\d?)e?/', $size, $matches, PREG_UNMATCHED_AS_NULL) !== 1) {
throw new InvalidArgumentException(sprintf('Invalid size "%s"', $size));
}
assertType('array{string, non-falsy-string&numeric-string}', $matches);
assertType('array{string, numeric-string}', $matches);
};

function (string $s): void {
Expand Down
27 changes: 25 additions & 2 deletions tests/PHPStan/Analyser/nsrt/preg_match_shapes.php
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,7 @@ function (string $size): void {
function bug11277a(string $value): void
{
if (preg_match('/^\[(.+,?)*\]$/', $value, $matches)) {
assertType('array{0: string, 1?: non-falsy-string}', $matches);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

these were too optimistic before (.+,?) can be 0

assertType('array{0: string, 1?: non-empty-string}', $matches);
if (count($matches) === 2) {
assertType('array{string, string}', $matches); // could be array{string, non-empty-string}
}
Expand All @@ -338,7 +338,7 @@ function bug11277a(string $value): void
function bug11277b(string $value): void
{
if (preg_match('/^(?:(.+,?)|(x))*$/', $value, $matches)) {
assertType('array{0: string, 1?: non-falsy-string, 2?: non-empty-string}', $matches);
assertType('array{0: string, 1?: non-empty-string, 2?: non-empty-string}', $matches);
if (count($matches) === 2) {
assertType('array{string, string}', $matches); // could be array{string, non-empty-string}
}
Expand Down Expand Up @@ -625,3 +625,26 @@ function (string $s): void {
}
};

function (string $s): void {
if (preg_match('/( \d+ )/x', $s, $matches)) {
assertType('array{string, numeric-string}', $matches);
}
};

function (string $s): void {
if (preg_match('/( .? )/x', $s, $matches)) {
assertType('array{string, string}', $matches);
}
};

function (string $s): void {
if (preg_match('/( .* )/x', $s, $matches)) {
assertType('array{string, string}', $matches);
}
};

function (string $s): void {
if (preg_match('/( .+ )/x', $s, $matches)) {
assertType('array{string, non-empty-string}', $matches);
}
};
Loading