Skip to content
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .github/actions/setup-php/action.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ runs:
with:
php-version: ${{ inputs.php-version }}
coverage: xdebug
ini-values: zend.assertions=1

- id: composer-cache
run: echo "dir=$(composer config cache-files-dir)" >> $GITHUB_OUTPUT
Expand Down
1 change: 1 addition & 0 deletions scripts/test
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
--display-deprecations \
--display-errors \
--display-notices \
--display-warnings \
--coverage-html build/coverage-report \
--coverage-filter src $@
84 changes: 84 additions & 0 deletions src/Language/Lexer/CharacterStream/CharacterStream.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream;

use PackageFactory\ComponentEngine\Parser\Source\Position;

/**
 * Forward-only reader that hands out a source string one character at a time.
 *
 * A "character" is the lead byte under the read offset plus the number of
 * following bytes implied by that lead byte (UTF-8 style, up to 4 bytes in
 * total). Every consumed character is reported to a Cursor, which keeps track
 * of the matching line/column position.
 */
final class CharacterStream
{
    /** Byte offset into $source of the first byte that has not been read yet. */
    private int $byte;
    private Cursor $cursor;

    /** The character most recently read, or null before the first read and once the source is exhausted. */
    private ?string $characterUnderCursor = null;

    /**
     * @param string $source The complete text to iterate over
     */
    public function __construct(private readonly string $source)
    {
        $this->byte = 0;
        $this->cursor = new Cursor();

        // Prime the stream so that current() exposes the first character right away.
        $this->next();
    }

    /**
     * Moves on to the next character.
     *
     * The character that was current so far is reported to the cursor first,
     * so the cursor position always refers to the character now under it.
     * Once the source is exhausted the current character becomes null and
     * further calls change nothing.
     */
    public function next(): void
    {
        $this->cursor->advance($this->characterUnderCursor);

        $leadByte = $this->source[$this->byte] ?? null;
        if ($leadByte === null) {
            $this->characterUnderCursor = null;
            return;
        }

        // The lead byte determines how many bytes make up the character.
        // NOTE(review): lead bytes are not validated; a stray byte in the
        // range 0x80-0xBF is grouped with the single byte that follows it.
        $ord = ord($leadByte);
        $length = match (true) {
            $ord >= 0xf0 => 4,
            $ord >= 0xe0 => 3,
            $ord >= 0x80 => 2,
            default => 1,
        };

        // substr() clamps at the end of the string, so a sequence that is cut
        // off by the end of the source yields the bytes that are present
        // instead of reading undefined string offsets (which raises a warning).
        $character = substr($this->source, $this->byte, $length);

        $this->byte += strlen($character);
        $this->characterUnderCursor = $character;
    }

    /**
     * @return string|null The character under the cursor, or null at the end of the source
     */
    public function current(): ?string
    {
        return $this->characterUnderCursor;
    }

    /**
     * @return bool True once there is no character under the cursor anymore
     */
    public function isEnd(): bool
    {
        return $this->characterUnderCursor === null;
    }

    /**
     * @return Position The position reported by the cursor for the current character
     */
    public function getCurrentPosition(): Position
    {
        return $this->cursor->getCurrentPosition();
    }

    /**
     * @return Position The position the cursor held before its latest advance
     */
    public function getPreviousPosition(): Position
    {
        return $this->cursor->getPreviousPosition();
    }
}
61 changes: 61 additions & 0 deletions src/Language/Lexer/CharacterStream/Cursor.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\Lexer\CharacterStream;

use PackageFactory\ComponentEngine\Parser\Source\Position;

/**
 * Keeps a zero-based line/column location that is moved forward one character
 * at a time, and remembers the location it held right before the latest move.
 */
final class Cursor
{
    private int $currentLineNumber = 0;
    private int $currentColumnNumber = 0;

    // -1 means "no character has been consumed yet"; getPreviousPosition()
    // asserts that this marker has been overwritten.
    private int $previousLineNumber = -1;
    private int $previousColumnNumber = -1;

    /**
     * Moves the cursor past the given character.
     *
     * A line feed starts a new line at column 0, any other character moves
     * one column to the right. Passing null leaves the cursor untouched.
     *
     * @param string|null $character The character that has just been consumed
     */
    public function advance(?string $character): void
    {
        if ($character === null) {
            return;
        }

        // Remember where we were before moving.
        $this->previousColumnNumber = $this->currentColumnNumber;
        $this->previousLineNumber = $this->currentLineNumber;

        if ($character !== "\n") {
            $this->currentColumnNumber += 1;
            return;
        }

        $this->currentColumnNumber = 0;
        $this->currentLineNumber += 1;
    }

    /**
     * @return Position The location the cursor currently points at
     */
    public function getCurrentPosition(): Position
    {
        $line = $this->currentLineNumber;
        $column = $this->currentColumnNumber;

        return new Position($line, $column);
    }

    /**
     * Only meaningful after at least one non-null character was passed to
     * advance(); this precondition is guarded by assertions.
     *
     * @return Position The location the cursor pointed at before its latest move
     */
    public function getPreviousPosition(): Position
    {
        assert($this->previousLineNumber >= 0);
        assert($this->previousColumnNumber >= 0);

        $line = $this->previousLineNumber;
        $column = $this->previousColumnNumber;

        return new Position($line, $column);
    }
}
197 changes: 197 additions & 0 deletions src/Language/Lexer/Lexer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
<?php

/**
* PackageFactory.ComponentEngine - Universal View Components for PHP
* Copyright (C) 2023 Contributors of PackageFactory.ComponentEngine
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/

declare(strict_types=1);

namespace PackageFactory\ComponentEngine\Language\Lexer;

use PackageFactory\ComponentEngine\Language\Lexer\CharacterStream\CharacterStream;
use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Matcher;
use PackageFactory\ComponentEngine\Language\Lexer\Matcher\Result;
use PackageFactory\ComponentEngine\Language\Lexer\Token\Token;
use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenType;
use PackageFactory\ComponentEngine\Language\Lexer\Token\TokenTypes;
use PackageFactory\ComponentEngine\Parser\Source\Position;
use PackageFactory\ComponentEngine\Parser\Source\Range;

/**
 * Pull-style tokenizer on top of a CharacterStream.
 *
 * The caller states which token type(s) it expects next; the lexer consumes
 * characters for as long as the corresponding matcher asks for more and then
 * exposes the result through getTokenUnderCursor(). After a LexerException
 * has been thrown the instance must not be used for further reads (guarded
 * by assertions).
 */
final class Lexer
{
    private readonly CharacterStream $characterStream;

    /** Position at which the latest read attempt started. */
    private ?Position $startPosition = null;

    /** Number of characters consumed for the token that is being read. */
    private int $offset = 0;

    /** Characters consumed for the token that is being read. */
    private string $buffer = '';

    private ?TokenType $tokenTypeUnderCursor = null;

    /** Lazily created by getTokenUnderCursor(), reset by every read. */
    private ?Token $tokenUnderCursor = null;

    private ?LexerException $latestError = null;

    /**
     * @param string $source The text to tokenize
     */
    public function __construct(string $source)
    {
        $this->characterStream = new CharacterStream($source);
    }

    /**
     * Reads exactly one token of the given type from the current position.
     *
     * Characters are consumed while the matcher answers KEEP. The character
     * that makes the matcher answer SATISFIED is not consumed.
     *
     * @throws LexerException If the source has ended or the matcher cancels
     */
    public function read(TokenType $tokenType): void
    {
        assert($this->latestError === null);

        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: TokenTypes::from($tokenType),
                affectedRangeInSource: $this->startPosition->toRange()
            );

            throw $this->latestError;
        }

        $this->resetTokenState();

        do {
            $character = $this->characterStream->current();
            $result = Matcher::for($tokenType)->match($character, $this->offset);

            if ($result === Result::CANCEL) {
                $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
                    expectedTokenTypes: TokenTypes::from($tokenType),
                    affectedRangeInSource: Range::from(
                        $this->startPosition,
                        $this->characterStream->getCurrentPosition()
                    ),
                    actualCharacterSequence: $this->buffer . $character
                );

                throw $this->latestError;
            }

            if ($result === Result::KEEP) {
                $this->offset++;
                $this->buffer .= $character;
                $this->characterStream->next();
            }
        } while ($result !== Result::SATISFIED);

        $this->tokenTypeUnderCursor = $tokenType;
    }

    /**
     * Reads one token whose type is any of the given candidates.
     *
     * All remaining candidates are tested against each character. The first
     * candidate (in iteration order) that answers SATISFIED wins; candidates
     * that do not answer KEEP are dropped.
     *
     * @throws LexerException If the source has ended or no candidate is left
     */
    public function readOneOf(TokenTypes $tokenTypes): void
    {
        assert($this->latestError === null);

        $this->startPosition = $this->characterStream->getCurrentPosition();

        if ($this->characterStream->isEnd()) {
            $this->latestError = LexerException::becauseOfUnexpectedEndOfSource(
                expectedTokenTypes: $tokenTypes,
                affectedRangeInSource: $this->startPosition->toRange()
            );

            throw $this->latestError;
        }

        $this->resetTokenState();

        $candidates = $tokenTypes->items;
        while (count($candidates) > 0) {
            $character = $this->characterStream->current();
            $survivors = [];

            foreach ($candidates as $candidate) {
                $result = Matcher::for($candidate)->match($character, $this->offset);

                if ($result === Result::SATISFIED) {
                    $this->tokenTypeUnderCursor = $candidate;
                    return;
                }

                if ($result === Result::KEEP) {
                    $survivors[] = $candidate;
                }
            }

            // Nobody was satisfied yet: consume the character and carry on
            // with the candidates that are still interested.
            $candidates = $survivors;
            $this->buffer .= $character;
            $this->offset++;
            $this->characterStream->next();
        }

        $this->latestError = LexerException::becauseOfUnexpectedCharacterSequence(
            expectedTokenTypes: $tokenTypes,
            affectedRangeInSource: Range::from(
                $this->startPosition,
                $this->characterStream->getPreviousPosition()
            ),
            actualCharacterSequence: $this->buffer
        );

        throw $this->latestError;
    }

    /**
     * Consumes any run of SPACE and END_OF_LINE tokens at the current position.
     */
    public function skipSpace(): void
    {
        assert($this->latestError === null);

        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE);
    }

    /**
     * Consumes any run of SPACE, END_OF_LINE and COMMENT tokens at the current position.
     */
    public function skipSpaceAndComments(): void
    {
        assert($this->latestError === null);

        $this->skip(TokenType::SPACE, TokenType::END_OF_LINE, TokenType::COMMENT);
    }

    /**
     * Repeatedly reads tokens of the given types for as long as one of their
     * matchers accepts the current character as a first character.
     */
    private function skip(TokenType ...$tokenTypes): void
    {
        do {
            $skippedSomething = false;
            $character = $this->characterStream->current();

            foreach ($tokenTypes as $tokenType) {
                $firstCharacterResult = Matcher::for($tokenType)->match($character, 0);

                if ($firstCharacterResult === Result::KEEP) {
                    $this->read($tokenType);
                    $skippedSomething = true;
                    break;
                }
            }
        } while ($skippedSomething);
    }

    /**
     * Returns the token produced by the latest successful read. The Token
     * object is created on first access and reused until the next read.
     */
    public function getTokenUnderCursor(): Token
    {
        assert($this->latestError === null);
        assert($this->startPosition !== null);
        assert($this->tokenTypeUnderCursor !== null);

        if ($this->tokenUnderCursor === null) {
            $this->tokenUnderCursor = new Token(
                rangeInSource: Range::from(
                    $this->startPosition,
                    $this->characterStream->getPreviousPosition()
                ),
                type: $this->tokenTypeUnderCursor,
                value: $this->buffer
            );
        }

        return $this->tokenUnderCursor;
    }

    /**
     * @return bool True once the underlying character stream is exhausted
     */
    public function isEnd(): bool
    {
        return $this->characterStream->isEnd();
    }

    /**
     * Forgets the previously read token and empties the character buffer
     * before a new read starts.
     */
    private function resetTokenState(): void
    {
        $this->tokenTypeUnderCursor = null;
        $this->tokenUnderCursor = null;
        $this->offset = 0;
        $this->buffer = '';
    }
}
Loading