Skip to content

Commit c44c705

Browse files
committed
Add mb_trim polyfills
1 parent 06a3365 commit c44c705

File tree

7 files changed

+226
-69
lines changed

7 files changed

+226
-69
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@ Polyfills are provided for:
7070
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
7171
- the `array_find`, `array_find_key`, `array_any` and `array_all` functions introduced in PHP 8.4;
7272
- the `Deprecated` attribute introduced in PHP 8.4;
73+
- the `mb_trim`, `mb_ltrim` and `mb_rtrim` functions introduced in PHP 8.4;
7374

7475
It is strongly recommended to upgrade your PHP version and/or install the missing
7576
extensions whenever possible. This polyfill should be used only when there is no

src/Mbstring/Mbstring.php

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@
5050
* - mb_substr_count - Count the number of substring occurrences
5151
* - mb_ucfirst - Make a string's first character uppercase
5252
* - mb_lcfirst - Make a string's first character lowercase
53+
* - mb_trim - Strip whitespace (or other characters) from the beginning and end of a string
54+
* - mb_ltrim - Strip whitespace (or other characters) from the beginning of a string
55+
* - mb_rtrim - Strip whitespace (or other characters) from the end of a string
5356
*
5457
* Not implemented:
5558
* - mb_convert_kana - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
@@ -79,6 +82,8 @@ final class Mbstring
7982
private static $encodingList = ['ASCII', 'UTF-8'];
8083
private static $language = 'neutral';
8184
private static $internalEncoding = 'UTF-8';
85+
// Default set of characters stripped by mb_trim(), mb_ltrim() and mb_rtrim() when the caller does not pass $characters.
private const CHARACTERS = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}";
86+
8287

8388
public static function mb_convert_encoding($s, $toEncoding, $fromEncoding = null)
8489
{
@@ -980,6 +985,64 @@ private static function getEncoding($encoding)
980985
return $encoding;
981986
}
982987

988+
/**
 * Strips the given characters from the beginning and the end of a string.
 *
 * Delegates to mb_internal_trim() with a pattern template anchored at both ends.
 */
public static function mb_trim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
{
    $bothEndsTemplate = '^[%s]+|[%s]+$';

    return self::mb_internal_trim($bothEndsTemplate, $string, $characters, $encoding);
}
992+
993+
/**
 * Strips the given characters from the beginning of a string.
 *
 * Delegates to mb_internal_trim() with a pattern template anchored at the start only.
 */
public static function mb_ltrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
{
    $leadingTemplate = '^[%s]+';

    return self::mb_internal_trim($leadingTemplate, $string, $characters, $encoding);
}
997+
998+
/**
 * Strips the given characters from the end of a string.
 *
 * Delegates to mb_internal_trim() with a pattern template anchored at the end only.
 */
public static function mb_rtrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
{
    $trailingTemplate = '[%s]+$';

    return self::mb_internal_trim($trailingTemplate, $string, $characters, $encoding);
}
1002+
1003+
/**
 * Shared implementation behind mb_trim(), mb_ltrim() and mb_rtrim().
 *
 * The work is done with PCRE on a UTF-8 copy of the input: strings in any other
 * encoding are converted to UTF-8 first and the result is converted back. No
 * mbstring-extension function (such as mb_ereg_replace()) is used, because this
 * class exists precisely for runtimes where that extension may be missing.
 *
 * @param string      $regex      sprintf() template of the pattern body (no delimiters) with one or two
 *                                "%s" placeholders that receive the quoted character list
 * @param string      $string     the string to trim, encoded in $encoding
 * @param string      $characters the characters to strip; an empty string disables trimming
 * @param string|null $encoding   encoding of $string and $characters; null means the internal encoding
 *
 * @return string the trimmed string in $encoding, or $string unchanged when it (or $characters)
 *                cannot be processed as valid text in $encoding
 *
 * @throws \ValueError when $encoding is not a valid encoding
 */
private static function mb_internal_trim(string $regex, string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
{
    if (null === $encoding) {
        $encoding = self::mb_internal_encoding();
    }

    // The check may either throw or merely return false; both outcomes are reported the same way.
    try {
        $validEncoding = @self::mb_check_encoding('', $encoding);
    } catch (\ValueError $e) {
        $validEncoding = false;
    }

    if (!$validEncoding) {
        // Frame 1 is the public wrapper (mb_trim, mb_ltrim or mb_rtrim) that called this helper.
        $caller = debug_backtrace(\DEBUG_BACKTRACE_IGNORE_ARGS, 2)[1]['function'] ?? __FUNCTION__;

        throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given.', $caller, $encoding));
    }

    // Nothing to strip, or nothing to strip from: hand the input back untouched.
    if ('' === $characters || '' === $string) {
        return $string;
    }

    $isUtf8 = \in_array(strtoupper($encoding), ['UTF-8', 'UTF8'], true);
    $subject = $string;

    if (!$isUtf8) {
        $subject = self::mb_convert_encoding($string, 'UTF-8', $encoding);

        // The default list is already UTF-8; only a caller-supplied list is expressed in $encoding.
        if (self::CHARACTERS !== $characters) {
            $characters = self::mb_convert_encoding($characters, 'UTF-8', $encoding);
        }
    }

    // preg_match('//u', ...) is a warning-free UTF-8 validity probe. Malformed input cannot be
    // matched in "u" mode (and would make the pattern fail to compile), so it is left as is.
    if (
        !\is_string($subject)
        || !\is_string($characters)
        || '' === $characters
        || 1 !== preg_match('//u', $subject)
        || 1 !== preg_match('//u', $characters)
    ) {
        return $string;
    }

    $quoted = preg_quote($characters, '/');

    // "u": treat pattern and subject as UTF-8 so multibyte characters are matched as whole characters.
    // "D": make "$" match only at the very end, not before a trailing newline.
    $trimmed = preg_replace('/'.sprintf($regex, $quoted, $quoted).'/Du', '', $subject);

    if (null === $trimmed) {
        return $string;
    }

    if ($isUtf8) {
        return $trimmed;
    }

    $converted = self::mb_convert_encoding($trimmed, $encoding, 'UTF-8');

    return \is_string($converted) ? $converted : $string;
}
1045+
9831046
private static function assertEncoding(string $encoding, string $errorFormat): void
9841047
{
9851048
try {

src/Mbstring/bootstrap.php

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,19 @@ function mb_ucfirst(string $string, ?string $encoding = null): string { return p
144144
function mb_lcfirst(string $string, ?string $encoding = null): string { return p\Mbstring::mb_lcfirst($string, $encoding); }
145145
}
146146

147+
// Declare the global mb_trim() only when no such function exists yet; it forwards to p\Mbstring::mb_trim().
if (!function_exists('mb_trim')) {
148+
function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Mbstring::mb_trim($string, $characters, $encoding); }
149+
}
150+
151+
// Declare the global mb_ltrim() only when no such function exists yet; it forwards to p\Mbstring::mb_ltrim().
if (!function_exists('mb_ltrim')) {
152+
function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Mbstring::mb_ltrim($string, $characters, $encoding); }
153+
}
154+
155+
// Declare the global mb_rtrim() only when no such function exists yet; it forwards to p\Mbstring::mb_rtrim().
if (!function_exists('mb_rtrim')) {
156+
function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Mbstring::mb_rtrim($string, $characters, $encoding); }
157+
}
158+
159+
147160
if (extension_loaded('mbstring')) {
148161
return;
149162
}

src/Php84/Php84.php

Lines changed: 32 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -112,76 +112,59 @@ public static function array_all(array $array, callable $callback): bool
112112

113113
public static function mb_trim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
114114
{
115-
try {
116-
@mb_check_encoding('', $encoding);
117-
} catch (\ValueError $e) {
118-
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __METHOD__, $encoding));
119-
}
120-
121-
if ('' === $characters) {
122-
return null === $encoding ? $string : mb_convert_encoding($string, $encoding);
123-
}
124-
125-
if ($encoding !== null && $encoding !== 'UTF-8') {
126-
$string = mb_convert_encoding($string, "UTF-8", $encoding);
127-
$characters = mb_convert_encoding($characters, "UTF-8", $encoding);
128-
}
129-
130-
$regex = preg_quote($characters, '/');
131-
$regex = sprintf('^[%s]+|[%s]+$', $regex, $regex);
115+
return self::mb_internal_trim('^[%s]+|[%s]+$', $string, $characters, $encoding);
116+
}
132117

133-
if ('ASCII' === mb_detect_encoding($characters) && 'ASCII' === mb_detect_encoding($string) && !empty(array_intersect(str_split(self::CHARACTERS), str_split($string)))) {
134-
$options = 'g';
135-
} else {
136-
$options = '';
137-
}
118+
public static function mb_ltrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
119+
{
120+
return self::mb_internal_trim('^[%s]+', $string, $characters, $encoding);
121+
}
138122

139-
return mb_ereg_replace($regex, "", $string, $options);
123+
public static function mb_rtrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
124+
{
125+
return self::mb_internal_trim('[%s]+$', $string, $characters, $encoding);
140126
}
141127

142-
public static function mb_ltrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
128+
private static function mb_internal_trim(string $regex, string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
143129
{
130+
if (null === $encoding) {
131+
$encoding = mb_internal_encoding();
132+
}
133+
144134
try {
145-
@mb_check_encoding('', $encoding);
135+
$validEncoding = @mb_check_encoding('', $encoding);
146136
} catch (\ValueError $e) {
147-
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __METHOD__, $encoding));
137+
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given.', debug_backtrace()[1]['function'], $encoding));
138+
}
139+
140+
// BC for PHP 7.3 and lower
141+
if (!$validEncoding) {
142+
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given.', debug_backtrace()[1]['function'], $encoding));
148143
}
149144

150145
if ('' === $characters) {
151146
return null === $encoding ? $string : mb_convert_encoding($string, $encoding);
152147
}
153148

154-
$regex = sprintf('^[%s]+', preg_quote($characters, '/'));
149+
$regexCharacter = preg_quote($characters, '/');
150+
$regex = sprintf($regex, $regexCharacter, $regexCharacter);
155151

156-
if ('ASCII' === mb_detect_encoding($characters) && 'ASCII' === mb_detect_encoding($string)) {
152+
if ('ASCII' === mb_detect_encoding($characters) && 'ASCII' === mb_detect_encoding($string) && !empty(array_intersect(str_split(self::CHARACTERS), str_split($string)))) {
157153
$options = 'g';
158154
} else {
159155
$options = '';
160156
}
161157

162-
return mb_ereg_replace($regex, "", $string, $options);
163-
}
164-
165-
public static function mb_rtrim(string $string, string $characters = self::CHARACTERS, ?string $encoding = null): string
166-
{
167158
try {
168-
@mb_check_encoding('', $encoding);
169-
} catch (\ValueError $e) {
170-
throw new \ValueError(sprintf('%s(): Argument #3 ($encoding) must be a valid encoding, "%s" given', __METHOD__, $encoding));
171-
}
172-
173-
if ('' === $characters) {
174-
return null === $encoding ? $string : mb_convert_encoding($string, $encoding);
175-
}
159+
$a = mb_ereg_replace($regex, "", $string, $options);
176160

177-
$regex = sprintf('[%s]+$', preg_quote($characters, '/'));
161+
if (null === $a) {
162+
throw new \Exception();
163+
}
178164

179-
if ('ASCII' === mb_detect_encoding($characters)) {
180-
$options = 'g';
181-
} else {
182-
$options = '';
165+
return $a;
166+
} catch (\Exception $e) {
167+
return preg_replace(sprintf('/%s/', $regex), "", $string);
183168
}
184-
185-
return mb_ereg_replace($regex, "", $string, $options);
186-
}
169+
}
187170
}

src/Php84/bootstrap.php

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,22 +40,13 @@ function array_all(array $array, callable $callback): bool { return p\Php84::arr
4040
}
4141

4242
if (!function_exists('mb_trim')) {
43-
function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string
44-
{
45-
return p\Php84::mb_trim($string, $characters, $encoding);
46-
}
43+
function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Php84::mb_trim($string, $characters, $encoding); }
4744
}
4845

4946
if (!function_exists('mb_ltrim')) {
50-
function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string
51-
{
52-
return p\Php84::mb_ltrim($string, $characters, $encoding);
53-
}
47+
function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Php84::mb_ltrim($string, $characters, $encoding); }
5448
}
5549

5650
if (!function_exists('mb_rtrim')) {
57-
function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string
58-
{
59-
return p\Php84::mb_rtrim($string, $characters, $encoding);
60-
}
51+
function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string { return p\Php84::mb_rtrim($string, $characters, $encoding); }
6152
}

tests/Mbstring/MbstringTest.php

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -808,4 +808,111 @@ public static function lcFirstDataProvider(): array
808808
['ß', 'ß'],
809809
];
810810
}
811+
812+
/**
 * Runs mb_trim() against every data set and compares the result strictly.
 *
 * @covers \Symfony\Polyfill\Mbstring\Mbstring::mb_trim
 *
 * @dataProvider mbTrimProvider
 */
public function testMbTrim(string $expected, string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): void
{
    $actual = mb_trim($string, $characters, $encoding);

    $this->assertSame($expected, $actual);
}
821+
822+
/**
 * Runs mb_ltrim() against every data set.
 *
 * The comparison is strict (assertSame), matching the mb_trim() and mb_rtrim()
 * tests, so a result of the wrong type cannot pass through loose equality.
 *
 * @covers \Symfony\Polyfill\Mbstring\Mbstring::mb_ltrim
 *
 * @dataProvider mbLTrimProvider
 */
public function testMbLTrim(string $expected, string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): void
{
    $this->assertSame($expected, mb_ltrim($string, $characters, $encoding));
}
831+
832+
/**
 * Runs mb_rtrim() against every data set and compares the result strictly.
 *
 * @covers \Symfony\Polyfill\Mbstring\Mbstring::mb_rtrim
 *
 * @dataProvider mbRTrimProvider
 */
public function testMbRTrim(string $expected, string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): void
{
    $actual = mb_rtrim($string, $characters, $encoding);

    $this->assertSame($expected, $actual);
}
841+
842+
/**
 * Expects a \ValueError when mb_trim() is called with the encoding name "NULL".
 */
public function testMbTrimException(): void
{
    $invalidEncoding = "NULL";

    $this->expectException(\ValueError::class);

    mb_trim("\u{180F}", "", $invalidEncoding);
}
847+
848+
/**
 * Checks trimming of input that is not UTF-8 encoded (SJIS, UTF-16LE, UTF-16BE).
 */
public function testMbTrimEncoding(): void
{
    // SJIS 0x8140 is the ideographic space and 0x82A0 is "あ": stripping the spaces must leave "あ".
    $sjisTrimmed = mb_trim("\x81\x40\x82\xa0\x81\x40", "\x81\x40", "SJIS");
    $this->assertSame('あ', mb_convert_encoding($sjisTrimmed, "UTF-8", "SJIS"));

    // The expected hex strings are the raw UTF-16 code units of "漢字" (U+6F22 U+5B57) in each byte order.
    $utf16leTrimmed = mb_ltrim(mb_convert_encoding("\u{FFFE}漢字", "UTF-16LE", "UTF-8"), mb_convert_encoding("\u{FFFE}\u{FEFF}", "UTF-16LE", "UTF-8"), "UTF-16LE");
    $this->assertSame('226f575b', bin2hex($utf16leTrimmed));

    $utf16beTrimmed = mb_ltrim(mb_convert_encoding("\u{FEFF}漢字", "UTF-16BE", "UTF-8"), mb_convert_encoding("\u{FFFE}\u{FEFF}", "UTF-16BE", "UTF-8"), "UTF-16BE");
    $this->assertSame('6f225b57', bin2hex($utf16beTrimmed));
}
854+
855+
/**
 * Data sets for testMbTrim(), each shaped as [expected, string, characters?, encoding?].
 *
 * An empty $characters string disables trimming, so rows that expect characters
 * to be removed must list those characters explicitly.
 */
public static function mbTrimProvider(): iterable
{
    yield ['ABC', 'ABC'];
    yield ['ABC', "\0\t\nABC \0\t\n"];
    yield ["\0\t\nABC \0\t\n", "\0\t\nABC \0\t\n", ''];

    yield ['', ''];

    yield ["あいうえおあお", " あいうえおあお ", " ", "UTF-8"];
    yield ["foo BAR Spa", "foo BAR Spaß", "ß", "UTF-8"];
    yield ["oo BAR Spaß", "oo BAR Spaß", "f", "UTF-8"];

    // The order of the characters in the list must not matter.
    yield ["oo BAR Spa", "foo BAR Spaß", "ßf", "UTF-8"];
    yield ["oo BAR Spa", "foo BAR Spaß", "fß", "UTF-8"];
    yield ["いうおえお", " あいうおえお あ", " あ", "UTF-8"];
    yield ["いうおえお", " あいうおえお あ", "あ ", "UTF-8"];
    yield [" あいうおえお ", " あいうおえお a", "あa", "UTF-8"];
    // A lone lead byte is not a character and must not strip anything.
    yield [" あいうおえお a", " あいうおえお a", "\xe3", "UTF-8"];

    yield ["", str_repeat(" ", 129)];
    yield ["a", str_repeat(" ", 129) . "a"];

    yield ["", " \f\n\r\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}"];

    yield [' abcd ', ' abcd ', ''];

    yield ['f', 'foo', 'oo'];

    // The trailing newline shields the "oo" from the end-of-string anchor.
    yield ["foo\n", "foo\n", 'o'];
}
885+
886+
/**
 * Data sets for testMbLTrim(), each shaped as [expected, string, characters?, encoding?].
 *
 * An empty $characters string disables trimming, so rows that expect characters
 * to be removed must list those characters explicitly.
 */
public static function mbLTrimProvider(): iterable
{
    yield ['ABC', 'ABC'];
    yield ["ABC \0\t\n", "\0\t\nABC \0\t\n"];
    yield ["\0\t\nABC \0\t\n", "\0\t\nABC \0\t\n", ''];

    yield ['', ''];

    yield [' test ', ' test ', ''];

    // Only the leading run of "あ" is removed; the run after "い" stays.
    yield ['いああああ', 'あああああああああああああああああああああああああああああああああいああああ', 'あ'];

    yield ["漢字", "\u{FFFE}漢字", "\u{FFFE}\u{FEFF}"];
    yield [' abcd ', ' abcd ', ''];
}
901+
902+
/**
 * Data sets for testMbRTrim(), each shaped as [expected, string, characters?, encoding?].
 *
 * An empty $characters string disables trimming, so rows that expect characters
 * to be removed must list those characters explicitly.
 */
public static function mbRTrimProvider(): iterable
{
    yield ['ABC', 'ABC'];
    yield ["ABC", "ABC \0\t\n"];
    yield ["\0\t\nABC \0\t\n", "\0\t\nABC \0\t\n", ''];

    yield ['', ''];

    // A right trim never touches leading whitespace: the input comes back unchanged.
    yield [str_repeat(" ", 129) . "a", str_repeat(" ", 129) . "a"];

    // Only the trailing run of "あ" is removed; the run before "い" stays.
    yield ['あああああああああああああああああああああああああああああああああい', 'あああああああああああああああああああああああああああああああああいああああ', 'あ'];

    yield [' abcd ', ' abcd ', ''];

    // The trailing newline shields the "oo" from the end-of-string anchor.
    yield ["foo\n", "foo\n", 'o'];
}
811918
}

0 commit comments

Comments
 (0)