@@ -31,31 +31,56 @@ public function __construct(Extractor $extractor)
3131 preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ contentType , $ match );
3232 if (!empty ($ match [1 ])) {
3333 $ encoding = trim ($ match [1 ], ', ' );
34- try {
35- $ ret = mb_encoding_aliases ($ encoding ?? '' );
36- if ($ ret === false ) {
37- $ encoding = null ;
38- }
39- } catch (\ValueError $ exception ) {
40- $ encoding = null ;
41- }
34+ $ encoding = $ this ->getValidEncoding ($ encoding );
4235 }
4336 if (is_null ($ encoding ) && !empty ($ html )) {
4437 preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ html , $ match );
4538 if (!empty ($ match [1 ])) {
4639 $ encoding = trim ($ match [1 ], ', ' );
40+ $ encoding = $ this ->getValidEncoding ($ encoding );
4741 }
42+ }
43+ $ this ->document = !empty ($ html ) ? Parser::parse ($ html , $ encoding ) : new DOMDocument ();
44+ $ this ->initXPath ();
45+ }
46+
/**
 * Return the given encoding name if mbstring recognises it, otherwise null.
 *
 * Validity is decided solely by whether mb_encoding_aliases() accepts the
 * name. The number of aliases it returns is deliberately NOT inspected:
 * an empty alias list does not mean the name is invalid, so treating an
 * empty array as "invalid" would reject usable encodings.
 *
 * PHP version differences handled here:
 * - PHP 7.4: mb_encoding_aliases() returns false (and emits a Warning) for
 *   an unknown encoding.
 * - PHP 8.0+: mb_encoding_aliases() throws ValueError for an unknown or
 *   empty encoding.
 *
 * Both signals are handled in a single code path, so no PHP_VERSION_ID
 * switch is needed.
 *
 * @see https://www.php.net/manual/en/function.mb-encoding-aliases.php
 *
 * @param string|null $encoding Candidate encoding name, e.g. taken from a charset declaration.
 *
 * @return string|null The unchanged name when it is known to mbstring, null otherwise.
 */
private function getValidEncoding(?string $encoding): ?string
{
    // Nothing to validate; returning early also avoids the PHP 7.4 Warning
    // and the PHP 8 ValueError raised for an empty name.
    if ($encoding === null || $encoding === '') {
        return null;
    }

    try {
        $aliases = mb_encoding_aliases($encoding);
    } catch (\ValueError $exception) {
        // PHP 8.0+: unknown encoding name.
        return null;
    }

    // PHP 7.4 reports an unknown encoding with false. Any array, including
    // an empty one, means the name itself was recognised.
    return $aliases === false ? null : $encoding;
}
6085
6186 private function initXPath ()
0 commit comments