Skip to content

Commit fb000c5

Browse files
committed
Ensure meta_encoding checks meta charset tag (closes #919)
1 parent 14b7dc2 commit fb000c5

File tree

2 files changed

+19
-1
lines changed

2 files changed

+19
-1
lines changed

lib/nokogiri/html/document.rb

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -6,8 +6,12 @@ class Document < Nokogiri::XML::Document
66
# then nil is returned.
77
def meta_encoding
88
meta = meta_content_type and
9-
match = /charset\s*=\s*([\w-]+)/i.match(meta['content']) and
9+
match = /charset\s*=\s*([\w-]+)/i.match(meta['content'])
10+
if match
1011
match[1]
12+
else
13+
cs = at('meta[@charset]') and cs[:charset]
14+
end
1115
end
1216

1317
###

test/html/test_document.rb

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -166,6 +166,20 @@ def test_meta_encoding_handles_malformed_content_charset
166166
assert_nil doc.meta_encoding
167167
end
168168

169+
def test_meta_encoding_checks_charset
170+
doc = Nokogiri::HTML(<<-eohtml)
171+
<html>
172+
<head>
173+
<meta charset="UTF-8">
174+
</head>
175+
<body>
176+
foo
177+
</body>
178+
</html>
179+
eohtml
180+
assert_equal 'UTF-8', doc.meta_encoding
181+
end
182+
169183
def test_meta_encoding=
170184
@html.meta_encoding = 'EUC-JP'
171185
assert_equal 'EUC-JP', @html.meta_encoding

0 commit comments

Comments
 (0)