Skip to content

Commit 8977902

Browse files
committed
feat: medium: utf-8-validation.go
1 parent 61713c2 commit 8977902

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed

src/medium/utf-8-validation.go

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
package medium
2+
3+
// validUtf8 returns whether the provided integer array is a valid UTF-8 encoding
4+
// Source: https://leetcode.com/problems/utf-8-validation
5+
// A character in UTF8 can be from 1 to 4 bytes long, subjected to the following rules:
6+
//
7+
// For a 1-byte character, the first bit is a 0, followed by its Unicode code.
8+
// For an n-bytes character, the first n bits are all one's, the n + 1 bit is 0, followed by n - 1 bytes with the most significant 2 bits being 10.
9+
// --------------------+-----------------------------------------
10+
// Number of Bytes | UTF-8 Octet Sequence (binary)
11+
// --------------------+-----------------------------------------
12+
// 1 | 0xxxxxxx
13+
// 2 | 110xxxxx 10xxxxxx
14+
// 3 | 1110xxxx 10xxxxxx 10xxxxxx
15+
// 4 | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
16+
func validUtf8(data []int) bool {
17+
current := 0
18+
19+
for _, d := range data {
20+
if d < 0 || d > 255 { // ensure input is valid
21+
return false
22+
}
23+
24+
if current > 0 {
25+
const continuation = 0x2 // most significant 2 bits being 10
26+
if getFirstTwoBits(d) != continuation {
27+
return false
28+
}
29+
current--
30+
} else {
31+
if getFirstTwoBits(d) > 1 {
32+
l := getCharacterLength(d)
33+
if l <= 1 || l > 4 {
34+
return false
35+
}
36+
current = l - 1
37+
}
38+
}
39+
40+
}
41+
42+
return current == 0
43+
}
44+
45+
// getFirstTwoBits returns d shifted right by six bits. For a byte value in
// 0..255 this is its two most significant bits, as a number from 0 to 3.
func getFirstTwoBits(d int) int {
	const lowBits = 6 // number of bits in a byte below its top two

	top := d >> lowBits
	return top
}
48+
49+
// getCharacterLength counts the consecutive one bits at the top of the low
// byte of d, scanning from bit 7 downwards and stopping at the first zero bit.
// For a UTF-8 lead byte this count is the length of the character in bytes.
// The result ranges from 0 (bit 7 clear) to 8 (all eight bits set).
func getCharacterLength(d int) int {
	ones := 0

	// Slide a single-bit mask from bit 7 towards bit 0; the mask becomes zero
	// once all eight bits have been examined.
	for mask := 1 << 7; mask != 0 && d&mask != 0; mask >>= 1 {
		ones++
	}

	return ones
}

0 commit comments

Comments
 (0)