Skip to content

Commit bbb934e

Browse files
zeertzjq and yegappan authored
vim-patch:9.0.1629: having utf16idx() rounding up is inconvenient (neovim#24019)
Problem:  Having utf16idx() rounding up is inconvenient.
Solution: Make utf16idx() round down. (Yegappan Lakshmanan, closes vim/vim#12523)

vim/vim@9570703

Co-authored-by: Yegappan Lakshmanan <[email protected]>
1 parent 79a5b89 commit bbb934e

File tree

3 files changed

+18
-13
lines changed

3 files changed

+18
-13
lines changed

runtime/doc/builtin.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9146,8 +9146,8 @@ utf16idx({string}, {idx} [, {countcc} [, {charidx}]])
91469146
When {charidx} is present and TRUE, {idx} is used as the
91479147
character index in the String {string} instead of as the byte
91489148
index.
9149-
An {idx} in the middle of a UTF-8 sequence is rounded upwards
9150-
to the end of that sequence.
9149+
An {idx} in the middle of a UTF-8 sequence is rounded
9150+
downwards to the beginning of that sequence.
91519151

91529152
Returns -1 if the arguments are invalid or if there are less
91539153
than {idx} bytes in {string}. If there are exactly {idx} bytes

src/nvim/strings.c

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2014,6 +2014,9 @@ void f_strtrans(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
20142014
}
20152015

20162016
/// "utf16idx()" function
2017+
///
2018+
/// Converts a byte or character offset in a string to the corresponding UTF-16
2019+
/// code unit offset.
20172020
void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
20182021
{
20192022
rettv->vval.v_number = -1;
@@ -2050,6 +2053,7 @@ void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
20502053

20512054
const char *p;
20522055
int len;
2056+
int utf16idx = 0;
20532057
for (p = str, len = 0; charidx ? idx >= 0 : p <= str + idx; len++) {
20542058
if (*p == NUL) {
20552059
// If the index is exactly the number of bytes or characters in the
@@ -2059,6 +2063,7 @@ void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
20592063
}
20602064
return;
20612065
}
2066+
utf16idx = len;
20622067
const int clen = ptr2len(p);
20632068
const int c = (clen > 1) ? utf_ptr2char(p) : *p;
20642069
if (c > 0xFFFF) {
@@ -2070,7 +2075,7 @@ void f_utf16idx(typval_T *argvars, typval_T *rettv, EvalFuncData fptr)
20702075
}
20712076
}
20722077

2073-
rettv->vval.v_number = len > 0 ? len - 1 : 0;
2078+
rettv->vval.v_number = utf16idx;
20742079
}
20752080

20762081
/// "tolower(string)" function

test/old/testdir/test_functions.vim

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,14 +1395,14 @@ func Test_utf16idx_from_byteidx()
13951395
" UTF-16 index of a string with four byte characters
13961396
let str = 'a😊😊b'
13971397
call assert_equal(0, utf16idx(str, 0))
1398-
call assert_equal(2, utf16idx(str, 1))
1399-
call assert_equal(2, utf16idx(str, 2))
1400-
call assert_equal(2, utf16idx(str, 3))
1401-
call assert_equal(2, utf16idx(str, 4))
1402-
call assert_equal(4, utf16idx(str, 5))
1403-
call assert_equal(4, utf16idx(str, 6))
1404-
call assert_equal(4, utf16idx(str, 7))
1405-
call assert_equal(4, utf16idx(str, 8))
1398+
call assert_equal(1, utf16idx(str, 1))
1399+
call assert_equal(1, utf16idx(str, 2))
1400+
call assert_equal(1, utf16idx(str, 3))
1401+
call assert_equal(1, utf16idx(str, 4))
1402+
call assert_equal(3, utf16idx(str, 5))
1403+
call assert_equal(3, utf16idx(str, 6))
1404+
call assert_equal(3, utf16idx(str, 7))
1405+
call assert_equal(3, utf16idx(str, 8))
14061406
call assert_equal(5, utf16idx(str, 9))
14071407
call assert_equal(6, utf16idx(str, 10))
14081408
call assert_equal(-1, utf16idx(str, 11))
@@ -1498,8 +1498,8 @@ func Test_utf16idx_from_charidx()
14981498
" UTF-16 index of a string with four byte characters
14991499
let str = "a😊😊b"
15001500
call assert_equal(0, utf16idx(str, 0, v:false, v:true))
1501-
call assert_equal(2, utf16idx(str, 1, v:false, v:true))
1502-
call assert_equal(4, utf16idx(str, 2, v:false, v:true))
1501+
call assert_equal(1, utf16idx(str, 1, v:false, v:true))
1502+
call assert_equal(3, utf16idx(str, 2, v:false, v:true))
15031503
call assert_equal(5, utf16idx(str, 3, v:false, v:true))
15041504
call assert_equal(6, utf16idx(str, 4, v:false, v:true))
15051505
call assert_equal(-1, utf16idx(str, 5, v:false, v:true))

0 commit comments

Comments
 (0)