Skip to content

Commit 36f341c

Browse files
authored
gh-127787: allow retrieving the clipped slice length in _PyUnicodeError_GetParams (GH-128980)
1 parent bf150f6 commit 36f341c

File tree

2 files changed

+110
-15
lines changed

2 files changed

+110
-15
lines changed

Include/internal/pycore_pyerrors.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -196,9 +196,9 @@ extern int _PyUnicodeError_GetParams(
196196
Py_ssize_t *objlen,
197197
Py_ssize_t *start,
198198
Py_ssize_t *end,
199+
Py_ssize_t *slen,
199200
int as_bytes);
200201

201-
202202
#ifdef __cplusplus
203203
}
204204
#endif

Objects/exceptions.c

+109-14
Original file line numberDiff line numberDiff line change
@@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
29542954
* The 'start' can be negative or not, but when adjusting the value,
29552955
* we clip it in [0, max(0, objlen - 1)] and do not interpret it as
29562956
* a relative offset.
2957+
*
2958+
* This function always succeeds.
29572959
*/
2958-
static inline Py_ssize_t
2960+
static Py_ssize_t
29592961
unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29602962
{
29612963
assert(objlen >= 0);
@@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
29692971
}
29702972

29712973

2974+
/* Assert some properties of the adjusted 'start' value. */
2975+
#ifndef NDEBUG
2976+
static void
2977+
assert_adjusted_unicode_error_start(Py_ssize_t start, Py_ssize_t objlen)
2978+
{
2979+
assert(objlen >= 0);
2980+
/* in the future, `min_start` may be something else */
2981+
Py_ssize_t min_start = 0;
2982+
assert(start >= min_start);
2983+
/* in the future, `max_start` may be something else */
2984+
Py_ssize_t max_start = Py_MAX(min_start, objlen - 1);
2985+
assert(start <= max_start);
2986+
}
2987+
#else
2988+
#define assert_adjusted_unicode_error_start(...)
2989+
#endif
2990+
2991+
29722992
/*
29732993
* Adjust the (exclusive) 'end' value of a UnicodeError object.
29742994
*
29752995
* The 'end' can be negative or not, but when adjusting the value,
29762996
* we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and
29772997
* do not interpret it as a relative offset.
2998+
*
2999+
* This function always succeeds.
29783000
*/
2979-
static inline Py_ssize_t
3001+
static Py_ssize_t
29803002
unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29813003
{
29823004
assert(objlen >= 0);
@@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
29903012
}
29913013

29923014

3015+
/* Assert some properties of the adjusted 'end' value. */
3016+
#ifndef NDEBUG
3017+
static void
3018+
assert_adjusted_unicode_error_end(Py_ssize_t end, Py_ssize_t objlen)
3019+
{
3020+
assert(objlen >= 0);
3021+
/* in the future, `min_end` may be something else */
3022+
Py_ssize_t min_end = Py_MIN(1, objlen);
3023+
assert(end >= min_end);
3024+
/* in the future, `max_end` may be something else */
3025+
Py_ssize_t max_end = Py_MAX(min_end, objlen);
3026+
assert(end <= max_end);
3027+
}
3028+
#else
3029+
#define assert_adjusted_unicode_error_end(...)
3030+
#endif
3031+
3032+
3033+
/*
3034+
* Adjust the length of the range described by a UnicodeError object.
3035+
*
3036+
* The 'start' and 'end' arguments must have been obtained by
3037+
* unicode_error_adjust_start() and unicode_error_adjust_end().
3038+
*
3039+
* The result is clipped in [0, objlen]. By construction, it
3040+
* will always be smaller than 'objlen' as 'start' and 'end'
3041+
* are smaller than 'objlen'.
3042+
*/
3043+
static Py_ssize_t
3044+
unicode_error_adjust_len(Py_ssize_t start, Py_ssize_t end, Py_ssize_t objlen)
3045+
{
3046+
assert_adjusted_unicode_error_start(start, objlen);
3047+
assert_adjusted_unicode_error_end(end, objlen);
3048+
Py_ssize_t ranlen = end - start;
3049+
assert(ranlen <= objlen);
3050+
return ranlen < 0 ? 0 : ranlen;
3051+
}
3052+
3053+
3054+
/* Assert some properties of the adjusted range 'len' value. */
3055+
#ifndef NDEBUG
3056+
static void
3057+
assert_adjusted_unicode_error_len(Py_ssize_t ranlen, Py_ssize_t objlen)
3058+
{
3059+
assert(objlen >= 0);
3060+
assert(ranlen >= 0);
3061+
assert(ranlen <= objlen);
3062+
}
3063+
#else
3064+
#define assert_adjusted_unicode_error_len(...)
3065+
#endif
3066+
3067+
29933068
/*
29943069
* Get various common parameters of a UnicodeError object.
29953070
*
@@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
30043079
* objlen The 'object' length.
30053080
* start The clipped 'start' attribute.
30063081
* end The clipped 'end' attribute.
3082+
* slen The length of the slice described by the clipped 'start'
3083+
* and 'end' values. It always lies in [0, objlen].
30073084
*
30083085
* An output parameter can be NULL to indicate that
30093086
* the corresponding value does not need to be stored.
30103087
*
30113088
* Input parameter:
30123089
*
3013-
* as_bytes If 1, the error's 'object' attribute must be a bytes object,
3014-
* i.e. the call is for a `UnicodeDecodeError`. Otherwise, the
3015-
* 'object' attribute must be a string.
3090+
* as_bytes If true, the error's 'object' attribute must be a `bytes` object,
3091+
* i.e. 'self' is a `UnicodeDecodeError` instance. Otherwise,
3092+
* the 'object' attribute must be a string.
30163093
*
30173094
* A TypeError is raised if the 'object' type is incompatible.
30183095
*/
30193096
int
30203097
_PyUnicodeError_GetParams(PyObject *self,
30213098
PyObject **obj, Py_ssize_t *objlen,
3022-
Py_ssize_t *start, Py_ssize_t *end,
3099+
Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t *slen,
30233100
int as_bytes)
30243101
{
30253102
assert(self != NULL);
@@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
30343111
if (objlen != NULL) {
30353112
*objlen = n;
30363113
}
3114+
3115+
Py_ssize_t start_value = -1;
3116+
if (start != NULL || slen != NULL) {
3117+
start_value = unicode_error_adjust_start(exc->start, n);
3118+
}
30373119
if (start != NULL) {
3038-
*start = unicode_error_adjust_start(exc->start, n);
3039-
assert(*start >= 0);
3040-
assert(*start <= n);
3120+
assert_adjusted_unicode_error_start(start_value, n);
3121+
*start = start_value;
3122+
}
3123+
3124+
Py_ssize_t end_value = -1;
3125+
if (end != NULL || slen != NULL) {
3126+
end_value = unicode_error_adjust_end(exc->end, n);
30413127
}
30423128
if (end != NULL) {
3043-
*end = unicode_error_adjust_end(exc->end, n);
3044-
assert(*end >= 0);
3045-
assert(*end <= n);
3129+
assert_adjusted_unicode_error_end(end_value, n);
3130+
*end = end_value;
3131+
}
3132+
3133+
if (slen != NULL) {
3134+
*slen = unicode_error_adjust_len(start_value, end_value, n);
3135+
assert_adjusted_unicode_error_len(*slen, n);
30463136
}
3137+
30473138
if (obj != NULL) {
30483139
*obj = r;
30493140
}
@@ -3111,7 +3202,9 @@ static inline int
31113202
unicode_error_get_start_impl(PyObject *self, Py_ssize_t *start, int as_bytes)
31123203
{
31133204
assert(self != NULL);
3114-
return _PyUnicodeError_GetParams(self, NULL, NULL, start, NULL, as_bytes);
3205+
return _PyUnicodeError_GetParams(self, NULL, NULL,
3206+
start, NULL, NULL,
3207+
as_bytes);
31153208
}
31163209

31173210

@@ -3177,7 +3270,9 @@ static inline int
31773270
unicode_error_get_end_impl(PyObject *self, Py_ssize_t *end, int as_bytes)
31783271
{
31793272
assert(self != NULL);
3180-
return _PyUnicodeError_GetParams(self, NULL, NULL, NULL, end, as_bytes);
3273+
return _PyUnicodeError_GetParams(self, NULL, NULL,
3274+
NULL, end, NULL,
3275+
as_bytes);
31813276
}
31823277

31833278

0 commit comments

Comments
 (0)