@@ -2954,8 +2954,10 @@ unicode_error_set_end_impl(PyObject *self, Py_ssize_t end)
2954
2954
* The 'start' can be negative or not, but when adjusting the value,
2955
2955
* we clip it in [0, max(0, objlen - 1)] and do not interpret it as
2956
2956
* a relative offset.
2957
+ *
2958
+ * This function always succeeds.
2957
2959
*/
2958
- static inline Py_ssize_t
2960
+ static Py_ssize_t
2959
2961
unicode_error_adjust_start (Py_ssize_t start , Py_ssize_t objlen )
2960
2962
{
2961
2963
assert (objlen >= 0 );
@@ -2969,14 +2971,34 @@ unicode_error_adjust_start(Py_ssize_t start, Py_ssize_t objlen)
2969
2971
}
2970
2972
2971
2973
2974
+ /* Debug-build check: assert that an adjusted 'start' value lies in [0, max(0, objlen - 1)]. When NDEBUG is defined, the function is not compiled and the name is a macro instead. */
2975
+ #ifndef NDEBUG
2976
+ static void
2977
+ assert_adjusted_unicode_error_start (Py_ssize_t start , Py_ssize_t objlen )
2978
+ {
2979
+ assert (objlen >= 0 );
2980
+ /* lower bound of an adjusted 'start'; in the future, `min_start` may be something else */
2981
+ Py_ssize_t min_start = 0 ;
2982
+ assert (start >= min_start );
2983
+ /* upper bound: objlen - 1, but never below `min_start` (so 0 when objlen is 0); in the future, `max_start` may be something else */
2984
+ Py_ssize_t max_start = Py_MAX (min_start , objlen - 1 );
2985
+ assert (start <= max_start );
2986
+ }
2987
+ #else
2988
+ #define assert_adjusted_unicode_error_start (...)
2989
+ #endif
2990
+
2991
+
2972
2992
/*
2973
2993
* Adjust the (exclusive) 'end' value of a UnicodeError object.
2974
2994
*
2975
2995
* The 'end' can be negative or not, but when adjusting the value,
2976
2996
* we clip it in [min(1, objlen), max(min(1, objlen), objlen)] and
2977
2997
* do not interpret it as a relative offset.
2998
+ *
2999
+ * This function always succeeds.
2978
3000
*/
2979
- static inline Py_ssize_t
3001
+ static Py_ssize_t
2980
3002
unicode_error_adjust_end (Py_ssize_t end , Py_ssize_t objlen )
2981
3003
{
2982
3004
assert (objlen >= 0 );
@@ -2990,6 +3012,59 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
2990
3012
}
2991
3013
2992
3014
3015
+ /* Debug-build check: assert that an adjusted (exclusive) 'end' value lies in [min(1, objlen), max(min(1, objlen), objlen)]. When NDEBUG is defined, the function is not compiled and the name is a macro instead. */
3016
+ #ifndef NDEBUG
3017
+ static void
3018
+ assert_adjusted_unicode_error_end (Py_ssize_t end , Py_ssize_t objlen )
3019
+ {
3020
+ assert (objlen >= 0 );
3021
+ /* lower bound: 1, or 0 when objlen is 0; in the future, `min_end` may be something else */
3022
+ Py_ssize_t min_end = Py_MIN (1 , objlen );
3023
+ assert (end >= min_end );
3024
+ /* upper bound: objlen, but never below `min_end`; in the future, `max_end` may be something else */
3025
+ Py_ssize_t max_end = Py_MAX (min_end , objlen );
3026
+ assert (end <= max_end );
3027
+ }
3028
+ #else
3029
+ #define assert_adjusted_unicode_error_end (...)
3030
+ #endif
3031
+
3032
+
3033
+ /*
3034
+ * Adjust the length of the range described by a UnicodeError object.
3035
+ *
3036
+ * The 'start' and 'end' arguments must have been obtained by
3037
+ * unicode_error_adjust_start() and unicode_error_adjust_end().
3038
+ *
3039
+ * The result is clipped in [0, objlen]: a negative 'end - start'
3040
+ * yields 0 and, by construction, the result never exceeds 'objlen'
3041
+ * since 'start' is non-negative and 'end' is at most 'objlen'.
3042
+ */
3043
+ static Py_ssize_t
3044
+ unicode_error_adjust_len (Py_ssize_t start , Py_ssize_t end , Py_ssize_t objlen )
3045
+ {
3046
+ assert_adjusted_unicode_error_start (start , objlen );
3047
+ assert_adjusted_unicode_error_end (end , objlen );
3048
+ Py_ssize_t ranlen = end - start ; /* negative when 'start' lies past 'end' */
3049
+ assert (ranlen <= objlen );
3050
+ return ranlen < 0 ? 0 : ranlen ; /* clip negative lengths to 0 */
3051
+ }
3052
+
3053
+
3054
+ /* Debug-build check: assert that an adjusted range length 'ranlen' lies in [0, objlen]. When NDEBUG is defined, the function is not compiled and the name is a macro instead. */
3055
+ #ifndef NDEBUG
3056
+ static void
3057
+ assert_adjusted_unicode_error_len (Py_ssize_t ranlen , Py_ssize_t objlen )
3058
+ {
3059
+ assert (objlen >= 0 );
3060
+ assert (ranlen >= 0 );
3061
+ assert (ranlen <= objlen );
3062
+ }
3063
+ #else
3064
+ #define assert_adjusted_unicode_error_len (...)
3065
+ #endif
3066
+
3067
+
2993
3068
/*
2994
3069
* Get various common parameters of a UnicodeError object.
2995
3070
*
@@ -3004,22 +3079,24 @@ unicode_error_adjust_end(Py_ssize_t end, Py_ssize_t objlen)
3004
3079
* objlen The 'object' length.
3005
3080
* start The clipped 'start' attribute.
3006
3081
* end The clipped 'end' attribute.
3082
+ * slen The length of the slice described by the clipped 'start'
3083
+ * and 'end' values. It always lies in [0, objlen].
3007
3084
*
3008
3085
* An output parameter can be NULL to indicate that
3009
3086
* the corresponding value does not need to be stored.
3010
3087
*
3011
3088
* Input parameter:
3012
3089
*
3013
- * as_bytes If 1 , the error's 'object' attribute must be a bytes object ,
3014
- * i.e. the call is for a `UnicodeDecodeError`. Otherwise, the
3015
- * 'object' attribute must be a string.
3090
+ * as_bytes If true , the error's 'object' attribute must be a ` bytes` ,
3091
+ * i.e. 'self' is a `UnicodeDecodeError` instance . Otherwise,
3092
+ * the 'object' attribute must be a string.
3016
3093
*
3017
3094
* A TypeError is raised if the 'object' type is incompatible.
3018
3095
*/
3019
3096
int
3020
3097
_PyUnicodeError_GetParams (PyObject * self ,
3021
3098
PyObject * * obj , Py_ssize_t * objlen ,
3022
- Py_ssize_t * start , Py_ssize_t * end ,
3099
+ Py_ssize_t * start , Py_ssize_t * end , Py_ssize_t * slen ,
3023
3100
int as_bytes )
3024
3101
{
3025
3102
assert (self != NULL );
@@ -3034,16 +3111,30 @@ _PyUnicodeError_GetParams(PyObject *self,
3034
3111
if (objlen != NULL ) {
3035
3112
* objlen = n ;
3036
3113
}
3114
+
3115
+ Py_ssize_t start_value = -1 ;
3116
+ if (start != NULL || slen != NULL ) {
3117
+ start_value = unicode_error_adjust_start (exc -> start , n );
3118
+ }
3037
3119
if (start != NULL ) {
3038
- * start = unicode_error_adjust_start (exc -> start , n );
3039
- assert (* start >= 0 );
3040
- assert (* start <= n );
3120
+ assert_adjusted_unicode_error_start (start_value , n );
3121
+ * start = start_value ;
3122
+ }
3123
+
3124
+ Py_ssize_t end_value = -1 ;
3125
+ if (end != NULL || slen != NULL ) {
3126
+ end_value = unicode_error_adjust_end (exc -> end , n );
3041
3127
}
3042
3128
if (end != NULL ) {
3043
- * end = unicode_error_adjust_end (exc -> end , n );
3044
- assert (* end >= 0 );
3045
- assert (* end <= n );
3129
+ assert_adjusted_unicode_error_end (end_value , n );
3130
+ * end = end_value ;
3131
+ }
3132
+
3133
+ if (slen != NULL ) {
3134
+ * slen = unicode_error_adjust_len (start_value , end_value , n );
3135
+ assert_adjusted_unicode_error_len (* slen , n );
3046
3136
}
3137
+
3047
3138
if (obj != NULL ) {
3048
3139
* obj = r ;
3049
3140
}
@@ -3111,7 +3202,9 @@ static inline int
3111
3202
unicode_error_get_start_impl (PyObject * self , Py_ssize_t * start , int as_bytes )
3112
3203
{
3113
3204
assert (self != NULL );
3114
- return _PyUnicodeError_GetParams (self , NULL , NULL , start , NULL , as_bytes );
3205
+ return _PyUnicodeError_GetParams (self , NULL , NULL ,
3206
+ start , NULL , NULL ,
3207
+ as_bytes );
3115
3208
}
3116
3209
3117
3210
@@ -3177,7 +3270,9 @@ static inline int
3177
3270
unicode_error_get_end_impl (PyObject * self , Py_ssize_t * end , int as_bytes )
3178
3271
{
3179
3272
assert (self != NULL );
3180
- return _PyUnicodeError_GetParams (self , NULL , NULL , NULL , end , as_bytes );
3273
+ return _PyUnicodeError_GetParams (self , NULL , NULL ,
3274
+ NULL , end , NULL ,
3275
+ as_bytes );
3181
3276
}
3182
3277
3183
3278
0 commit comments