Skip to content

bpo-47256: re module: limit the maximum number of capturing groups to 1,073,741,823, which increases the depth of backtracking. #32411

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from Apr 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
:mod:`re` module: limit the maximum number of capturing groups to
1,073,741,823 in 64-bit builds; this increases the depth of backtracking.
12 changes: 6 additions & 6 deletions Modules/_sre/sre.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@
#define SRE_CODE Py_UCS4
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
# define SRE_MAXGROUPS ((SRE_CODE)INT32_MAX / 2)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_VOID_P / 2)
#endif

typedef struct {
Expand Down Expand Up @@ -73,12 +73,12 @@ typedef struct {
Py_ssize_t pos, endpos;
int isbytes;
int charsize; /* character size */
/* registers */
Py_ssize_t lastindex;
Py_ssize_t lastmark;
const void** mark;
int match_all;
int must_advance;
/* marks */
int lastmark;
int lastindex;
const void** mark;
/* dynamically allocated stuff */
char* data_stack;
size_t data_stack_size;
Expand Down
76 changes: 38 additions & 38 deletions Modules/_sre/sre_lib.h
Original file line number Diff line number Diff line change
Expand Up @@ -450,20 +450,23 @@ do { \

#define MARK_PUSH(lastmark) \
do if (lastmark >= 0) { \
i = lastmark; /* ctx->lastmark may change if reallocated */ \
DATA_STACK_PUSH(state, state->mark, (i+1)*sizeof(void*)); \
size_t _marks_size = (lastmark+1) * sizeof(void*); \
DATA_STACK_PUSH(state, state->mark, _marks_size); \
} while (0)
#define MARK_POP(lastmark) \
do if (lastmark >= 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 1); \
size_t _marks_size = (lastmark+1) * sizeof(void*); \
DATA_STACK_POP(state, state->mark, _marks_size, 1); \
} while (0)
#define MARK_POP_KEEP(lastmark) \
do if (lastmark >= 0) { \
DATA_STACK_POP(state, state->mark, (lastmark+1)*sizeof(void*), 0); \
size_t _marks_size = (lastmark+1) * sizeof(void*); \
DATA_STACK_POP(state, state->mark, _marks_size, 0); \
} while (0)
#define MARK_POP_DISCARD(lastmark) \
do if (lastmark >= 0) { \
DATA_STACK_POP_DISCARD(state, (lastmark+1)*sizeof(void*)); \
size_t _marks_size = (lastmark+1) * sizeof(void*); \
DATA_STACK_POP_DISCARD(state, _marks_size); \
} while (0)

#define JUMP_NONE 0
Expand All @@ -488,10 +491,10 @@ do { \
ctx->pattern = pattern; \
ctx->ptr = ptr; \
DATA_ALLOC(SRE(match_context), nextctx); \
nextctx->last_ctx_pos = ctx_pos; \
nextctx->jump = jumpvalue; \
nextctx->pattern = nextpattern; \
nextctx->toplevel = toplevel_; \
nextctx->jump = jumpvalue; \
nextctx->last_ctx_pos = ctx_pos; \
pattern = nextpattern; \
ctx_pos = alloc_pos; \
ctx = nextctx; \
Expand All @@ -507,18 +510,18 @@ do { \
DO_JUMPX(jumpvalue, jumplabel, nextpattern, 0)

typedef struct {
Py_ssize_t last_ctx_pos;
Py_ssize_t jump;
const SRE_CHAR* ptr;
const SRE_CODE* pattern;
Py_ssize_t count;
Py_ssize_t lastmark;
Py_ssize_t lastindex;
union {
SRE_CODE chr;
SRE_REPEAT* rep;
} u;
int lastmark;
int lastindex;
const SRE_CODE* pattern;
const SRE_CHAR* ptr;
int toplevel;
int jump;
Py_ssize_t last_ctx_pos;
} SRE(match_context);

#define MAYBE_CHECK_SIGNALS \
Expand Down Expand Up @@ -558,8 +561,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
{
const SRE_CHAR* end = (const SRE_CHAR *)state->end;
Py_ssize_t alloc_pos, ctx_pos = -1;
Py_ssize_t i, ret = 0;
Py_ssize_t jump;
Py_ssize_t ret = 0;
int jump;
unsigned int sigcount=0;

SRE(match_context)* ctx;
Expand Down Expand Up @@ -607,20 +610,22 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* <MARK> <gid> */
TRACE(("|%p|%p|MARK %d\n", pattern,
ptr, pattern[0]));
i = pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
/* state->lastmark is the highest valid index in the
state->mark array. If it is increased by more than 1,
the intervening marks must be set to NULL to signal
that these marks have not been encountered. */
Py_ssize_t j = state->lastmark + 1;
while (j < i)
state->mark[j++] = NULL;
state->lastmark = i;
{
int i = pattern[0];
if (i & 1)
state->lastindex = i/2 + 1;
if (i > state->lastmark) {
/* state->lastmark is the highest valid index in the
state->mark array. If it is increased by more than 1,
the intervening marks must be set to NULL to signal
that these marks have not been encountered. */
int j = state->lastmark + 1;
while (j < i)
state->mark[j++] = NULL;
state->lastmark = i;
}
state->mark[i] = ptr;
}
state->mark[i] = ptr;
pattern++;
DISPATCH;

Expand Down Expand Up @@ -1373,9 +1378,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* match backreference */
TRACE(("|%p|%p|GROUPREF %d\n", pattern,
ptr, pattern[0]));
i = pattern[0];
{
Py_ssize_t groupref = i+i;
int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
Expand All @@ -1398,9 +1402,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* match backreference */
TRACE(("|%p|%p|GROUPREF_IGNORE %d\n", pattern,
ptr, pattern[0]));
i = pattern[0];
{
Py_ssize_t groupref = i+i;
int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
Expand All @@ -1424,9 +1427,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* match backreference */
TRACE(("|%p|%p|GROUPREF_UNI_IGNORE %d\n", pattern,
ptr, pattern[0]));
i = pattern[0];
{
Py_ssize_t groupref = i+i;
int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
Expand All @@ -1450,9 +1452,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
/* match backreference */
TRACE(("|%p|%p|GROUPREF_LOC_IGNORE %d\n", pattern,
ptr, pattern[0]));
i = pattern[0];
{
Py_ssize_t groupref = i+i;
int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
RETURN_FAILURE;
} else {
Expand All @@ -1476,9 +1477,8 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
TRACE(("|%p|%p|GROUPREF_EXISTS %d\n", pattern,
ptr, pattern[0]));
/* <GROUPREF_EXISTS> <group> <skip> codeyes <JUMP> codeno ... */
i = pattern[0];
{
Py_ssize_t groupref = i+i;
int groupref = pattern[0] * 2;
if (groupref >= state->lastmark) {
pattern += pattern[1];
DISPATCH;
Expand Down