X-Git-Url: http://nsz.repo.hu/git/?p=musl;a=blobdiff_plain;f=src%2Fregex%2Fregcomp.c;h=5cedfd520285072610e9e0ffec059cebd280bfbf;hp=8987f5aa684290b17d28bd8dfd2a91b6aa7de872;hb=c713d8797804903b54203a645e023e2077c7556d;hpb=58bf74850f5f7286dc290aa22ad982f50620a1c8 diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c index 8987f5aa..5cedfd52 100644 --- a/src/regex/regcomp.c +++ b/src/regex/regcomp.c @@ -53,7 +53,6 @@ typedef struct { tre_ctype_t class; tre_ctype_t *neg_classes; int backref; - int *params; } tre_pos_and_tags_t; @@ -103,10 +102,7 @@ typedef struct { long code_min; long code_max; int position; - union { - tre_ctype_t class; - int *params; - } u; + tre_ctype_t class; tre_ctype_t *neg_classes; } tre_literal_t; @@ -652,7 +648,7 @@ tre_parse_bracket_items(tre_parse_ctx_t *ctx, int negate, status = tre_new_item(ctx->mem, min, max, &i, &max_i, items); if (status != REG_OK) break; - ((tre_literal_t*)((*items)[i-1])->obj)->u.class = class; + ((tre_literal_t*)((*items)[i-1])->obj)->class = class; } /* Add opposite-case counterpoints if REG_ICASE is present. @@ -961,6 +957,8 @@ tre_parse(tre_parse_ctx_t *ctx) tre_stack_t *stack = ctx->stack; int bottom = tre_stack_num_objects(stack); int depth = 0; + wchar_t wc; + int clen; if (!ctx->nofirstsub) { @@ -1050,8 +1048,6 @@ tre_parse(tre_parse_ctx_t *ctx) } case PARSE_UNION: - if (!*ctx->re) - break; switch (*ctx->re) { case CHAR_PIPE: @@ -1084,8 +1080,6 @@ tre_parse(tre_parse_ctx_t *ctx) case PARSE_POSTFIX: /* Parse postfix operators. */ - if (!*ctx->re) - break; switch (*ctx->re) { case CHAR_PLUS: @@ -1105,20 +1099,6 @@ tre_parse(tre_parse_ctx_t *ctx) if (*ctx->re == CHAR_QUESTIONMARK) rep_max = 1; - { - if (*(ctx->re + 1) == CHAR_QUESTIONMARK) - { - minimal = 1; - ctx->re++; - } - else if (*(ctx->re + 1) == CHAR_STAR - || *(ctx->re + 1) == CHAR_PLUS) - { - /* These are reserved for future extensions. 
*/ - return REG_BADRPT; - } - } - ctx->re++; tmp_node = tre_ast_new_iter(ctx->mem, result, rep_min, rep_max, minimal); @@ -1161,18 +1141,13 @@ tre_parse(tre_parse_ctx_t *ctx) an empty set of `()', a bracket expression, `.', `^', `$', a `\' followed by a character, or a single character. */ - /* End of regexp? (empty string). */ - if (!*ctx->re) - goto parse_literal; - switch (*ctx->re) { case CHAR_LPAREN: /* parenthesized subexpression */ - if (ctx->cflags & REG_EXTENDED - || (ctx->re > ctx->re_start - && *(ctx->re - 1) == CHAR_BACKSLASH)) + if (ctx->cflags & REG_EXTENDED) { + lparen: depth++; { ctx->re++; @@ -1188,25 +1163,6 @@ tre_parse(tre_parse_ctx_t *ctx) goto parse_literal; break; - case CHAR_RPAREN: /* end of current subexpression */ - if ((ctx->cflags & REG_EXTENDED && depth > 0) - || (ctx->re > ctx->re_start - && *(ctx->re - 1) == CHAR_BACKSLASH)) - { - /* We were expecting an atom, but instead the current - subexpression was closed. POSIX leaves the meaning of - this to be implementation-defined. We interpret this as - an empty expression (which matches an empty string). */ - result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (result == NULL) - return REG_ESPACE; - if (!(ctx->cflags & REG_EXTENDED)) - ctx->re--; - } - else - goto parse_literal; - break; - case CHAR_LBRACKET: /* bracket expression */ ctx->re++; status = tre_parse_bracket(ctx, &result); @@ -1217,13 +1173,14 @@ tre_parse(tre_parse_ctx_t *ctx) case CHAR_BACKSLASH: /* If this is "\(" or "\)" chew off the backslash and try again. */ - if (!(ctx->cflags & REG_EXTENDED) - && (*(ctx->re + 1) == CHAR_LPAREN - || *(ctx->re + 1) == CHAR_RPAREN)) + if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_LPAREN) { ctx->re++; - STACK_PUSHX(stack, int, PARSE_ATOM); - break; + goto lparen; + } + if (!(ctx->cflags & REG_EXTENDED) && *(ctx->re + 1) == CHAR_RPAREN) + { + goto empty_atom; } /* If a macro is used, parse the expanded macro recursively. 
*/ @@ -1245,7 +1202,7 @@ tre_parse(tre_parse_ctx_t *ctx) } } - if (!*ctx->re) + if (!ctx->re[1]) /* Trailing backslash. */ return REG_EESCAPE; @@ -1383,14 +1340,13 @@ tre_parse(tre_parse_ctx_t *ctx) break; case CHAR_CARET: /* beginning of line assertion */ - /* '^' has a special meaning everywhere in EREs, and in the - beginning of the RE and after \( is BREs. */ + /* '^' has a special meaning everywhere in EREs, and at + beginning of BRE. */ if (ctx->cflags & REG_EXTENDED - || (ctx->re - 2 >= ctx->re_start - && *(ctx->re - 2) == CHAR_BACKSLASH - && *(ctx->re - 1) == CHAR_LPAREN) || ctx->re == ctx->re_start) { + if (!(ctx->cflags & REG_EXTENDED)) + STACK_PUSHX(stack, int, PARSE_CATENATION); result = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1); if (result == NULL) @@ -1403,10 +1359,8 @@ tre_parse(tre_parse_ctx_t *ctx) case CHAR_DOLLAR: /* end of line assertion. */ /* '$' is special everywhere in EREs, and in the end of the - string and before \) is BREs. */ + string in BREs. */ if (ctx->cflags & REG_EXTENDED - || (*(ctx->re + 1) == CHAR_BACKSLASH - && *(ctx->re + 2) == CHAR_RPAREN) || !*(ctx->re + 1)) { result = tre_ast_new_literal(ctx->mem, ASSERTION, @@ -1419,34 +1373,28 @@ tre_parse(tre_parse_ctx_t *ctx) goto parse_literal; break; + case CHAR_RPAREN: + if (!depth) + goto parse_literal; + case CHAR_STAR: + case CHAR_PIPE: + case CHAR_LBRACE: + case CHAR_PLUS: + case CHAR_QUESTIONMARK: + if (!(ctx->cflags & REG_EXTENDED)) + goto parse_literal; + + case 0: + empty_atom: + result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); + if (!result) + return REG_ESPACE; + break; + default: parse_literal: - /* We are expecting an atom. If the subexpression (or the whole - regexp ends here, we interpret it as an empty expression - (which matches an empty string). 
*/ - if ( - (!*ctx->re - || *ctx->re == CHAR_STAR - || (ctx->cflags & REG_EXTENDED - && (*ctx->re == CHAR_PIPE - || *ctx->re == CHAR_LBRACE - || *ctx->re == CHAR_PLUS - || *ctx->re == CHAR_QUESTIONMARK)) - /* Test for "\)" in BRE mode. */ - || (!(ctx->cflags & REG_EXTENDED) - && !*(ctx->re + 1) - && *ctx->re == CHAR_BACKSLASH - && *(ctx->re + 1) == CHAR_LBRACE))) - { - result = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1); - if (!result) - return REG_ESPACE; - break; - } - - wchar_t wc; - int clen = mbtowc(&wc, ctx->re, -1); + clen = mbtowc(&wc, ctx->re, -1); if (clen<0) clen=1, wc=WEOF; /* Note that we can't use an tre_isalpha() test here, since there @@ -2279,8 +2227,7 @@ typedef enum { iteration count to a catenated sequence of copies of the node. */ static reg_errcode_t tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast, - int *position, tre_tag_direction_t *tag_directions, - int *max_depth) + int *position, tre_tag_direction_t *tag_directions) { reg_errcode_t status = REG_OK; int bottom = tre_stack_num_objects(stack); @@ -2579,8 +2526,7 @@ tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2, set to the number of tags seen on the path. 
*/ static reg_errcode_t tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags, - int *assertions, int *params, int *num_tags_seen, - int *params_seen) + int *assertions, int *num_tags_seen) { tre_literal_t *lit; tre_union_t *uni; @@ -2751,7 +2697,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) node->lastpos = tre_set_one(mem, lit->position, (int)lit->code_min, (int)lit->code_max, - lit->u.class, lit->neg_classes, + lit->class, lit->neg_classes, -1); if (!node->lastpos) return REG_ESPACE; @@ -2822,8 +2768,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) case NFL_POST_CATENATION: { - int num_tags, *tags, assertions, params_seen; - int *params; + int num_tags, *tags, assertions; reg_errcode_t status; tre_catenation_t *cat = node->obj; node->nullable = cat->left->nullable && cat->right->nullable; @@ -2835,8 +2780,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) with tre_match_empty() to get the number of tags and parameters. */ status = tre_match_empty(stack, cat->left, - NULL, NULL, NULL, &num_tags, - &params_seen); + NULL, NULL, &num_tags); if (status != REG_OK) return status; /* Allocate arrays for the tags and parameters. */ @@ -2848,7 +2792,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) /* Second pass with tre_mach_empty() to get the list of tags and parameters. */ status = tre_match_empty(stack, cat->left, tags, - &assertions, params, NULL, NULL); + &assertions, NULL); if (status != REG_OK) { xfree(tags); @@ -2873,8 +2817,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) with tre_match_empty() to get the number of tags and parameters. */ status = tre_match_empty(stack, cat->right, - NULL, NULL, NULL, &num_tags, - &params_seen); + NULL, NULL, &num_tags); if (status != REG_OK) return status; /* Allocate arrays for the tags and parameters. 
*/ @@ -2886,7 +2829,7 @@ tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree) /* Second pass with tre_mach_empty() to get the list of tags and parameters. */ status = tre_match_empty(stack, cat->right, tags, - &assertions, params, NULL, NULL); + &assertions, NULL); if (status != REG_OK) { xfree(tags); @@ -3139,7 +3082,7 @@ tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions, int -regcomp(regex_t *preg, const char *regex, int cflags) +regcomp(regex_t *restrict preg, const char *restrict regex, int cflags) { tre_stack_t *stack; tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r; @@ -3242,7 +3185,7 @@ regcomp(regex_t *preg, const char *regex, int cflags) /* Expand iteration nodes. */ errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position, - tag_directions, &tnfa->params_depth); + tag_directions); if (errcode != REG_OK) ERROR_EXIT(errcode);