regex: reject repetitions in some cases with REG_BADRPT
[musl] / src / regex / regcomp.c
index 978dd87..078f657 100644 (file)
@@ -834,22 +834,24 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
                                        return REG_EBRACE;
                                s++;
                        }
-                       node = tre_ast_new_literal(ctx->mem, v, v, ctx->position);
-                       ctx->position++;
+                       node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
                        s--;
                        break;
+               case '{':
+                       /* reject repetitions after empty expression in BRE */
+                       if (!ere)
+                               return REG_BADRPT;
                default:
                        if (!ere && (unsigned)*s-'1' < 9) {
                                /* back reference */
                                int val = *s - '0';
-                               node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
+                               node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
                                ctx->max_backref = MAX(val, ctx->max_backref);
                        } else {
                                /* extension: accept unknown escaped char
                                   as a literal */
                                goto parse_literal;
                        }
-                       ctx->position++;
                }
                s++;
                break;
@@ -882,10 +884,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
                s++;
                break;
        case '*':
-       case '|':
+               return REG_BADPAT;
        case '{':
        case '+':
        case '?':
+               /* reject repetitions after empty expression in ERE */
+               if (ere)
+                       return REG_BADRPT;
+       case '|':
                if (!ere)
                        goto parse_literal;
        case 0:
@@ -966,8 +972,9 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
                }
 
        parse_iter:
-               /* extension: repetitions are accepted after an empty node
-                  eg. (+), ^*, a$?, a|{2} */
+               /* extension: repetitions are rejected after an empty node,
+                  e.g. (+), |*, {2}, but assertions are not treated as empty,
+                  so ^* and $? are currently accepted. */
                switch (*s) {
                case '+':
                case '?':
@@ -1584,7 +1591,8 @@ tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
                  {
                    status = tre_add_tag_right(mem, left, tag_left);
                    tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
-                   status = tre_add_tag_right(mem, right, tag_right);
+                   if (status == REG_OK)
+                     status = tre_add_tag_right(mem, right, tag_right);
                    tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
                  }
                num_tags += 2;