X-Git-Url: http://nsz.repo.hu/git/?a=blobdiff_plain;f=src%2Fregex%2Fregcomp.c;h=fb24556ebe12e33cd07d86a7953e2ce30f45c3b7;hb=25ea9f712c30c32957de493d4711ee39d0bbb024;hp=b3dbb25231cd55ec0b715fd41553619fffa9e6e9;hpb=25160f1c08235cf5b6a9617c5640380618a0f6ff;p=musl

diff --git a/src/regex/regcomp.c b/src/regex/regcomp.c
index b3dbb252..fb24556e 100644
--- a/src/regex/regcomp.c
+++ b/src/regex/regcomp.c
@@ -401,8 +401,8 @@ typedef struct {
 	tre_ast_node_t *n;
 	/* Position in the regexp pattern after a parse function returns. */
 	const char *s;
-	/* The first character of the regexp. */
-	const char *re;
+	/* First character of the most recently started subexpression or branch. */
+	const char *start;
 	/* Current submatch ID. */
 	int submatch_id;
 	/* Current position (number of literal). */
@@ -636,6 +636,20 @@ static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
 		goto parse_bracket_done;
 
 	if (neg.negate) {
+		/*
+		 * With REG_NEWLINE, POSIX requires that newlines are not matched by
+		 * any form of a non-matching list.
+		 */
+		if (ctx->cflags & REG_NEWLINE) {
+			lit = tre_new_lit(&ls);
+			if (!lit) {
+				err = REG_ESPACE;
+				goto parse_bracket_done;
+			}
+			lit->code_min = '\n';
+			lit->code_max = '\n';
+			lit->position = -1;
+		}
 		/* Sort the array if we need to negate it. */
 		qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
 		/* extra lit for the last negated range */
@@ -708,7 +722,7 @@ static const char *parse_dup_count(const char *s, int *n)
 	return s;
 }
 
-static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
+static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
 {
 	int min, max;
 
@@ -723,19 +737,13 @@ static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
 		max > RE_DUP_MAX ||
 		min > RE_DUP_MAX ||
 		min < 0 ||
-		(!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
+		(!ere && *s++ != '\\') ||
 		*s++ != '}'
 	)
-		return REG_BADBR;
-
-	if (min == 0 && max == 0)
-		ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
-	else
-		ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
-	if (!ctx->n)
-		return REG_ESPACE;
-	ctx->s = s;
-	return REG_OK;
+		return 0;
+	*pmin = min;
+	*pmax = max;
+	return s;
 }
 
 static int hexval(unsigned c)
@@ -882,20 +890,19 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
 		break;
 	case '^':
 		/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
-		if (!ere && s != ctx->re)
+		if (!ere && s != ctx->start)
 			goto parse_literal;
 		node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
 		s++;
 		break;
 	case '$':
-		/* '$' is special everywhere in EREs, and in the end of the string in BREs. */
-		if (!ere && s[1])
+		/* '$' is special everywhere in EREs; in BREs only at the end of the pattern or right before \) or \|. */
+		if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|')))
 			goto parse_literal;
 		node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
 		s++;
 		break;
 	case '*':
-		return REG_BADPAT;
 	case '{':
 	case '+':
 	case '?':
@@ -951,7 +958,7 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 {
 	tre_ast_node_t *nbranch=0, *nunion=0;
 	int ere = ctx->cflags & REG_EXTENDED;
-	const char *s = ctx->re;
+	const char *s = ctx->start;
 	int subid = 0;
 	int depth = 0;
 	reg_errcode_t err;
@@ -969,6 +976,7 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 				s++;
 			depth++;
 			nbranch = nunion = 0;
+			ctx->start = s;
 			continue;
 		}
 		if ((!ere && *s == '\\' && s[1] == ')') ||
@@ -984,10 +992,9 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 		}
 
 	parse_iter:
-		/* extension: repetitions are rejected after an empty node
-		   eg. (+), |*, {2}, but assertions are not treated as empty
-		   so ^* or $? are accepted currently. */
 		for (;;) {
+			int min, max;
+
 			if (*s!='\\' && *s!='*') {
 				if (!ere)
 					break;
@@ -1002,26 +1009,33 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 			if (*s=='\\')
 				s++;
 
+			/* BRE: a '*' directly after a leading '^' is not a repetition; leave it unconsumed. */
+			if (!ere && s==ctx->start+1 && s[-1]=='^')
+				break;
+
 			/* extension: multiple consecutive *+?{,} is unspecified,
 			   but (a+)+ has to be supported so accepting a++ makes
 			   sense, note however that the RE_DUP_MAX limit can be
 			   circumvented: (a{255}){255} uses a lot of memory.. */
 			if (*s=='{') {
-				err = parse_dup(ctx, s+1);
-				if (err != REG_OK)
-					return err;
-				s = ctx->s;
+				s = parse_dup(s+1, ere, &min, &max);
+				if (!s)
+					return REG_BADBR;
 			} else {
-				int min=0, max=-1;
+				min=0;
+				max=-1;
 				if (*s == '+')
 					min = 1;
 				if (*s == '?')
 					max = 1;
 				s++;
-				ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
-				if (!ctx->n)
-					return REG_ESPACE;
 			}
+			if (max == 0)
+				ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+			else
+				ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
+			if (!ctx->n)
+				return REG_ESPACE;
 		}
 
 		nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
@@ -1039,8 +1053,10 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
 
 			if (c == '\\' && s[1] == '|') {
 				s+=2;
+				ctx->start = s;
 			} else if (c == '|') {
 				s++;
+				ctx->start = s;
 			} else {
 				if (c == '\\') {
 					if (!depth) return REG_EPAREN;
@@ -1107,6 +1123,7 @@ tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
   c->right->firstpos = NULL;
   c->right->lastpos = NULL;
   c->right->num_tags = 0;
+  c->right->num_submatches = 0;
   node->obj = c;
   node->type = CATENATION;
   return REG_OK;
@@ -1137,6 +1154,7 @@ tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
   c->left->firstpos = NULL;
   c->left->lastpos = NULL;
   c->left->num_tags = 0;
+  c->left->num_submatches = 0;
   node->obj = c;
   node->type = CATENATION;
   return REG_OK;
@@ -2689,7 +2707,7 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
 
   /* Allocate a stack used throughout the compilation process for various
      purposes. */
-  stack = tre_stack_new(512, 10240, 128);
+  stack = tre_stack_new(512, 1024000, 128);
   if (!stack)
     return REG_ESPACE;
   /* Allocate a fast memory allocator. */
@@ -2704,7 +2722,7 @@ regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
   memset(&parse_ctx, 0, sizeof(parse_ctx));
   parse_ctx.mem = mem;
   parse_ctx.stack = stack;
-  parse_ctx.re = regex;
+  parse_ctx.start = regex;
   parse_ctx.cflags = cflags;
   parse_ctx.max_backref = -1;
   errcode = tre_parse(&parse_ctx);