projects
/
musl
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
remove unused __getdents, rename and move file
[musl]
/
src
/
regex
/
regcomp.c
diff --git
a/src/regex/regcomp.c
b/src/regex/regcomp.c
index
ccd3755
..
fb24556
100644
(file)
--- a/
src/regex/regcomp.c
+++ b/
src/regex/regcomp.c
@@
-401,8
+401,8
@@
typedef struct {
tre_ast_node_t *n;
/* Position in the regexp pattern after a parse function returns. */
const char *s;
tre_ast_node_t *n;
/* Position in the regexp pattern after a parse function returns. */
const char *s;
- /* The first character of the
regexp
. */
- const char *
re
;
+ /* The first character of the
last subexpression parsed
. */
+ const char *
start
;
/* Current submatch ID. */
int submatch_id;
/* Current position (number of literal). */
/* Current submatch ID. */
int submatch_id;
/* Current position (number of literal). */
@@
-636,6
+636,20
@@
static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
goto parse_bracket_done;
if (neg.negate) {
goto parse_bracket_done;
if (neg.negate) {
+ /*
+ * With REG_NEWLINE, POSIX requires that newlines are not matched by
+ * any form of a non-matching list.
+ */
+ if (ctx->cflags & REG_NEWLINE) {
+ lit = tre_new_lit(&ls);
+ if (!lit) {
+ err = REG_ESPACE;
+ goto parse_bracket_done;
+ }
+ lit->code_min = '\n';
+ lit->code_max = '\n';
+ lit->position = -1;
+ }
/* Sort the array if we need to negate it. */
qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
/* extra lit for the last negated range */
/* Sort the array if we need to negate it. */
qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
/* extra lit for the last negated range */
@@
-708,7
+722,7
@@
static const char *parse_dup_count(const char *s, int *n)
return s;
}
return s;
}
-static
reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s
)
+static
const char *parse_dup(const char *s, int ere, int *pmin, int *pmax
)
{
int min, max;
{
int min, max;
@@
-723,19
+737,13
@@
static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
max > RE_DUP_MAX ||
min > RE_DUP_MAX ||
min < 0 ||
max > RE_DUP_MAX ||
min > RE_DUP_MAX ||
min < 0 ||
- (!
(ctx->cflags & REG_EXTENDED)
&& *s++ != '\\') ||
+ (!
ere
&& *s++ != '\\') ||
*s++ != '}'
)
*s++ != '}'
)
- return REG_BADBR;
-
- if (min == 0 && max == 0)
- ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
- else
- ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
- if (!ctx->n)
- return REG_ESPACE;
- ctx->s = s;
- return REG_OK;
+ return 0;
+ *pmin = min;
+ *pmax = max;
+ return s;
}
static int hexval(unsigned c)
}
static int hexval(unsigned c)
@@
-838,6
+846,9
@@
static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
s--;
break;
case '{':
s--;
break;
case '{':
+ case '+':
+ case '?':
+ /* extension: treat \+, \? as repetitions in BRE */
/* reject repetitions after empty expression in BRE */
if (!ere)
return REG_BADRPT;
/* reject repetitions after empty expression in BRE */
if (!ere)
return REG_BADRPT;
@@
-879,20
+890,19
@@
static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
break;
case '^':
/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
break;
case '^':
/* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
- if (!ere && s != ctx->
re
)
+ if (!ere && s != ctx->
start
)
goto parse_literal;
node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
s++;
break;
case '$':
goto parse_literal;
node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
s++;
break;
case '$':
- /* '$' is special everywhere in EREs, and
in the end of the string in BREs
. */
- if (!ere && s[1])
+ /* '$' is special everywhere in EREs, and
at the end of a BRE subexpression
. */
+ if (!ere && s[1]
&& (s[1]!='\\'|| (s[2]!=')' && s[2]!='|'))
)
goto parse_literal;
node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
s++;
break;
case '*':
goto parse_literal;
node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
s++;
break;
case '*':
- return REG_BADPAT;
case '{':
case '+':
case '?':
case '{':
case '+':
case '?':
@@
-948,7
+958,7
@@
static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
{
tre_ast_node_t *nbranch=0, *nunion=0;
int ere = ctx->cflags & REG_EXTENDED;
{
tre_ast_node_t *nbranch=0, *nunion=0;
int ere = ctx->cflags & REG_EXTENDED;
- const char *s = ctx->
re
;
+ const char *s = ctx->
start
;
int subid = 0;
int depth = 0;
reg_errcode_t err;
int subid = 0;
int depth = 0;
reg_errcode_t err;
@@
-966,6
+976,7
@@
static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
s++;
depth++;
nbranch = nunion = 0;
s++;
depth++;
nbranch = nunion = 0;
+ ctx->start = s;
continue;
}
if ((!ere && *s == '\\' && s[1] == ')') ||
continue;
}
if ((!ere && *s == '\\' && s[1] == ')') ||
@@
-981,10
+992,9
@@
static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
}
parse_iter:
}
parse_iter:
- /* extension: repetitions are rejected after an empty node
- eg. (+), |*, {2}, but assertions are not treated as empty
- so ^* or $? are accepted currently. */
for (;;) {
for (;;) {
+ int min, max;
+
if (*s!='\\' && *s!='*') {
if (!ere)
break;
if (*s!='\\' && *s!='*') {
if (!ere)
break;
@@
-993,31
+1003,39
@@
static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
}
if (*s=='\\' && ere)
break;
}
if (*s=='\\' && ere)
break;
- if (*s=='\\' && s[1]!='{')
+ /* extension: treat \+, \? as repetitions in BRE */
+ if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
break;
if (*s=='\\')
s++;
break;
if (*s=='\\')
s++;
+ /* handle ^* at the start of a BRE. */
+ if (!ere && s==ctx->start+1 && s[-1]=='^')
+ break;
+
/* extension: multiple consecutive *+?{,} is unspecified,
but (a+)+ has to be supported so accepting a++ makes
sense, note however that the RE_DUP_MAX limit can be
circumvented: (a{255}){255} uses a lot of memory.. */
if (*s=='{') {
/* extension: multiple consecutive *+?{,} is unspecified,
but (a+)+ has to be supported so accepting a++ makes
sense, note however that the RE_DUP_MAX limit can be
circumvented: (a{255}){255} uses a lot of memory.. */
if (*s=='{') {
- err = parse_dup(ctx, s+1);
- if (err != REG_OK)
- return err;
- s = ctx->s;
+ s = parse_dup(s+1, ere, &min, &max);
+ if (!s)
+ return REG_BADBR;
} else {
} else {
- int min=0, max=-1;
+ min=0;
+ max=-1;
if (*s == '+')
min = 1;
if (*s == '?')
max = 1;
s++;
if (*s == '+')
min = 1;
if (*s == '?')
max = 1;
s++;
- ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
- if (!ctx->n)
- return REG_ESPACE;
}
}
+ if (max == 0)
+ ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
+ else
+ ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
+ if (!ctx->n)
+ return REG_ESPACE;
}
nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
}
nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
@@
-1035,8
+1053,10
@@
static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
if (c == '\\' && s[1] == '|') {
s+=2;
if (c == '\\' && s[1] == '|') {
s+=2;
+ ctx->start = s;
} else if (c == '|') {
s++;
} else if (c == '|') {
s++;
+ ctx->start = s;
} else {
if (c == '\\') {
if (!depth) return REG_EPAREN;
} else {
if (c == '\\') {
if (!depth) return REG_EPAREN;
@@
-1103,6
+1123,7
@@
tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
c->right->firstpos = NULL;
c->right->lastpos = NULL;
c->right->num_tags = 0;
c->right->firstpos = NULL;
c->right->lastpos = NULL;
c->right->num_tags = 0;
+ c->right->num_submatches = 0;
node->obj = c;
node->type = CATENATION;
return REG_OK;
node->obj = c;
node->type = CATENATION;
return REG_OK;
@@
-1133,6
+1154,7
@@
tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
c->left->firstpos = NULL;
c->left->lastpos = NULL;
c->left->num_tags = 0;
c->left->firstpos = NULL;
c->left->lastpos = NULL;
c->left->num_tags = 0;
+ c->left->num_submatches = 0;
node->obj = c;
node->type = CATENATION;
return REG_OK;
node->obj = c;
node->type = CATENATION;
return REG_OK;
@@
-2685,7
+2707,7
@@
regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
/* Allocate a stack used throughout the compilation process for various
purposes. */
/* Allocate a stack used throughout the compilation process for various
purposes. */
- stack = tre_stack_new(512, 10240, 128);
+ stack = tre_stack_new(512, 10240
00
, 128);
if (!stack)
return REG_ESPACE;
/* Allocate a fast memory allocator. */
if (!stack)
return REG_ESPACE;
/* Allocate a fast memory allocator. */
@@
-2700,7
+2722,7
@@
regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
memset(&parse_ctx, 0, sizeof(parse_ctx));
parse_ctx.mem = mem;
parse_ctx.stack = stack;
memset(&parse_ctx, 0, sizeof(parse_ctx));
parse_ctx.mem = mem;
parse_ctx.stack = stack;
- parse_ctx.
re
= regex;
+ parse_ctx.
start
= regex;
parse_ctx.cflags = cflags;
parse_ctx.max_backref = -1;
errcode = tre_parse(&parse_ctx);
parse_ctx.cflags = cflags;
parse_ctx.max_backref = -1;
errcode = tre_parse(&parse_ctx);