2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the last subexpression parsed. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!ere && *s++ != '\\') ||
735 static int hexval(unsigned c)
737 if (c-'0'<10) return c-'0';
739 if (c-'a'<6) return c-'a'+10;
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
745 if (node->submatch_id >= 0) {
746 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
749 n = tre_ast_new_catenation(ctx->mem, n, node);
752 n->num_submatches = node->num_submatches;
755 node->submatch_id = subid;
756 node->num_submatches++;
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
764 Branch = Atom | Branch Atom
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
771 Regex = Branch | Regex '|' Branch
772 Branch = Atom | Branch Atom
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
781 int len, ere = ctx->cflags & REG_EXTENDED;
783 tre_ast_node_t *node;
787 return parse_bracket(ctx, s+1);
789 p = tre_expand_macro(s+1);
791 /* assume \X expansion is a single atom */
792 reg_errcode_t err = parse_atom(ctx, p);
796 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
801 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
804 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
820 for (i=0; i<len && v<0x110000; i++) {
831 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
842 /* extension: treat \| as alternation in BRE */
844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
850 if (!ere && (unsigned)*s-'1' < 9) {
853 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
854 ctx->max_backref = MAX(val, ctx->max_backref);
856 /* extension: accept unknown escaped char
864 if (ctx->cflags & REG_NEWLINE) {
865 tre_ast_node_t *tmp1, *tmp2;
866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
873 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
878 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879 if (!ere && s != ctx->start)
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
885 /* '$' is special everywhere in EREs, and at the end of a BRE subexpression. */
886 if (!ere && s[1] && (s[1]!='\\'|| (s[2]!=')' && s[2]!='|')))
888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
895 /* reject repetitions after empty expression in ERE */
902 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
906 len = mbtowc(&wc, s, -1);
909 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
910 tre_ast_node_t *tmp1, *tmp2;
911 /* multiple opposite case characters are not supported */
912 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
913 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
915 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
919 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
933 #define PUSHPTR(err, s, v) do { \
934 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
938 #define PUSHINT(err, s, v) do { \
939 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
943 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
945 tre_ast_node_t *nbranch=0, *nunion=0;
946 int ere = ctx->cflags & REG_EXTENDED;
947 const char *s = ctx->start;
951 tre_stack_t *stack = ctx->stack;
953 PUSHINT(err, stack, subid++);
955 if ((!ere && *s == '\\' && s[1] == '(') ||
956 (ere && *s == '(')) {
957 PUSHPTR(err, stack, nunion);
958 PUSHPTR(err, stack, nbranch);
959 PUSHINT(err, stack, subid++);
964 nbranch = nunion = 0;
968 if ((!ere && *s == '\\' && s[1] == ')') ||
969 (ere && *s == ')' && depth)) {
970 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
974 err = parse_atom(ctx, s);
984 if (*s!='\\' && *s!='*') {
987 if (*s!='+' && *s!='?' && *s!='{')
992 /* extension: treat \+, \? as repetitions in BRE */
993 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
998 /* handle ^* at the start of a BRE. */
999 if (!ere && s==ctx->start+1 && s[-1]=='^')
1002 /* extension: multiple consecutive *+?{,} is unspecified,
1003 but (a+)+ has to be supported so accepting a++ makes
1004 sense, note however that the RE_DUP_MAX limit can be
1005 circumvented: (a{255}){255} uses a lot of memory.. */
1007 s = parse_dup(s+1, ere, &min, &max);
1020 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1022 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1027 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1028 if ((ere && *s == '|') ||
1029 (ere && *s == ')' && depth) ||
1030 (!ere && *s == '\\' && s[1] == ')') ||
1031 /* extension: treat \| as alternation in BRE */
1032 (!ere && *s == '\\' && s[1] == '|') ||
1034 /* extension: empty branch is unspecified (), (|a), (a|)
1035 here they are not rejected but match on empty string */
1037 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1040 if (c == '\\' && s[1] == '|') {
1043 } else if (c == '|') {
1048 if (!depth) return REG_EPAREN;
1050 } else if (c == ')')
1053 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1056 if (!c && depth<0) {
1057 ctx->submatch_id = subid;
1062 nbranch = tre_stack_pop_voidptr(stack);
1063 nunion = tre_stack_pop_voidptr(stack);
1071 /***********************************************************************
1073 ***********************************************************************/
1078 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1083 Algorithms to setup tags so that submatch addressing can be done.
1087 /* Inserts a catenation node to the root of the tree given in `node'.
1088 As the left child a new tag with number `tag_id' to `node' is added,
1089 and the right child is the old root. */
1090 static reg_errcode_t
1091 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1093 tre_catenation_t *c;
1095 c = tre_mem_alloc(mem, sizeof(*c));
1098 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1099 if (c->left == NULL)
1101 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1102 if (c->right == NULL)
1105 c->right->obj = node->obj;
1106 c->right->type = node->type;
1107 c->right->nullable = -1;
1108 c->right->submatch_id = -1;
1109 c->right->firstpos = NULL;
1110 c->right->lastpos = NULL;
1111 c->right->num_tags = 0;
1112 c->right->num_submatches = 0;
1114 node->type = CATENATION;
1118 /* Inserts a catenation node to the root of the tree given in `node'.
1119 As the right child a new tag with number `tag_id' to `node' is added,
1120 and the left child is the old root. */
1121 static reg_errcode_t
1122 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1124 tre_catenation_t *c;
1126 c = tre_mem_alloc(mem, sizeof(*c));
1129 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1130 if (c->right == NULL)
1132 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1133 if (c->left == NULL)
1136 c->left->obj = node->obj;
1137 c->left->type = node->type;
1138 c->left->nullable = -1;
1139 c->left->submatch_id = -1;
1140 c->left->firstpos = NULL;
1141 c->left->lastpos = NULL;
1142 c->left->num_tags = 0;
1143 c->left->num_submatches = 0;
1145 node->type = CATENATION;
1151 ADDTAGS_AFTER_ITERATION,
1152 ADDTAGS_AFTER_UNION_LEFT,
1153 ADDTAGS_AFTER_UNION_RIGHT,
1154 ADDTAGS_AFTER_CAT_LEFT,
1155 ADDTAGS_AFTER_CAT_RIGHT,
1156 ADDTAGS_SET_SUBMATCH_END
1157 } tre_addtags_symbol_t;
1166 /* Go through `regset' and set submatch data for submatches that are
1169 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1173 for (i = 0; regset[i] >= 0; i++)
1175 int id = regset[i] / 2;
1176 int start = !(regset[i] % 2);
1178 tnfa->submatch_data[id].so_tag = tag;
1180 tnfa->submatch_data[id].eo_tag = tag;
1186 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1187 subexpressions marked for submatch addressing can be traced. */
1188 static reg_errcode_t
1189 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1192 reg_errcode_t status = REG_OK;
1193 tre_addtags_symbol_t symbol;
1194 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1195 int bottom = tre_stack_num_objects(stack);
1196 /* True for first pass (counting number of needed tags) */
1197 int first_pass = (mem == NULL || tnfa == NULL);
1198 int *regset, *orig_regset;
1199 int num_tags = 0; /* Total number of tags. */
1200 int num_minimals = 0; /* Number of special minimal tags. */
1201 int tag = 0; /* The tag that is to be added next. */
1202 int next_tag = 1; /* Next tag to use after this one. */
1203 int *parents; /* Stack of submatches the current submatch is
1205 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1206 tre_tag_states_t *saved_states;
1208 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1212 tnfa->minimal_tags[0] = -1;
1215 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1219 orig_regset = regset;
1221 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1222 if (parents == NULL)
1229 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1230 if (saved_states == NULL)
1239 for (i = 0; i <= tnfa->num_submatches; i++)
1240 saved_states[i].tag = -1;
1243 STACK_PUSH(stack, voidptr, node);
1244 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1246 while (tre_stack_num_objects(stack) > bottom)
1248 if (status != REG_OK)
1251 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1255 case ADDTAGS_SET_SUBMATCH_END:
1257 int id = tre_stack_pop_int(stack);
1260 /* Add end of this submatch to regset. */
1261 for (i = 0; regset[i] >= 0; i++);
1262 regset[i] = id * 2 + 1;
1265 /* Pop this submatch from the parents stack. */
1266 for (i = 0; parents[i] >= 0; i++);
1267 parents[i - 1] = -1;
1271 case ADDTAGS_RECURSE:
1272 node = tre_stack_pop_voidptr(stack);
1274 if (node->submatch_id >= 0)
1276 int id = node->submatch_id;
1280 /* Add start of this submatch to regset. */
1281 for (i = 0; regset[i] >= 0; i++);
1287 for (i = 0; parents[i] >= 0; i++);
1288 tnfa->submatch_data[id].parents = NULL;
1291 int *p = xmalloc(sizeof(*p) * (i + 1));
1294 status = REG_ESPACE;
1297 assert(tnfa->submatch_data[id].parents == NULL);
1298 tnfa->submatch_data[id].parents = p;
1299 for (i = 0; parents[i] >= 0; i++)
1305 /* Add end of this submatch to regset after processing this
1307 STACK_PUSHX(stack, int, node->submatch_id);
1308 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1315 tre_literal_t *lit = node->obj;
1317 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1322 /* Regset is not empty, so add a tag before the
1323 literal or backref. */
1326 status = tre_add_tag_left(mem, node, tag);
1327 tnfa->tag_directions[tag] = direction;
1328 if (minimal_tag >= 0)
1330 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1331 tnfa->minimal_tags[i] = tag;
1332 tnfa->minimal_tags[i + 1] = minimal_tag;
1333 tnfa->minimal_tags[i + 2] = -1;
1337 tre_purge_regset(regset, tnfa, tag);
1352 assert(!IS_TAG(lit));
1358 tre_catenation_t *cat = node->obj;
1359 tre_ast_node_t *left = cat->left;
1360 tre_ast_node_t *right = cat->right;
1361 int reserved_tag = -1;
1364 /* After processing right child. */
1365 STACK_PUSHX(stack, voidptr, node);
1366 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1368 /* Process right child. */
1369 STACK_PUSHX(stack, voidptr, right);
1370 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1372 /* After processing left child. */
1373 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1374 if (left->num_tags > 0 && right->num_tags > 0)
1376 /* Reserve the next tag to the right child. */
1377 reserved_tag = next_tag;
1380 STACK_PUSHX(stack, int, reserved_tag);
1381 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1383 /* Process left child. */
1384 STACK_PUSHX(stack, voidptr, left);
1385 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1391 tre_iteration_t *iter = node->obj;
1395 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1399 STACK_PUSHX(stack, int, tag);
1400 STACK_PUSHX(stack, int, iter->minimal);
1402 STACK_PUSHX(stack, voidptr, node);
1403 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1405 STACK_PUSHX(stack, voidptr, iter->arg);
1406 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1408 /* Regset is not empty, so add a tag here. */
1409 if (regset[0] >= 0 || iter->minimal)
1414 status = tre_add_tag_left(mem, node, tag);
1416 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1418 tnfa->tag_directions[tag] = direction;
1419 if (minimal_tag >= 0)
1421 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1422 tnfa->minimal_tags[i] = tag;
1423 tnfa->minimal_tags[i + 1] = minimal_tag;
1424 tnfa->minimal_tags[i + 2] = -1;
1428 tre_purge_regset(regset, tnfa, tag);
1436 direction = TRE_TAG_MINIMIZE;
1441 tre_union_t *uni = node->obj;
1442 tre_ast_node_t *left = uni->left;
1443 tre_ast_node_t *right = uni->right;
1449 left_tag = next_tag;
1450 right_tag = next_tag + 1;
1455 right_tag = next_tag;
1458 /* After processing right child. */
1459 STACK_PUSHX(stack, int, right_tag);
1460 STACK_PUSHX(stack, int, left_tag);
1461 STACK_PUSHX(stack, voidptr, regset);
1462 STACK_PUSHX(stack, int, regset[0] >= 0);
1463 STACK_PUSHX(stack, voidptr, node);
1464 STACK_PUSHX(stack, voidptr, right);
1465 STACK_PUSHX(stack, voidptr, left);
1466 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1468 /* Process right child. */
1469 STACK_PUSHX(stack, voidptr, right);
1470 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1472 /* After processing left child. */
1473 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1475 /* Process left child. */
1476 STACK_PUSHX(stack, voidptr, left);
1477 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1479 /* Regset is not empty, so add a tag here. */
1485 status = tre_add_tag_left(mem, node, tag);
1486 tnfa->tag_directions[tag] = direction;
1487 if (minimal_tag >= 0)
1489 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1490 tnfa->minimal_tags[i] = tag;
1491 tnfa->minimal_tags[i + 1] = minimal_tag;
1492 tnfa->minimal_tags[i + 2] = -1;
1496 tre_purge_regset(regset, tnfa, tag);
1505 if (node->num_submatches > 0)
1507 /* The next two tags are reserved for markers. */
1517 if (node->submatch_id >= 0)
1520 /* Push this submatch on the parents stack. */
1521 for (i = 0; parents[i] >= 0; i++);
1522 parents[i] = node->submatch_id;
1523 parents[i + 1] = -1;
1526 break; /* end case: ADDTAGS_RECURSE */
1528 case ADDTAGS_AFTER_ITERATION:
1532 node = tre_stack_pop_voidptr(stack);
1535 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1536 + tre_stack_pop_int(stack);
1541 minimal = tre_stack_pop_int(stack);
1542 enter_tag = tre_stack_pop_int(stack);
1544 minimal_tag = enter_tag;
1550 direction = TRE_TAG_MINIMIZE;
1552 direction = TRE_TAG_MAXIMIZE;
1557 case ADDTAGS_AFTER_CAT_LEFT:
1559 int new_tag = tre_stack_pop_int(stack);
1560 next_tag = tre_stack_pop_int(stack);
1568 case ADDTAGS_AFTER_CAT_RIGHT:
1569 node = tre_stack_pop_voidptr(stack);
1571 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1572 + ((tre_catenation_t *)node->obj)->right->num_tags;
1575 case ADDTAGS_AFTER_UNION_LEFT:
1576 /* Lift the bottom of the `regset' array so that when processing
1577 the right operand the items currently in the array are
1578 invisible. The original bottom was saved at ADDTAGS_UNION and
1579 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1580 while (*regset >= 0)
1584 case ADDTAGS_AFTER_UNION_RIGHT:
1586 int added_tags, tag_left, tag_right;
1587 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1588 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1589 node = tre_stack_pop_voidptr(stack);
1590 added_tags = tre_stack_pop_int(stack);
1593 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1594 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1595 + ((node->num_submatches > 0) ? 2 : 0);
1597 regset = tre_stack_pop_voidptr(stack);
1598 tag_left = tre_stack_pop_int(stack);
1599 tag_right = tre_stack_pop_int(stack);
1601 /* Add tags after both children, the left child gets a smaller
1602 tag than the right child. This guarantees that we prefer
1603 the left child over the right child. */
1604 /* XXX - This is not always necessary (if the children have
1605 tags which must be seen for every match of that child). */
1606 /* XXX - Check if this is the only place where tre_add_tag_right
1607 is used. If so, use tre_add_tag_left (putting the tag before
1608 the child as opposed after the child) and throw away
1609 tre_add_tag_right. */
1610 if (node->num_submatches > 0)
1614 status = tre_add_tag_right(mem, left, tag_left);
1615 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1616 if (status == REG_OK)
1617 status = tre_add_tag_right(mem, right, tag_right);
1618 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1622 direction = TRE_TAG_MAXIMIZE;
1630 } /* end switch(symbol) */
1631 } /* end while(tre_stack_num_objects(stack) > bottom) */
1634 tre_purge_regset(regset, tnfa, tag);
1636 if (!first_pass && minimal_tag >= 0)
1639 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1640 tnfa->minimal_tags[i] = tag;
1641 tnfa->minimal_tags[i + 1] = minimal_tag;
1642 tnfa->minimal_tags[i + 2] = -1;
1647 assert(tree->num_tags == num_tags);
1648 tnfa->end_tag = num_tags;
1649 tnfa->num_tags = num_tags;
1650 tnfa->num_minimals = num_minimals;
1653 xfree(saved_states);
1660 AST to TNFA compilation routines.
1666 } tre_copyast_symbol_t;
1668 /* Flags for tre_copy_ast(). */
1669 #define COPY_REMOVE_TAGS 1
1670 #define COPY_MAXIMIZE_FIRST_TAG 2
1672 static reg_errcode_t
1673 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1674 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1675 tre_ast_node_t **copy, int *max_pos)
1677 reg_errcode_t status = REG_OK;
1678 int bottom = tre_stack_num_objects(stack);
1681 tre_ast_node_t **result = copy;
1682 tre_copyast_symbol_t symbol;
1684 STACK_PUSH(stack, voidptr, ast);
1685 STACK_PUSH(stack, int, COPY_RECURSE);
1687 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1689 tre_ast_node_t *node;
1690 if (status != REG_OK)
1693 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1696 case COPY_SET_RESULT_PTR:
1697 result = tre_stack_pop_voidptr(stack);
1700 node = tre_stack_pop_voidptr(stack);
1705 tre_literal_t *lit = node->obj;
1706 int pos = lit->position;
1707 int min = lit->code_min;
1708 int max = lit->code_max;
1709 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1711 /* XXX - e.g. [ab] has only one position but two
1712 nodes, so we are creating holes in the state space
1713 here. Not fatal, just wastes memory. */
1717 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1719 /* Change this tag to empty. */
1723 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1726 /* Maximize the first tag. */
1727 tag_directions[max] = TRE_TAG_MAXIMIZE;
1730 *result = tre_ast_new_literal(mem, min, max, pos);
1731 if (*result == NULL)
1732 status = REG_ESPACE;
1734 tre_literal_t *p = (*result)->obj;
1735 p->class = lit->class;
1736 p->neg_classes = lit->neg_classes;
1745 tre_union_t *uni = node->obj;
1747 *result = tre_ast_new_union(mem, uni->left, uni->right);
1748 if (*result == NULL)
1750 status = REG_ESPACE;
1753 tmp = (*result)->obj;
1754 result = &tmp->left;
1755 STACK_PUSHX(stack, voidptr, uni->right);
1756 STACK_PUSHX(stack, int, COPY_RECURSE);
1757 STACK_PUSHX(stack, voidptr, &tmp->right);
1758 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1759 STACK_PUSHX(stack, voidptr, uni->left);
1760 STACK_PUSHX(stack, int, COPY_RECURSE);
1765 tre_catenation_t *cat = node->obj;
1766 tre_catenation_t *tmp;
1767 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1768 if (*result == NULL)
1770 status = REG_ESPACE;
1773 tmp = (*result)->obj;
1776 result = &tmp->left;
1778 STACK_PUSHX(stack, voidptr, cat->right);
1779 STACK_PUSHX(stack, int, COPY_RECURSE);
1780 STACK_PUSHX(stack, voidptr, &tmp->right);
1781 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1782 STACK_PUSHX(stack, voidptr, cat->left);
1783 STACK_PUSHX(stack, int, COPY_RECURSE);
1788 tre_iteration_t *iter = node->obj;
1789 STACK_PUSHX(stack, voidptr, iter->arg);
1790 STACK_PUSHX(stack, int, COPY_RECURSE);
1791 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1792 iter->max, iter->minimal);
1793 if (*result == NULL)
1795 status = REG_ESPACE;
1798 iter = (*result)->obj;
1799 result = &iter->arg;
1809 *pos_add += num_copied;
1816 } tre_expand_ast_symbol_t;
1818 /* Expands each iteration node that has a finite nonzero minimum or maximum
1819 iteration count to a catenated sequence of copies of the node. */
1820 static reg_errcode_t
1821 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1822 int *position, tre_tag_direction_t *tag_directions)
1824 reg_errcode_t status = REG_OK;
1825 int bottom = tre_stack_num_objects(stack);
1827 int pos_add_total = 0;
1831 STACK_PUSHR(stack, voidptr, ast);
1832 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1833 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1835 tre_ast_node_t *node;
1836 tre_expand_ast_symbol_t symbol;
1838 if (status != REG_OK)
1841 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1842 node = tre_stack_pop_voidptr(stack);
1845 case EXPAND_RECURSE:
1850 tre_literal_t *lit= node->obj;
1851 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1853 lit->position += pos_add;
1854 if (lit->position > max_pos)
1855 max_pos = lit->position;
1861 tre_union_t *uni = node->obj;
1862 STACK_PUSHX(stack, voidptr, uni->right);
1863 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1864 STACK_PUSHX(stack, voidptr, uni->left);
1865 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870 tre_catenation_t *cat = node->obj;
1871 STACK_PUSHX(stack, voidptr, cat->right);
1872 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1873 STACK_PUSHX(stack, voidptr, cat->left);
1874 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1879 tre_iteration_t *iter = node->obj;
1880 STACK_PUSHX(stack, int, pos_add);
1881 STACK_PUSHX(stack, voidptr, node);
1882 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1883 STACK_PUSHX(stack, voidptr, iter->arg);
1884 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1885 /* If we are going to expand this node at EXPAND_AFTER_ITER
1886 then don't increase the `pos' fields of the nodes now, it
1887 will get done when expanding. */
1888 if (iter->min > 1 || iter->max > 1)
1898 case EXPAND_AFTER_ITER:
1900 tre_iteration_t *iter = node->obj;
1902 pos_add = tre_stack_pop_int(stack);
1903 pos_add_last = pos_add;
1904 if (iter->min > 1 || iter->max > 1)
1906 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1908 int pos_add_save = pos_add;
1910 /* Create a catenated sequence of copies of the node. */
1911 for (j = 0; j < iter->min; j++)
1913 tre_ast_node_t *copy;
1914 /* Remove tags from all but the last copy. */
1915 int flags = ((j + 1 < iter->min)
1917 : COPY_MAXIMIZE_FIRST_TAG);
1918 pos_add_save = pos_add;
1919 status = tre_copy_ast(mem, stack, iter->arg, flags,
1920 &pos_add, tag_directions, ©,
1922 if (status != REG_OK)
1925 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1932 if (iter->max == -1)
1934 /* No upper limit. */
1935 pos_add_save = pos_add;
1936 status = tre_copy_ast(mem, stack, iter->arg, 0,
1937 &pos_add, NULL, &seq2, &max_pos);
1938 if (status != REG_OK)
1940 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1946 for (j = iter->min; j < iter->max; j++)
1948 tre_ast_node_t *tmp, *copy;
1949 pos_add_save = pos_add;
1950 status = tre_copy_ast(mem, stack, iter->arg, 0,
1951 &pos_add, NULL, ©, &max_pos);
1952 if (status != REG_OK)
1955 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1960 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1963 seq2 = tre_ast_new_union(mem, tmp, seq2);
1969 pos_add = pos_add_save;
1972 else if (seq2 != NULL)
1973 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1976 node->obj = seq1->obj;
1977 node->type = seq1->type;
1981 pos_add_total += pos_add - pos_add_last;
1982 if (iter_depth == 0)
1983 pos_add = pos_add_total;
1993 *position += pos_add_total;
1995 /* `max_pos' should never be larger than `*position' if the above
1996 code works, but just an extra safeguard let's make sure
1997 `*position' is set large enough so enough memory will be
1998 allocated for the transition table. */
1999 if (max_pos > *position)
2000 *position = max_pos;
2005 static tre_pos_and_tags_t *
2006 tre_set_empty(tre_mem_t mem)
2008 tre_pos_and_tags_t *new_set;
2010 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2011 if (new_set == NULL)
2014 new_set[0].position = -1;
2015 new_set[0].code_min = -1;
2016 new_set[0].code_max = -1;
2021 static tre_pos_and_tags_t *
2022 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2023 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2025 tre_pos_and_tags_t *new_set;
2027 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2028 if (new_set == NULL)
2031 new_set[0].position = position;
2032 new_set[0].code_min = code_min;
2033 new_set[0].code_max = code_max;
2034 new_set[0].class = class;
2035 new_set[0].neg_classes = neg_classes;
2036 new_set[0].backref = backref;
2037 new_set[1].position = -1;
2038 new_set[1].code_min = -1;
2039 new_set[1].code_max = -1;
2044 static tre_pos_and_tags_t *
2045 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2046 int *tags, int assertions)
2049 tre_pos_and_tags_t *new_set;
2053 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2054 for (s1 = 0; set1[s1].position >= 0; s1++);
2055 for (s2 = 0; set2[s2].position >= 0; s2++);
2056 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2060 for (s1 = 0; set1[s1].position >= 0; s1++)
2062 new_set[s1].position = set1[s1].position;
2063 new_set[s1].code_min = set1[s1].code_min;
2064 new_set[s1].code_max = set1[s1].code_max;
2065 new_set[s1].assertions = set1[s1].assertions | assertions;
2066 new_set[s1].class = set1[s1].class;
2067 new_set[s1].neg_classes = set1[s1].neg_classes;
2068 new_set[s1].backref = set1[s1].backref;
2069 if (set1[s1].tags == NULL && tags == NULL)
2070 new_set[s1].tags = NULL;
2073 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2074 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2075 * (i + num_tags + 1)));
2076 if (new_tags == NULL)
2078 for (j = 0; j < i; j++)
2079 new_tags[j] = set1[s1].tags[j];
2080 for (i = 0; i < num_tags; i++)
2081 new_tags[j + i] = tags[i];
2082 new_tags[j + i] = -1;
2083 new_set[s1].tags = new_tags;
2087 for (s2 = 0; set2[s2].position >= 0; s2++)
2089 new_set[s1 + s2].position = set2[s2].position;
2090 new_set[s1 + s2].code_min = set2[s2].code_min;
2091 new_set[s1 + s2].code_max = set2[s2].code_max;
2092 /* XXX - why not | assertions here as well? */
2093 new_set[s1 + s2].assertions = set2[s2].assertions;
2094 new_set[s1 + s2].class = set2[s2].class;
2095 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2096 new_set[s1 + s2].backref = set2[s2].backref;
2097 if (set2[s2].tags == NULL)
2098 new_set[s1 + s2].tags = NULL;
2101 for (i = 0; set2[s2].tags[i] >= 0; i++);
2102 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2103 if (new_tags == NULL)
2105 for (j = 0; j < i; j++)
2106 new_tags[j] = set2[s2].tags[j];
2108 new_set[s1 + s2].tags = new_tags;
2111 new_set[s1 + s2].position = -1;
2115 /* Finds the empty path through `node' which is the one that should be
2116 taken according to POSIX.2 rules, and adds the tags on that path to
2117 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2118 set to the number of tags seen on the path. */
2119 static reg_errcode_t
2120 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2121 int *assertions, int *num_tags_seen)
2125 tre_catenation_t *cat;
2126 tre_iteration_t *iter;
2128 int bottom = tre_stack_num_objects(stack);
2129 reg_errcode_t status = REG_OK;
2133 status = tre_stack_push_voidptr(stack, node);
2135 /* Walk through the tree recursively. */
2136 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2138 node = tre_stack_pop_voidptr(stack);
2143 lit = (tre_literal_t *)node->obj;
2144 switch (lit->code_min)
2147 if (lit->code_max >= 0)
2151 /* Add the tag to `tags'. */
2152 for (i = 0; tags[i] >= 0; i++)
2153 if (tags[i] == lit->code_max)
2157 tags[i] = lit->code_max;
2166 assert(lit->code_max >= 1
2167 || lit->code_max <= ASSERT_LAST);
2168 if (assertions != NULL)
2169 *assertions |= lit->code_max;
2180 /* Subexpressions starting earlier take priority over ones
2181 starting later, so we prefer the left subexpression over the
2182 right subexpression. */
2183 uni = (tre_union_t *)node->obj;
2184 if (uni->left->nullable)
2185 STACK_PUSHX(stack, voidptr, uni->left)
2186 else if (uni->right->nullable)
2187 STACK_PUSHX(stack, voidptr, uni->right)
2193 /* The path must go through both children. */
2194 cat = (tre_catenation_t *)node->obj;
2195 assert(cat->left->nullable);
2196 assert(cat->right->nullable);
2197 STACK_PUSHX(stack, voidptr, cat->left);
2198 STACK_PUSHX(stack, voidptr, cat->right);
2202 /* A match with an empty string is preferred over no match at
2203 all, so we go through the argument if possible. */
2204 iter = (tre_iteration_t *)node->obj;
2205 if (iter->arg->nullable)
2206 STACK_PUSHX(stack, voidptr, iter->arg);
2222 NFL_POST_CATENATION,
2224 } tre_nfl_stack_symbol_t;
2227 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2228 the nodes of the AST `tree'. */
2229 static reg_errcode_t
2230 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2232 int bottom = tre_stack_num_objects(stack);
2234 STACK_PUSHR(stack, voidptr, tree);
2235 STACK_PUSHR(stack, int, NFL_RECURSE);
2237 while (tre_stack_num_objects(stack) > bottom)
2239 tre_nfl_stack_symbol_t symbol;
2240 tre_ast_node_t *node;
2242 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2243 node = tre_stack_pop_voidptr(stack);
2251 tre_literal_t *lit = (tre_literal_t *)node->obj;
2252 if (IS_BACKREF(lit))
2254 /* Back references: nullable = false, firstpos = {i},
2257 node->firstpos = tre_set_one(mem, lit->position, 0,
2258 TRE_CHAR_MAX, 0, NULL, -1);
2259 if (!node->firstpos)
2261 node->lastpos = tre_set_one(mem, lit->position, 0,
2262 TRE_CHAR_MAX, 0, NULL,
2263 (int)lit->code_max);
2267 else if (lit->code_min < 0)
2269 /* Tags, empty strings, params, and zero width assertions:
2270 nullable = true, firstpos = {}, and lastpos = {}. */
2272 node->firstpos = tre_set_empty(mem);
2273 if (!node->firstpos)
2275 node->lastpos = tre_set_empty(mem);
2281 /* Literal at position i: nullable = false, firstpos = {i},
2285 tre_set_one(mem, lit->position, (int)lit->code_min,
2286 (int)lit->code_max, 0, NULL, -1);
2287 if (!node->firstpos)
2289 node->lastpos = tre_set_one(mem, lit->position,
2292 lit->class, lit->neg_classes,
2301 /* Compute the attributes for the two subtrees, and after that
2303 STACK_PUSHR(stack, voidptr, node);
2304 STACK_PUSHR(stack, int, NFL_POST_UNION);
2305 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2306 STACK_PUSHR(stack, int, NFL_RECURSE);
2307 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2308 STACK_PUSHR(stack, int, NFL_RECURSE);
2312 /* Compute the attributes for the two subtrees, and after that
2314 STACK_PUSHR(stack, voidptr, node);
2315 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2316 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2317 STACK_PUSHR(stack, int, NFL_RECURSE);
2318 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2319 STACK_PUSHR(stack, int, NFL_RECURSE);
2323 /* Compute the attributes for the subtree, and after that for
2325 STACK_PUSHR(stack, voidptr, node);
2326 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2327 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2328 STACK_PUSHR(stack, int, NFL_RECURSE);
2331 break; /* end case: NFL_RECURSE */
2333 case NFL_POST_UNION:
2335 tre_union_t *uni = (tre_union_t *)node->obj;
2336 node->nullable = uni->left->nullable || uni->right->nullable;
2337 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2338 uni->right->firstpos, NULL, 0);
2339 if (!node->firstpos)
2341 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2342 uni->right->lastpos, NULL, 0);
2348 case NFL_POST_ITERATION:
2350 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2352 if (iter->min == 0 || iter->arg->nullable)
2356 node->firstpos = iter->arg->firstpos;
2357 node->lastpos = iter->arg->lastpos;
2361 case NFL_POST_CATENATION:
2363 int num_tags, *tags, assertions;
2364 reg_errcode_t status;
2365 tre_catenation_t *cat = node->obj;
2366 node->nullable = cat->left->nullable && cat->right->nullable;
2368 /* Compute firstpos. */
2369 if (cat->left->nullable)
2371 /* The left side matches the empty string. Make a first pass
2372 with tre_match_empty() to get the number of tags and
2374 status = tre_match_empty(stack, cat->left,
2375 NULL, NULL, &num_tags);
2376 if (status != REG_OK)
2378 /* Allocate arrays for the tags and parameters. */
2379 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2384 /* Second pass with tre_mach_empty() to get the list of
2385 tags and parameters. */
2386 status = tre_match_empty(stack, cat->left, tags,
2388 if (status != REG_OK)
2394 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2397 if (!node->firstpos)
2402 node->firstpos = cat->left->firstpos;
2405 /* Compute lastpos. */
2406 if (cat->right->nullable)
2408 /* The right side matches the empty string. Make a first pass
2409 with tre_match_empty() to get the number of tags and
2411 status = tre_match_empty(stack, cat->right,
2412 NULL, NULL, &num_tags);
2413 if (status != REG_OK)
2415 /* Allocate arrays for the tags and parameters. */
2416 tags = xmalloc(sizeof(int) * (num_tags + 1));
2421 /* Second pass with tre_mach_empty() to get the list of
2422 tags and parameters. */
2423 status = tre_match_empty(stack, cat->right, tags,
2425 if (status != REG_OK)
2431 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2439 node->lastpos = cat->right->lastpos;
2454 /* Adds a transition from each position in `p1' to each position in `p2'. */
2455 static reg_errcode_t
2456 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2457 tre_tnfa_transition_t *transitions,
2458 int *counts, int *offs)
2460 tre_pos_and_tags_t *orig_p2 = p2;
2461 tre_tnfa_transition_t *trans;
2462 int i, j, k, l, dup, prev_p2_pos;
2464 if (transitions != NULL)
2465 while (p1->position >= 0)
2469 while (p2->position >= 0)
2471 /* Optimization: if this position was already handled, skip it. */
2472 if (p2->position == prev_p2_pos)
2477 prev_p2_pos = p2->position;
2478 /* Set `trans' to point to the next unused transition from
2479 position `p1->position'. */
2480 trans = transitions + offs[p1->position];
2481 while (trans->state != NULL)
2484 /* If we find a previous transition from `p1->position' to
2485 `p2->position', it is overwritten. This can happen only
2486 if there are nested loops in the regexp, like in "((a)*)*".
2487 In POSIX.2 repetition using the outer loop is always
2488 preferred over using the inner loop. Therefore the
2489 transition for the inner loop is useless and can be thrown
2491 /* XXX - The same position is used for all nodes in a bracket
2492 expression, so this optimization cannot be used (it will
2493 break bracket expressions) unless I figure out a way to
2495 if (trans->state_id == p2->position)
2503 if (trans->state == NULL)
2504 (trans + 1)->state = NULL;
2505 /* Use the character ranges, assertions, etc. from `p1' for
2506 the transition from `p1' to `p2'. */
2507 trans->code_min = p1->code_min;
2508 trans->code_max = p1->code_max;
2509 trans->state = transitions + offs[p2->position];
2510 trans->state_id = p2->position;
2511 trans->assertions = p1->assertions | p2->assertions
2512 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2513 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2514 if (p1->backref >= 0)
2516 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2517 assert(p2->backref < 0);
2518 trans->u.backref = p1->backref;
2519 trans->assertions |= ASSERT_BACKREF;
2522 trans->u.class = p1->class;
2523 if (p1->neg_classes != NULL)
2525 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2526 trans->neg_classes =
2527 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2528 if (trans->neg_classes == NULL)
2530 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2531 trans->neg_classes[i] = p1->neg_classes[i];
2532 trans->neg_classes[i] = (tre_ctype_t)0;
2535 trans->neg_classes = NULL;
2537 /* Find out how many tags this transition has. */
2539 if (p1->tags != NULL)
2540 while(p1->tags[i] >= 0)
2543 if (p2->tags != NULL)
2544 while(p2->tags[j] >= 0)
2547 /* If we are overwriting a transition, free the old tag array. */
2548 if (trans->tags != NULL)
2552 /* If there were any tags, allocate an array and fill it. */
2555 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2559 if (p1->tags != NULL)
2560 while(p1->tags[i] >= 0)
2562 trans->tags[i] = p1->tags[i];
2567 if (p2->tags != NULL)
2568 while (p2->tags[j] >= 0)
2570 /* Don't add duplicates. */
2572 for (k = 0; k < i; k++)
2573 if (trans->tags[k] == p2->tags[j])
2579 trans->tags[l++] = p2->tags[j];
2582 trans->tags[l] = -1;
2590 /* Compute a maximum limit for the number of transitions leaving
2592 while (p1->position >= 0)
2595 while (p2->position >= 0)
2597 counts[p1->position]++;
2605 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2606 labelled with one character range (there are no transitions on empty
2607 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2609 static reg_errcode_t
2610 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2611 int *counts, int *offs)
2614 tre_catenation_t *cat;
2615 tre_iteration_t *iter;
2616 reg_errcode_t errcode = REG_OK;
2618 /* XXX - recurse using a stack!. */
2624 uni = (tre_union_t *)node->obj;
2625 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2626 if (errcode != REG_OK)
2628 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2632 cat = (tre_catenation_t *)node->obj;
2633 /* Add a transition from each position in cat->left->lastpos
2634 to each position in cat->right->firstpos. */
2635 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2636 transitions, counts, offs);
2637 if (errcode != REG_OK)
2639 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2640 if (errcode != REG_OK)
2642 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2646 iter = (tre_iteration_t *)node->obj;
2647 assert(iter->max == -1 || iter->max == 1);
2649 if (iter->max == -1)
2651 assert(iter->min == 0 || iter->min == 1);
2652 /* Add a transition from each last position in the iterated
2653 expression to each first position. */
2654 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2655 transitions, counts, offs);
2656 if (errcode != REG_OK)
2659 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2666 #define ERROR_EXIT(err) \
2670 if (/*CONSTCOND*/1) \
2673 while (/*CONSTCOND*/0)
2677 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2680 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2681 tre_pos_and_tags_t *p;
2682 int *counts = NULL, *offs = NULL;
2684 tre_tnfa_transition_t *transitions, *initial;
2685 tre_tnfa_t *tnfa = NULL;
2686 tre_submatch_data_t *submatch_data;
2687 tre_tag_direction_t *tag_directions = NULL;
2688 reg_errcode_t errcode;
2691 /* Parse context. */
2692 tre_parse_ctx_t parse_ctx;
2694 /* Allocate a stack used throughout the compilation process for various
2696 stack = tre_stack_new(512, 1024000, 128);
2699 /* Allocate a fast memory allocator. */
2700 mem = tre_mem_new();
2703 tre_stack_destroy(stack);
2707 /* Parse the regexp. */
2708 memset(&parse_ctx, 0, sizeof(parse_ctx));
2709 parse_ctx.mem = mem;
2710 parse_ctx.stack = stack;
2711 parse_ctx.start = regex;
2712 parse_ctx.cflags = cflags;
2713 parse_ctx.max_backref = -1;
2714 errcode = tre_parse(&parse_ctx);
2715 if (errcode != REG_OK)
2716 ERROR_EXIT(errcode);
2717 preg->re_nsub = parse_ctx.submatch_id - 1;
2721 tre_ast_print(tree);
2722 #endif /* TRE_DEBUG */
2724 /* Referring to nonexistent subexpressions is illegal. */
2725 if (parse_ctx.max_backref > (int)preg->re_nsub)
2726 ERROR_EXIT(REG_ESUBREG);
2728 /* Allocate the TNFA struct. */
2729 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2731 ERROR_EXIT(REG_ESPACE);
2732 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2733 tnfa->have_approx = 0;
2734 tnfa->num_submatches = parse_ctx.submatch_id;
2736 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2737 regexp does not have back references, this can be skipped. */
2738 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2741 /* Figure out how many tags we will need. */
2742 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2743 if (errcode != REG_OK)
2744 ERROR_EXIT(errcode);
2746 if (tnfa->num_tags > 0)
2748 tag_directions = xmalloc(sizeof(*tag_directions)
2749 * (tnfa->num_tags + 1));
2750 if (tag_directions == NULL)
2751 ERROR_EXIT(REG_ESPACE);
2752 tnfa->tag_directions = tag_directions;
2753 memset(tag_directions, -1,
2754 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2756 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2757 sizeof(*tnfa->minimal_tags));
2758 if (tnfa->minimal_tags == NULL)
2759 ERROR_EXIT(REG_ESPACE);
2761 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2762 sizeof(*submatch_data));
2763 if (submatch_data == NULL)
2764 ERROR_EXIT(REG_ESPACE);
2765 tnfa->submatch_data = submatch_data;
2767 errcode = tre_add_tags(mem, stack, tree, tnfa);
2768 if (errcode != REG_OK)
2769 ERROR_EXIT(errcode);
2773 /* Expand iteration nodes. */
2774 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2776 if (errcode != REG_OK)
2777 ERROR_EXIT(errcode);
2779 /* Add a dummy node for the final state.
2780 XXX - For certain patterns this dummy node can be optimized away,
2781 for example "a*" or "ab*". Figure out a simple way to detect
2782 this possibility. */
2784 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2785 if (tmp_ast_r == NULL)
2786 ERROR_EXIT(REG_ESPACE);
2788 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2790 ERROR_EXIT(REG_ESPACE);
2792 errcode = tre_compute_nfl(mem, stack, tree);
2793 if (errcode != REG_OK)
2794 ERROR_EXIT(errcode);
2796 counts = xmalloc(sizeof(int) * parse_ctx.position);
2798 ERROR_EXIT(REG_ESPACE);
2800 offs = xmalloc(sizeof(int) * parse_ctx.position);
2802 ERROR_EXIT(REG_ESPACE);
2804 for (i = 0; i < parse_ctx.position; i++)
2806 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2809 for (i = 0; i < parse_ctx.position; i++)
2812 add += counts[i] + 1;
2815 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2816 if (transitions == NULL)
2817 ERROR_EXIT(REG_ESPACE);
2818 tnfa->transitions = transitions;
2819 tnfa->num_transitions = add;
2821 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2822 if (errcode != REG_OK)
2823 ERROR_EXIT(errcode);
2825 tnfa->firstpos_chars = NULL;
2829 while (p->position >= 0)
2835 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2836 if (initial == NULL)
2837 ERROR_EXIT(REG_ESPACE);
2838 tnfa->initial = initial;
2841 for (p = tree->firstpos; p->position >= 0; p++)
2843 initial[i].state = transitions + offs[p->position];
2844 initial[i].state_id = p->position;
2845 initial[i].tags = NULL;
2846 /* Copy the arrays p->tags, and p->params, they are allocated
2847 from a tre_mem object. */
2851 for (j = 0; p->tags[j] >= 0; j++);
2852 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2853 if (!initial[i].tags)
2854 ERROR_EXIT(REG_ESPACE);
2855 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2857 initial[i].assertions = p->assertions;
2860 initial[i].state = NULL;
2862 tnfa->num_transitions = add;
2863 tnfa->final = transitions + offs[tree->lastpos[0].position];
2864 tnfa->num_states = parse_ctx.position;
2865 tnfa->cflags = cflags;
2867 tre_mem_destroy(mem);
2868 tre_stack_destroy(stack);
2872 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2876 /* Free everything that was allocated and return the error code. */
2877 tre_mem_destroy(mem);
2879 tre_stack_destroy(stack);
2884 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2893 regfree(regex_t *preg)
2897 tre_tnfa_transition_t *trans;
2899 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2903 for (i = 0; i < tnfa->num_transitions; i++)
2904 if (tnfa->transitions[i].state)
2906 if (tnfa->transitions[i].tags)
2907 xfree(tnfa->transitions[i].tags);
2908 if (tnfa->transitions[i].neg_classes)
2909 xfree(tnfa->transitions[i].neg_classes);
2911 if (tnfa->transitions)
2912 xfree(tnfa->transitions);
2916 for (trans = tnfa->initial; trans->state; trans++)
2921 xfree(tnfa->initial);
2924 if (tnfa->submatch_data)
2926 for (i = 0; i < tnfa->num_submatches; i++)
2927 if (tnfa->submatch_data[i].parents)
2928 xfree(tnfa->submatch_data[i].parents);
2929 xfree(tnfa->submatch_data);
2932 if (tnfa->tag_directions)
2933 xfree(tnfa->tag_directions);
2934 if (tnfa->firstpos_chars)
2935 xfree(tnfa->firstpos_chars);
2936 if (tnfa->minimal_tags)
2937 xfree(tnfa->minimal_tags);