2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!ere && *s++ != '\\') ||
735 static int hexval(unsigned c)
737 if (c-'0'<10) return c-'0';
739 if (c-'a'<6) return c-'a'+10;
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
745 if (node->submatch_id >= 0) {
746 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
749 n = tre_ast_new_catenation(ctx->mem, n, node);
752 n->num_submatches = node->num_submatches;
755 node->submatch_id = subid;
756 node->num_submatches++;
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
764 Branch = Atom | Branch Atom
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
771 Regex = Branch | Regex '|' Branch
772 Branch = Atom | Branch Atom
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
781 int len, ere = ctx->cflags & REG_EXTENDED;
783 tre_ast_node_t *node;
787 return parse_bracket(ctx, s+1);
789 p = tre_expand_macro(s+1);
791 /* assume \X expansion is a single atom */
792 reg_errcode_t err = parse_atom(ctx, p);
796 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
801 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
804 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
820 for (i=0; i<len && v<0x110000; i++) {
831 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
842 /* extension: treat \| as alternation in BRE */
844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
850 if (!ere && (unsigned)*s-'1' < 9) {
853 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
854 ctx->max_backref = MAX(val, ctx->max_backref);
856 /* extension: accept unknown escaped char
864 if (ctx->cflags & REG_NEWLINE) {
865 tre_ast_node_t *tmp1, *tmp2;
866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
873 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
878 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879 if (!ere && s != ctx->re)
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
885 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
896 /* reject repetitions after empty expression in ERE */
903 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
907 len = mbtowc(&wc, s, -1);
910 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
911 tre_ast_node_t *tmp1, *tmp2;
912 /* multiple opposite case characters are not supported */
913 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
914 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
916 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
920 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
934 #define PUSHPTR(err, s, v) do { \
935 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
939 #define PUSHINT(err, s, v) do { \
940 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
944 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
946 tre_ast_node_t *nbranch=0, *nunion=0;
947 int ere = ctx->cflags & REG_EXTENDED;
948 const char *s = ctx->re;
952 tre_stack_t *stack = ctx->stack;
954 PUSHINT(err, stack, subid++);
956 if ((!ere && *s == '\\' && s[1] == '(') ||
957 (ere && *s == '(')) {
958 PUSHPTR(err, stack, nunion);
959 PUSHPTR(err, stack, nbranch);
960 PUSHINT(err, stack, subid++);
965 nbranch = nunion = 0;
968 if ((!ere && *s == '\\' && s[1] == ')') ||
969 (ere && *s == ')' && depth)) {
970 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
974 err = parse_atom(ctx, s);
981 /* extension: repetitions are rejected after an empty node
982 eg. (+), |*, {2}, but assertions are not treated as empty
983 so ^* or $? are accepted currently. */
987 if (*s!='\\' && *s!='*') {
990 if (*s!='+' && *s!='?' && *s!='{')
995 /* extension: treat \+, \? as repetitions in BRE */
996 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
1001 /* extension: multiple consecutive *+?{,} is unspecified,
1002 but (a+)+ has to be supported so accepting a++ makes
1003 sense, note however that the RE_DUP_MAX limit can be
1004 circumvented: (a{255}){255} uses a lot of memory.. */
1006 s = parse_dup(s+1, ere, &min, &max);
1019 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1021 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1026 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1027 if ((ere && *s == '|') ||
1028 (ere && *s == ')' && depth) ||
1029 (!ere && *s == '\\' && s[1] == ')') ||
1030 /* extension: treat \| as alternation in BRE */
1031 (!ere && *s == '\\' && s[1] == '|') ||
1033 /* extension: empty branch is unspecified (), (|a), (a|)
1034 here they are not rejected but match on empty string */
1036 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1039 if (c == '\\' && s[1] == '|') {
1041 } else if (c == '|') {
1045 if (!depth) return REG_EPAREN;
1047 } else if (c == ')')
1050 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1053 if (!c && depth<0) {
1054 ctx->submatch_id = subid;
1059 nbranch = tre_stack_pop_voidptr(stack);
1060 nunion = tre_stack_pop_voidptr(stack);
1068 /***********************************************************************
1070 ***********************************************************************/
1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1080 Algorithms to setup tags so that submatch addressing can be done.
1084 /* Inserts a catenation node to the root of the tree given in `node'.
1085 As the left child a new tag with number `tag_id' to `node' is added,
1086 and the right child is the old root. */
1087 static reg_errcode_t
1088 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1090 tre_catenation_t *c;
1092 c = tre_mem_alloc(mem, sizeof(*c));
1095 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1096 if (c->left == NULL)
1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1099 if (c->right == NULL)
1102 c->right->obj = node->obj;
1103 c->right->type = node->type;
1104 c->right->nullable = -1;
1105 c->right->submatch_id = -1;
1106 c->right->firstpos = NULL;
1107 c->right->lastpos = NULL;
1108 c->right->num_tags = 0;
1110 node->type = CATENATION;
1114 /* Inserts a catenation node to the root of the tree given in `node'.
1115 As the right child a new tag with number `tag_id' to `node' is added,
1116 and the left child is the old root. */
1117 static reg_errcode_t
1118 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1120 tre_catenation_t *c;
1122 c = tre_mem_alloc(mem, sizeof(*c));
1125 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1126 if (c->right == NULL)
1128 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1129 if (c->left == NULL)
1132 c->left->obj = node->obj;
1133 c->left->type = node->type;
1134 c->left->nullable = -1;
1135 c->left->submatch_id = -1;
1136 c->left->firstpos = NULL;
1137 c->left->lastpos = NULL;
1138 c->left->num_tags = 0;
1140 node->type = CATENATION;
1146 ADDTAGS_AFTER_ITERATION,
1147 ADDTAGS_AFTER_UNION_LEFT,
1148 ADDTAGS_AFTER_UNION_RIGHT,
1149 ADDTAGS_AFTER_CAT_LEFT,
1150 ADDTAGS_AFTER_CAT_RIGHT,
1151 ADDTAGS_SET_SUBMATCH_END
1152 } tre_addtags_symbol_t;
1161 /* Go through `regset' and set submatch data for submatches that are
1164 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1168 for (i = 0; regset[i] >= 0; i++)
1170 int id = regset[i] / 2;
1171 int start = !(regset[i] % 2);
1173 tnfa->submatch_data[id].so_tag = tag;
1175 tnfa->submatch_data[id].eo_tag = tag;
1181 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1182 subexpressions marked for submatch addressing can be traced. */
1183 static reg_errcode_t
1184 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1187 reg_errcode_t status = REG_OK;
1188 tre_addtags_symbol_t symbol;
1189 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1190 int bottom = tre_stack_num_objects(stack);
1191 /* True for first pass (counting number of needed tags) */
1192 int first_pass = (mem == NULL || tnfa == NULL);
1193 int *regset, *orig_regset;
1194 int num_tags = 0; /* Total number of tags. */
1195 int num_minimals = 0; /* Number of special minimal tags. */
1196 int tag = 0; /* The tag that is to be added next. */
1197 int next_tag = 1; /* Next tag to use after this one. */
1198 int *parents; /* Stack of submatches the current submatch is
1200 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1201 tre_tag_states_t *saved_states;
1203 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1207 tnfa->minimal_tags[0] = -1;
1210 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1214 orig_regset = regset;
1216 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1217 if (parents == NULL)
1224 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1225 if (saved_states == NULL)
1234 for (i = 0; i <= tnfa->num_submatches; i++)
1235 saved_states[i].tag = -1;
1238 STACK_PUSH(stack, voidptr, node);
1239 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1241 while (tre_stack_num_objects(stack) > bottom)
1243 if (status != REG_OK)
1246 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1250 case ADDTAGS_SET_SUBMATCH_END:
1252 int id = tre_stack_pop_int(stack);
1255 /* Add end of this submatch to regset. */
1256 for (i = 0; regset[i] >= 0; i++);
1257 regset[i] = id * 2 + 1;
1260 /* Pop this submatch from the parents stack. */
1261 for (i = 0; parents[i] >= 0; i++);
1262 parents[i - 1] = -1;
1266 case ADDTAGS_RECURSE:
1267 node = tre_stack_pop_voidptr(stack);
1269 if (node->submatch_id >= 0)
1271 int id = node->submatch_id;
1275 /* Add start of this submatch to regset. */
1276 for (i = 0; regset[i] >= 0; i++);
1282 for (i = 0; parents[i] >= 0; i++);
1283 tnfa->submatch_data[id].parents = NULL;
1286 int *p = xmalloc(sizeof(*p) * (i + 1));
1289 status = REG_ESPACE;
1292 assert(tnfa->submatch_data[id].parents == NULL);
1293 tnfa->submatch_data[id].parents = p;
1294 for (i = 0; parents[i] >= 0; i++)
1300 /* Add end of this submatch to regset after processing this
1302 STACK_PUSHX(stack, int, node->submatch_id);
1303 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1310 tre_literal_t *lit = node->obj;
1312 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1317 /* Regset is not empty, so add a tag before the
1318 literal or backref. */
1321 status = tre_add_tag_left(mem, node, tag);
1322 tnfa->tag_directions[tag] = direction;
1323 if (minimal_tag >= 0)
1325 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1326 tnfa->minimal_tags[i] = tag;
1327 tnfa->minimal_tags[i + 1] = minimal_tag;
1328 tnfa->minimal_tags[i + 2] = -1;
1332 tre_purge_regset(regset, tnfa, tag);
1347 assert(!IS_TAG(lit));
1353 tre_catenation_t *cat = node->obj;
1354 tre_ast_node_t *left = cat->left;
1355 tre_ast_node_t *right = cat->right;
1356 int reserved_tag = -1;
1359 /* After processing right child. */
1360 STACK_PUSHX(stack, voidptr, node);
1361 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1363 /* Process right child. */
1364 STACK_PUSHX(stack, voidptr, right);
1365 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1367 /* After processing left child. */
1368 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1369 if (left->num_tags > 0 && right->num_tags > 0)
1371 /* Reserve the next tag to the right child. */
1372 reserved_tag = next_tag;
1375 STACK_PUSHX(stack, int, reserved_tag);
1376 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1378 /* Process left child. */
1379 STACK_PUSHX(stack, voidptr, left);
1380 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1386 tre_iteration_t *iter = node->obj;
1390 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1394 STACK_PUSHX(stack, int, tag);
1395 STACK_PUSHX(stack, int, iter->minimal);
1397 STACK_PUSHX(stack, voidptr, node);
1398 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1400 STACK_PUSHX(stack, voidptr, iter->arg);
1401 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1403 /* Regset is not empty, so add a tag here. */
1404 if (regset[0] >= 0 || iter->minimal)
1409 status = tre_add_tag_left(mem, node, tag);
1411 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1413 tnfa->tag_directions[tag] = direction;
1414 if (minimal_tag >= 0)
1416 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1417 tnfa->minimal_tags[i] = tag;
1418 tnfa->minimal_tags[i + 1] = minimal_tag;
1419 tnfa->minimal_tags[i + 2] = -1;
1423 tre_purge_regset(regset, tnfa, tag);
1431 direction = TRE_TAG_MINIMIZE;
1436 tre_union_t *uni = node->obj;
1437 tre_ast_node_t *left = uni->left;
1438 tre_ast_node_t *right = uni->right;
1444 left_tag = next_tag;
1445 right_tag = next_tag + 1;
1450 right_tag = next_tag;
1453 /* After processing right child. */
1454 STACK_PUSHX(stack, int, right_tag);
1455 STACK_PUSHX(stack, int, left_tag);
1456 STACK_PUSHX(stack, voidptr, regset);
1457 STACK_PUSHX(stack, int, regset[0] >= 0);
1458 STACK_PUSHX(stack, voidptr, node);
1459 STACK_PUSHX(stack, voidptr, right);
1460 STACK_PUSHX(stack, voidptr, left);
1461 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1463 /* Process right child. */
1464 STACK_PUSHX(stack, voidptr, right);
1465 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1467 /* After processing left child. */
1468 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1470 /* Process left child. */
1471 STACK_PUSHX(stack, voidptr, left);
1472 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1474 /* Regset is not empty, so add a tag here. */
1480 status = tre_add_tag_left(mem, node, tag);
1481 tnfa->tag_directions[tag] = direction;
1482 if (minimal_tag >= 0)
1484 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1485 tnfa->minimal_tags[i] = tag;
1486 tnfa->minimal_tags[i + 1] = minimal_tag;
1487 tnfa->minimal_tags[i + 2] = -1;
1491 tre_purge_regset(regset, tnfa, tag);
1500 if (node->num_submatches > 0)
1502 /* The next two tags are reserved for markers. */
1512 if (node->submatch_id >= 0)
1515 /* Push this submatch on the parents stack. */
1516 for (i = 0; parents[i] >= 0; i++);
1517 parents[i] = node->submatch_id;
1518 parents[i + 1] = -1;
1521 break; /* end case: ADDTAGS_RECURSE */
1523 case ADDTAGS_AFTER_ITERATION:
1527 node = tre_stack_pop_voidptr(stack);
1530 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1531 + tre_stack_pop_int(stack);
1536 minimal = tre_stack_pop_int(stack);
1537 enter_tag = tre_stack_pop_int(stack);
1539 minimal_tag = enter_tag;
1545 direction = TRE_TAG_MINIMIZE;
1547 direction = TRE_TAG_MAXIMIZE;
1552 case ADDTAGS_AFTER_CAT_LEFT:
1554 int new_tag = tre_stack_pop_int(stack);
1555 next_tag = tre_stack_pop_int(stack);
1563 case ADDTAGS_AFTER_CAT_RIGHT:
1564 node = tre_stack_pop_voidptr(stack);
1566 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1567 + ((tre_catenation_t *)node->obj)->right->num_tags;
1570 case ADDTAGS_AFTER_UNION_LEFT:
1571 /* Lift the bottom of the `regset' array so that when processing
1572 the right operand the items currently in the array are
1573 invisible. The original bottom was saved at ADDTAGS_UNION and
1574 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1575 while (*regset >= 0)
1579 case ADDTAGS_AFTER_UNION_RIGHT:
1581 int added_tags, tag_left, tag_right;
1582 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1583 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1584 node = tre_stack_pop_voidptr(stack);
1585 added_tags = tre_stack_pop_int(stack);
1588 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1589 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1590 + ((node->num_submatches > 0) ? 2 : 0);
1592 regset = tre_stack_pop_voidptr(stack);
1593 tag_left = tre_stack_pop_int(stack);
1594 tag_right = tre_stack_pop_int(stack);
1596 /* Add tags after both children, the left child gets a smaller
1597 tag than the right child. This guarantees that we prefer
1598 the left child over the right child. */
1599 /* XXX - This is not always necessary (if the children have
1600 tags which must be seen for every match of that child). */
1601 /* XXX - Check if this is the only place where tre_add_tag_right
1602 is used. If so, use tre_add_tag_left (putting the tag before
1603 the child as opposed after the child) and throw away
1604 tre_add_tag_right. */
1605 if (node->num_submatches > 0)
1609 status = tre_add_tag_right(mem, left, tag_left);
1610 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1611 if (status == REG_OK)
1612 status = tre_add_tag_right(mem, right, tag_right);
1613 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1617 direction = TRE_TAG_MAXIMIZE;
1625 } /* end switch(symbol) */
1626 } /* end while(tre_stack_num_objects(stack) > bottom) */
1629 tre_purge_regset(regset, tnfa, tag);
1631 if (!first_pass && minimal_tag >= 0)
1634 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1635 tnfa->minimal_tags[i] = tag;
1636 tnfa->minimal_tags[i + 1] = minimal_tag;
1637 tnfa->minimal_tags[i + 2] = -1;
1642 assert(tree->num_tags == num_tags);
1643 tnfa->end_tag = num_tags;
1644 tnfa->num_tags = num_tags;
1645 tnfa->num_minimals = num_minimals;
1648 xfree(saved_states);
1655 AST to TNFA compilation routines.
1661 } tre_copyast_symbol_t;
1663 /* Flags for tre_copy_ast(). */
1664 #define COPY_REMOVE_TAGS 1
1665 #define COPY_MAXIMIZE_FIRST_TAG 2
1667 static reg_errcode_t
1668 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1669 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1670 tre_ast_node_t **copy, int *max_pos)
1672 reg_errcode_t status = REG_OK;
1673 int bottom = tre_stack_num_objects(stack);
1676 tre_ast_node_t **result = copy;
1677 tre_copyast_symbol_t symbol;
1679 STACK_PUSH(stack, voidptr, ast);
1680 STACK_PUSH(stack, int, COPY_RECURSE);
1682 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1684 tre_ast_node_t *node;
1685 if (status != REG_OK)
1688 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1691 case COPY_SET_RESULT_PTR:
1692 result = tre_stack_pop_voidptr(stack);
1695 node = tre_stack_pop_voidptr(stack);
1700 tre_literal_t *lit = node->obj;
1701 int pos = lit->position;
1702 int min = lit->code_min;
1703 int max = lit->code_max;
1704 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1706 /* XXX - e.g. [ab] has only one position but two
1707 nodes, so we are creating holes in the state space
1708 here. Not fatal, just wastes memory. */
1712 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1714 /* Change this tag to empty. */
1718 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1721 /* Maximize the first tag. */
1722 tag_directions[max] = TRE_TAG_MAXIMIZE;
1725 *result = tre_ast_new_literal(mem, min, max, pos);
1726 if (*result == NULL)
1727 status = REG_ESPACE;
1729 tre_literal_t *p = (*result)->obj;
1730 p->class = lit->class;
1731 p->neg_classes = lit->neg_classes;
1740 tre_union_t *uni = node->obj;
1742 *result = tre_ast_new_union(mem, uni->left, uni->right);
1743 if (*result == NULL)
1745 status = REG_ESPACE;
1748 tmp = (*result)->obj;
1749 result = &tmp->left;
1750 STACK_PUSHX(stack, voidptr, uni->right);
1751 STACK_PUSHX(stack, int, COPY_RECURSE);
1752 STACK_PUSHX(stack, voidptr, &tmp->right);
1753 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1754 STACK_PUSHX(stack, voidptr, uni->left);
1755 STACK_PUSHX(stack, int, COPY_RECURSE);
1760 tre_catenation_t *cat = node->obj;
1761 tre_catenation_t *tmp;
1762 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1763 if (*result == NULL)
1765 status = REG_ESPACE;
1768 tmp = (*result)->obj;
1771 result = &tmp->left;
1773 STACK_PUSHX(stack, voidptr, cat->right);
1774 STACK_PUSHX(stack, int, COPY_RECURSE);
1775 STACK_PUSHX(stack, voidptr, &tmp->right);
1776 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1777 STACK_PUSHX(stack, voidptr, cat->left);
1778 STACK_PUSHX(stack, int, COPY_RECURSE);
1783 tre_iteration_t *iter = node->obj;
1784 STACK_PUSHX(stack, voidptr, iter->arg);
1785 STACK_PUSHX(stack, int, COPY_RECURSE);
1786 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1787 iter->max, iter->minimal);
1788 if (*result == NULL)
1790 status = REG_ESPACE;
1793 iter = (*result)->obj;
1794 result = &iter->arg;
1804 *pos_add += num_copied;
1811 } tre_expand_ast_symbol_t;
1813 /* Expands each iteration node that has a finite nonzero minimum or maximum
1814 iteration count to a catenated sequence of copies of the node. */
1815 static reg_errcode_t
1816 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1817 int *position, tre_tag_direction_t *tag_directions)
1819 reg_errcode_t status = REG_OK;
1820 int bottom = tre_stack_num_objects(stack);
1822 int pos_add_total = 0;
1826 STACK_PUSHR(stack, voidptr, ast);
1827 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1828 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1830 tre_ast_node_t *node;
1831 tre_expand_ast_symbol_t symbol;
1833 if (status != REG_OK)
1836 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1837 node = tre_stack_pop_voidptr(stack);
1840 case EXPAND_RECURSE:
1845 tre_literal_t *lit= node->obj;
1846 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1848 lit->position += pos_add;
1849 if (lit->position > max_pos)
1850 max_pos = lit->position;
1856 tre_union_t *uni = node->obj;
1857 STACK_PUSHX(stack, voidptr, uni->right);
1858 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1859 STACK_PUSHX(stack, voidptr, uni->left);
1860 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1865 tre_catenation_t *cat = node->obj;
1866 STACK_PUSHX(stack, voidptr, cat->right);
1867 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1868 STACK_PUSHX(stack, voidptr, cat->left);
1869 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1874 tre_iteration_t *iter = node->obj;
1875 STACK_PUSHX(stack, int, pos_add);
1876 STACK_PUSHX(stack, voidptr, node);
1877 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1878 STACK_PUSHX(stack, voidptr, iter->arg);
1879 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1880 /* If we are going to expand this node at EXPAND_AFTER_ITER
1881 then don't increase the `pos' fields of the nodes now, it
1882 will get done when expanding. */
1883 if (iter->min > 1 || iter->max > 1)
1893 case EXPAND_AFTER_ITER:
1895 tre_iteration_t *iter = node->obj;
1897 pos_add = tre_stack_pop_int(stack);
1898 pos_add_last = pos_add;
1899 if (iter->min > 1 || iter->max > 1)
1901 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1903 int pos_add_save = pos_add;
1905 /* Create a catenated sequence of copies of the node. */
1906 for (j = 0; j < iter->min; j++)
1908 tre_ast_node_t *copy;
1909 /* Remove tags from all but the last copy. */
1910 int flags = ((j + 1 < iter->min)
1912 : COPY_MAXIMIZE_FIRST_TAG);
1913 pos_add_save = pos_add;
1914 status = tre_copy_ast(mem, stack, iter->arg, flags,
1915 &pos_add, tag_directions, ©,
1917 if (status != REG_OK)
1920 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1927 if (iter->max == -1)
1929 /* No upper limit. */
1930 pos_add_save = pos_add;
1931 status = tre_copy_ast(mem, stack, iter->arg, 0,
1932 &pos_add, NULL, &seq2, &max_pos);
1933 if (status != REG_OK)
1935 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1941 for (j = iter->min; j < iter->max; j++)
1943 tre_ast_node_t *tmp, *copy;
1944 pos_add_save = pos_add;
1945 status = tre_copy_ast(mem, stack, iter->arg, 0,
1946 &pos_add, NULL, ©, &max_pos);
1947 if (status != REG_OK)
1950 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1955 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1958 seq2 = tre_ast_new_union(mem, tmp, seq2);
1964 pos_add = pos_add_save;
1967 else if (seq2 != NULL)
1968 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1971 node->obj = seq1->obj;
1972 node->type = seq1->type;
1976 pos_add_total += pos_add - pos_add_last;
1977 if (iter_depth == 0)
1978 pos_add = pos_add_total;
1988 *position += pos_add_total;
1990 /* `max_pos' should never be larger than `*position' if the above
1991 code works, but just an extra safeguard let's make sure
1992 `*position' is set large enough so enough memory will be
1993 allocated for the transition table. */
1994 if (max_pos > *position)
1995 *position = max_pos;
2000 static tre_pos_and_tags_t *
2001 tre_set_empty(tre_mem_t mem)
2003 tre_pos_and_tags_t *new_set;
2005 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2006 if (new_set == NULL)
2009 new_set[0].position = -1;
2010 new_set[0].code_min = -1;
2011 new_set[0].code_max = -1;
2016 static tre_pos_and_tags_t *
2017 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2018 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2020 tre_pos_and_tags_t *new_set;
2022 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2023 if (new_set == NULL)
2026 new_set[0].position = position;
2027 new_set[0].code_min = code_min;
2028 new_set[0].code_max = code_max;
2029 new_set[0].class = class;
2030 new_set[0].neg_classes = neg_classes;
2031 new_set[0].backref = backref;
2032 new_set[1].position = -1;
2033 new_set[1].code_min = -1;
2034 new_set[1].code_max = -1;
2039 static tre_pos_and_tags_t *
2040 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2041 int *tags, int assertions)
2044 tre_pos_and_tags_t *new_set;
2048 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2049 for (s1 = 0; set1[s1].position >= 0; s1++);
2050 for (s2 = 0; set2[s2].position >= 0; s2++);
2051 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2055 for (s1 = 0; set1[s1].position >= 0; s1++)
2057 new_set[s1].position = set1[s1].position;
2058 new_set[s1].code_min = set1[s1].code_min;
2059 new_set[s1].code_max = set1[s1].code_max;
2060 new_set[s1].assertions = set1[s1].assertions | assertions;
2061 new_set[s1].class = set1[s1].class;
2062 new_set[s1].neg_classes = set1[s1].neg_classes;
2063 new_set[s1].backref = set1[s1].backref;
2064 if (set1[s1].tags == NULL && tags == NULL)
2065 new_set[s1].tags = NULL;
2068 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2069 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2070 * (i + num_tags + 1)));
2071 if (new_tags == NULL)
2073 for (j = 0; j < i; j++)
2074 new_tags[j] = set1[s1].tags[j];
2075 for (i = 0; i < num_tags; i++)
2076 new_tags[j + i] = tags[i];
2077 new_tags[j + i] = -1;
2078 new_set[s1].tags = new_tags;
2082 for (s2 = 0; set2[s2].position >= 0; s2++)
2084 new_set[s1 + s2].position = set2[s2].position;
2085 new_set[s1 + s2].code_min = set2[s2].code_min;
2086 new_set[s1 + s2].code_max = set2[s2].code_max;
2087 /* XXX - why not | assertions here as well? */
2088 new_set[s1 + s2].assertions = set2[s2].assertions;
2089 new_set[s1 + s2].class = set2[s2].class;
2090 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2091 new_set[s1 + s2].backref = set2[s2].backref;
2092 if (set2[s2].tags == NULL)
2093 new_set[s1 + s2].tags = NULL;
2096 for (i = 0; set2[s2].tags[i] >= 0; i++);
2097 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2098 if (new_tags == NULL)
2100 for (j = 0; j < i; j++)
2101 new_tags[j] = set2[s2].tags[j];
2103 new_set[s1 + s2].tags = new_tags;
2106 new_set[s1 + s2].position = -1;
2110 /* Finds the empty path through `node' which is the one that should be
2111 taken according to POSIX.2 rules, and adds the tags on that path to
2112 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2113 set to the number of tags seen on the path. */
2114 static reg_errcode_t
2115 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2116 int *assertions, int *num_tags_seen)
2120 tre_catenation_t *cat;
2121 tre_iteration_t *iter;
2123 int bottom = tre_stack_num_objects(stack);
2124 reg_errcode_t status = REG_OK;
2128 status = tre_stack_push_voidptr(stack, node);
2130 /* Walk through the tree recursively. */
2131 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2133 node = tre_stack_pop_voidptr(stack);
2138 lit = (tre_literal_t *)node->obj;
2139 switch (lit->code_min)
2142 if (lit->code_max >= 0)
2146 /* Add the tag to `tags'. */
2147 for (i = 0; tags[i] >= 0; i++)
2148 if (tags[i] == lit->code_max)
2152 tags[i] = lit->code_max;
2161 assert(lit->code_max >= 1
2162 || lit->code_max <= ASSERT_LAST);
2163 if (assertions != NULL)
2164 *assertions |= lit->code_max;
2175 /* Subexpressions starting earlier take priority over ones
2176 starting later, so we prefer the left subexpression over the
2177 right subexpression. */
2178 uni = (tre_union_t *)node->obj;
2179 if (uni->left->nullable)
2180 STACK_PUSHX(stack, voidptr, uni->left)
2181 else if (uni->right->nullable)
2182 STACK_PUSHX(stack, voidptr, uni->right)
2188 /* The path must go through both children. */
2189 cat = (tre_catenation_t *)node->obj;
2190 assert(cat->left->nullable);
2191 assert(cat->right->nullable);
2192 STACK_PUSHX(stack, voidptr, cat->left);
2193 STACK_PUSHX(stack, voidptr, cat->right);
2197 /* A match with an empty string is preferred over no match at
2198 all, so we go through the argument if possible. */
2199 iter = (tre_iteration_t *)node->obj;
2200 if (iter->arg->nullable)
2201 STACK_PUSHX(stack, voidptr, iter->arg);
2217 NFL_POST_CATENATION,
2219 } tre_nfl_stack_symbol_t;
2222 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2223 the nodes of the AST `tree'. */
2224 static reg_errcode_t
2225 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2227 int bottom = tre_stack_num_objects(stack);
2229 STACK_PUSHR(stack, voidptr, tree);
2230 STACK_PUSHR(stack, int, NFL_RECURSE);
2232 while (tre_stack_num_objects(stack) > bottom)
2234 tre_nfl_stack_symbol_t symbol;
2235 tre_ast_node_t *node;
2237 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2238 node = tre_stack_pop_voidptr(stack);
2246 tre_literal_t *lit = (tre_literal_t *)node->obj;
2247 if (IS_BACKREF(lit))
2249 /* Back references: nullable = false, firstpos = {i},
2252 node->firstpos = tre_set_one(mem, lit->position, 0,
2253 TRE_CHAR_MAX, 0, NULL, -1);
2254 if (!node->firstpos)
2256 node->lastpos = tre_set_one(mem, lit->position, 0,
2257 TRE_CHAR_MAX, 0, NULL,
2258 (int)lit->code_max);
2262 else if (lit->code_min < 0)
2264 /* Tags, empty strings, params, and zero width assertions:
2265 nullable = true, firstpos = {}, and lastpos = {}. */
2267 node->firstpos = tre_set_empty(mem);
2268 if (!node->firstpos)
2270 node->lastpos = tre_set_empty(mem);
2276 /* Literal at position i: nullable = false, firstpos = {i},
2280 tre_set_one(mem, lit->position, (int)lit->code_min,
2281 (int)lit->code_max, 0, NULL, -1);
2282 if (!node->firstpos)
2284 node->lastpos = tre_set_one(mem, lit->position,
2287 lit->class, lit->neg_classes,
2296 /* Compute the attributes for the two subtrees, and after that
2298 STACK_PUSHR(stack, voidptr, node);
2299 STACK_PUSHR(stack, int, NFL_POST_UNION);
2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2301 STACK_PUSHR(stack, int, NFL_RECURSE);
2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2303 STACK_PUSHR(stack, int, NFL_RECURSE);
2307 /* Compute the attributes for the two subtrees, and after that
2309 STACK_PUSHR(stack, voidptr, node);
2310 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2312 STACK_PUSHR(stack, int, NFL_RECURSE);
2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2314 STACK_PUSHR(stack, int, NFL_RECURSE);
2318 /* Compute the attributes for the subtree, and after that for
2320 STACK_PUSHR(stack, voidptr, node);
2321 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2322 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2323 STACK_PUSHR(stack, int, NFL_RECURSE);
2326 break; /* end case: NFL_RECURSE */
2328 case NFL_POST_UNION:
2330 tre_union_t *uni = (tre_union_t *)node->obj;
2331 node->nullable = uni->left->nullable || uni->right->nullable;
2332 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2333 uni->right->firstpos, NULL, 0);
2334 if (!node->firstpos)
2336 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2337 uni->right->lastpos, NULL, 0);
2343 case NFL_POST_ITERATION:
2345 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2347 if (iter->min == 0 || iter->arg->nullable)
2351 node->firstpos = iter->arg->firstpos;
2352 node->lastpos = iter->arg->lastpos;
2356 case NFL_POST_CATENATION:
2358 int num_tags, *tags, assertions;
2359 reg_errcode_t status;
2360 tre_catenation_t *cat = node->obj;
2361 node->nullable = cat->left->nullable && cat->right->nullable;
2363 /* Compute firstpos. */
2364 if (cat->left->nullable)
2366 /* The left side matches the empty string. Make a first pass
2367 with tre_match_empty() to get the number of tags and
2369 status = tre_match_empty(stack, cat->left,
2370 NULL, NULL, &num_tags);
2371 if (status != REG_OK)
2373 /* Allocate arrays for the tags and parameters. */
2374 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2379 /* Second pass with tre_mach_empty() to get the list of
2380 tags and parameters. */
2381 status = tre_match_empty(stack, cat->left, tags,
2383 if (status != REG_OK)
2389 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2392 if (!node->firstpos)
2397 node->firstpos = cat->left->firstpos;
2400 /* Compute lastpos. */
2401 if (cat->right->nullable)
2403 /* The right side matches the empty string. Make a first pass
2404 with tre_match_empty() to get the number of tags and
2406 status = tre_match_empty(stack, cat->right,
2407 NULL, NULL, &num_tags);
2408 if (status != REG_OK)
2410 /* Allocate arrays for the tags and parameters. */
2411 tags = xmalloc(sizeof(int) * (num_tags + 1));
2416 /* Second pass with tre_mach_empty() to get the list of
2417 tags and parameters. */
2418 status = tre_match_empty(stack, cat->right, tags,
2420 if (status != REG_OK)
2426 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2434 node->lastpos = cat->right->lastpos;
2449 /* Adds a transition from each position in `p1' to each position in `p2'. */
2450 static reg_errcode_t
2451 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2452 tre_tnfa_transition_t *transitions,
2453 int *counts, int *offs)
2455 tre_pos_and_tags_t *orig_p2 = p2;
2456 tre_tnfa_transition_t *trans;
2457 int i, j, k, l, dup, prev_p2_pos;
2459 if (transitions != NULL)
2460 while (p1->position >= 0)
2464 while (p2->position >= 0)
2466 /* Optimization: if this position was already handled, skip it. */
2467 if (p2->position == prev_p2_pos)
2472 prev_p2_pos = p2->position;
2473 /* Set `trans' to point to the next unused transition from
2474 position `p1->position'. */
2475 trans = transitions + offs[p1->position];
2476 while (trans->state != NULL)
2479 /* If we find a previous transition from `p1->position' to
2480 `p2->position', it is overwritten. This can happen only
2481 if there are nested loops in the regexp, like in "((a)*)*".
2482 In POSIX.2 repetition using the outer loop is always
2483 preferred over using the inner loop. Therefore the
2484 transition for the inner loop is useless and can be thrown
2486 /* XXX - The same position is used for all nodes in a bracket
2487 expression, so this optimization cannot be used (it will
2488 break bracket expressions) unless I figure out a way to
2490 if (trans->state_id == p2->position)
2498 if (trans->state == NULL)
2499 (trans + 1)->state = NULL;
2500 /* Use the character ranges, assertions, etc. from `p1' for
2501 the transition from `p1' to `p2'. */
2502 trans->code_min = p1->code_min;
2503 trans->code_max = p1->code_max;
2504 trans->state = transitions + offs[p2->position];
2505 trans->state_id = p2->position;
2506 trans->assertions = p1->assertions | p2->assertions
2507 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2508 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2509 if (p1->backref >= 0)
2511 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2512 assert(p2->backref < 0);
2513 trans->u.backref = p1->backref;
2514 trans->assertions |= ASSERT_BACKREF;
2517 trans->u.class = p1->class;
2518 if (p1->neg_classes != NULL)
2520 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2521 trans->neg_classes =
2522 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2523 if (trans->neg_classes == NULL)
2525 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2526 trans->neg_classes[i] = p1->neg_classes[i];
2527 trans->neg_classes[i] = (tre_ctype_t)0;
2530 trans->neg_classes = NULL;
2532 /* Find out how many tags this transition has. */
2534 if (p1->tags != NULL)
2535 while(p1->tags[i] >= 0)
2538 if (p2->tags != NULL)
2539 while(p2->tags[j] >= 0)
2542 /* If we are overwriting a transition, free the old tag array. */
2543 if (trans->tags != NULL)
2547 /* If there were any tags, allocate an array and fill it. */
2550 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2554 if (p1->tags != NULL)
2555 while(p1->tags[i] >= 0)
2557 trans->tags[i] = p1->tags[i];
2562 if (p2->tags != NULL)
2563 while (p2->tags[j] >= 0)
2565 /* Don't add duplicates. */
2567 for (k = 0; k < i; k++)
2568 if (trans->tags[k] == p2->tags[j])
2574 trans->tags[l++] = p2->tags[j];
2577 trans->tags[l] = -1;
2585 /* Compute a maximum limit for the number of transitions leaving
2587 while (p1->position >= 0)
2590 while (p2->position >= 0)
2592 counts[p1->position]++;
2600 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2601 labelled with one character range (there are no transitions on empty
2602 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2604 static reg_errcode_t
2605 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2606 int *counts, int *offs)
2609 tre_catenation_t *cat;
2610 tre_iteration_t *iter;
2611 reg_errcode_t errcode = REG_OK;
2613 /* XXX - recurse using a stack!. */
2619 uni = (tre_union_t *)node->obj;
2620 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2621 if (errcode != REG_OK)
2623 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2627 cat = (tre_catenation_t *)node->obj;
2628 /* Add a transition from each position in cat->left->lastpos
2629 to each position in cat->right->firstpos. */
2630 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2631 transitions, counts, offs);
2632 if (errcode != REG_OK)
2634 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2635 if (errcode != REG_OK)
2637 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2641 iter = (tre_iteration_t *)node->obj;
2642 assert(iter->max == -1 || iter->max == 1);
2644 if (iter->max == -1)
2646 assert(iter->min == 0 || iter->min == 1);
2647 /* Add a transition from each last position in the iterated
2648 expression to each first position. */
2649 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2650 transitions, counts, offs);
2651 if (errcode != REG_OK)
2654 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2661 #define ERROR_EXIT(err) \
2665 if (/*CONSTCOND*/1) \
2668 while (/*CONSTCOND*/0)
2672 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2675 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2676 tre_pos_and_tags_t *p;
2677 int *counts = NULL, *offs = NULL;
2679 tre_tnfa_transition_t *transitions, *initial;
2680 tre_tnfa_t *tnfa = NULL;
2681 tre_submatch_data_t *submatch_data;
2682 tre_tag_direction_t *tag_directions = NULL;
2683 reg_errcode_t errcode;
2686 /* Parse context. */
2687 tre_parse_ctx_t parse_ctx;
2689 /* Allocate a stack used throughout the compilation process for various
2691 stack = tre_stack_new(512, 10240, 128);
2694 /* Allocate a fast memory allocator. */
2695 mem = tre_mem_new();
2698 tre_stack_destroy(stack);
2702 /* Parse the regexp. */
2703 memset(&parse_ctx, 0, sizeof(parse_ctx));
2704 parse_ctx.mem = mem;
2705 parse_ctx.stack = stack;
2706 parse_ctx.re = regex;
2707 parse_ctx.cflags = cflags;
2708 parse_ctx.max_backref = -1;
2709 errcode = tre_parse(&parse_ctx);
2710 if (errcode != REG_OK)
2711 ERROR_EXIT(errcode);
2712 preg->re_nsub = parse_ctx.submatch_id - 1;
2716 tre_ast_print(tree);
2717 #endif /* TRE_DEBUG */
2719 /* Referring to nonexistent subexpressions is illegal. */
2720 if (parse_ctx.max_backref > (int)preg->re_nsub)
2721 ERROR_EXIT(REG_ESUBREG);
2723 /* Allocate the TNFA struct. */
2724 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2726 ERROR_EXIT(REG_ESPACE);
2727 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2728 tnfa->have_approx = 0;
2729 tnfa->num_submatches = parse_ctx.submatch_id;
2731 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2732 regexp does not have back references, this can be skipped. */
2733 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2736 /* Figure out how many tags we will need. */
2737 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2738 if (errcode != REG_OK)
2739 ERROR_EXIT(errcode);
2741 if (tnfa->num_tags > 0)
2743 tag_directions = xmalloc(sizeof(*tag_directions)
2744 * (tnfa->num_tags + 1));
2745 if (tag_directions == NULL)
2746 ERROR_EXIT(REG_ESPACE);
2747 tnfa->tag_directions = tag_directions;
2748 memset(tag_directions, -1,
2749 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2751 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2752 sizeof(*tnfa->minimal_tags));
2753 if (tnfa->minimal_tags == NULL)
2754 ERROR_EXIT(REG_ESPACE);
2756 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2757 sizeof(*submatch_data));
2758 if (submatch_data == NULL)
2759 ERROR_EXIT(REG_ESPACE);
2760 tnfa->submatch_data = submatch_data;
2762 errcode = tre_add_tags(mem, stack, tree, tnfa);
2763 if (errcode != REG_OK)
2764 ERROR_EXIT(errcode);
2768 /* Expand iteration nodes. */
2769 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2771 if (errcode != REG_OK)
2772 ERROR_EXIT(errcode);
2774 /* Add a dummy node for the final state.
2775 XXX - For certain patterns this dummy node can be optimized away,
2776 for example "a*" or "ab*". Figure out a simple way to detect
2777 this possibility. */
2779 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2780 if (tmp_ast_r == NULL)
2781 ERROR_EXIT(REG_ESPACE);
2783 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2785 ERROR_EXIT(REG_ESPACE);
2787 errcode = tre_compute_nfl(mem, stack, tree);
2788 if (errcode != REG_OK)
2789 ERROR_EXIT(errcode);
2791 counts = xmalloc(sizeof(int) * parse_ctx.position);
2793 ERROR_EXIT(REG_ESPACE);
2795 offs = xmalloc(sizeof(int) * parse_ctx.position);
2797 ERROR_EXIT(REG_ESPACE);
2799 for (i = 0; i < parse_ctx.position; i++)
2801 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2804 for (i = 0; i < parse_ctx.position; i++)
2807 add += counts[i] + 1;
2810 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2811 if (transitions == NULL)
2812 ERROR_EXIT(REG_ESPACE);
2813 tnfa->transitions = transitions;
2814 tnfa->num_transitions = add;
2816 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2817 if (errcode != REG_OK)
2818 ERROR_EXIT(errcode);
2820 tnfa->firstpos_chars = NULL;
2824 while (p->position >= 0)
2830 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2831 if (initial == NULL)
2832 ERROR_EXIT(REG_ESPACE);
2833 tnfa->initial = initial;
2836 for (p = tree->firstpos; p->position >= 0; p++)
2838 initial[i].state = transitions + offs[p->position];
2839 initial[i].state_id = p->position;
2840 initial[i].tags = NULL;
2841 /* Copy the arrays p->tags, and p->params, they are allocated
2842 from a tre_mem object. */
2846 for (j = 0; p->tags[j] >= 0; j++);
2847 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2848 if (!initial[i].tags)
2849 ERROR_EXIT(REG_ESPACE);
2850 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2852 initial[i].assertions = p->assertions;
2855 initial[i].state = NULL;
2857 tnfa->num_transitions = add;
2858 tnfa->final = transitions + offs[tree->lastpos[0].position];
2859 tnfa->num_states = parse_ctx.position;
2860 tnfa->cflags = cflags;
2862 tre_mem_destroy(mem);
2863 tre_stack_destroy(stack);
2867 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2871 /* Free everything that was allocated and return the error code. */
2872 tre_mem_destroy(mem);
2874 tre_stack_destroy(stack);
2879 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2888 regfree(regex_t *preg)
2892 tre_tnfa_transition_t *trans;
2894 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2898 for (i = 0; i < tnfa->num_transitions; i++)
2899 if (tnfa->transitions[i].state)
2901 if (tnfa->transitions[i].tags)
2902 xfree(tnfa->transitions[i].tags);
2903 if (tnfa->transitions[i].neg_classes)
2904 xfree(tnfa->transitions[i].neg_classes);
2906 if (tnfa->transitions)
2907 xfree(tnfa->transitions);
2911 for (trans = tnfa->initial; trans->state; trans++)
2916 xfree(tnfa->initial);
2919 if (tnfa->submatch_data)
2921 for (i = 0; i < tnfa->num_submatches; i++)
2922 if (tnfa->submatch_data[i].parents)
2923 xfree(tnfa->submatch_data[i].parents);
2924 xfree(tnfa->submatch_data);
2927 if (tnfa->tag_directions)
2928 xfree(tnfa->tag_directions);
2929 if (tnfa->firstpos_chars)
2930 xfree(tnfa->firstpos_chars);
2931 if (tnfa->minimal_tags)
2932 xfree(tnfa->minimal_tags);