2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
841 /* reject repetitions after empty expression in BRE */
845 /* extension: treat \| as alternation in BRE */
847 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
853 if (!ere && (unsigned)*s-'1' < 9) {
856 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
857 ctx->max_backref = MAX(val, ctx->max_backref);
859 /* extension: accept unknown escaped char
867 if (ctx->cflags & REG_NEWLINE) {
868 tre_ast_node_t *tmp1, *tmp2;
869 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
870 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
872 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
876 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
881 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
882 if (!ere && s != ctx->re)
884 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
888 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
891 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
899 /* reject repetitions after empty expression in ERE */
906 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
910 len = mbtowc(&wc, s, -1);
913 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
914 tre_ast_node_t *tmp1, *tmp2;
915 /* multiple opposite case characters are not supported */
916 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
917 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
919 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
923 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
937 #define PUSHPTR(err, s, v) do { \
938 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
942 #define PUSHINT(err, s, v) do { \
943 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
947 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
949 tre_ast_node_t *nbranch=0, *nunion=0;
950 int ere = ctx->cflags & REG_EXTENDED;
951 const char *s = ctx->re;
955 tre_stack_t *stack = ctx->stack;
957 PUSHINT(err, stack, subid++);
959 if ((!ere && *s == '\\' && s[1] == '(') ||
960 (ere && *s == '(')) {
961 PUSHPTR(err, stack, nunion);
962 PUSHPTR(err, stack, nbranch);
963 PUSHINT(err, stack, subid++);
968 nbranch = nunion = 0;
971 if ((!ere && *s == '\\' && s[1] == ')') ||
972 (ere && *s == ')' && depth)) {
973 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
977 err = parse_atom(ctx, s);
984 /* extension: repetitions are rejected after an empty node
985 eg. (+), |*, {2}, but assertions are not treated as empty
986 so ^* or $? are accepted currently. */
988 if (*s!='\\' && *s!='*') {
991 if (*s!='+' && *s!='?' && *s!='{')
996 if (*s=='\\' && s[1]!='{')
1001 /* extension: multiple consecutive *+?{,} is unspecified,
1002 but (a+)+ has to be supported so accepting a++ makes
1003 sense, note however that the RE_DUP_MAX limit can be
1004 circumvented: (a{255}){255} uses a lot of memory.. */
1006 err = parse_dup(ctx, s+1);
1017 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1023 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1024 if ((ere && *s == '|') ||
1025 (ere && *s == ')' && depth) ||
1026 (!ere && *s == '\\' && s[1] == ')') ||
1027 /* extension: treat \| as alternation in BRE */
1028 (!ere && *s == '\\' && s[1] == '|') ||
1030 /* extension: empty branch is unspecified (), (|a), (a|)
1031 here they are not rejected but match on empty string */
1033 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1036 if (c == '\\' && s[1] == '|') {
1038 } else if (c == '|') {
1042 if (!depth) return REG_EPAREN;
1044 } else if (c == ')')
1047 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1050 if (!c && depth<0) {
1051 ctx->submatch_id = subid;
1056 nbranch = tre_stack_pop_voidptr(stack);
1057 nunion = tre_stack_pop_voidptr(stack);
1065 /***********************************************************************
1067 ***********************************************************************/
1072 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1077 Algorithms to setup tags so that submatch addressing can be done.
1081 /* Inserts a catenation node to the root of the tree given in `node'.
1082 As the left child a new tag with number `tag_id' to `node' is added,
1083 and the right child is the old root. */
1084 static reg_errcode_t
1085 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1087 tre_catenation_t *c;
1089 c = tre_mem_alloc(mem, sizeof(*c));
1092 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1093 if (c->left == NULL)
1095 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1096 if (c->right == NULL)
1099 c->right->obj = node->obj;
1100 c->right->type = node->type;
1101 c->right->nullable = -1;
1102 c->right->submatch_id = -1;
1103 c->right->firstpos = NULL;
1104 c->right->lastpos = NULL;
1105 c->right->num_tags = 0;
1107 node->type = CATENATION;
1111 /* Inserts a catenation node to the root of the tree given in `node'.
1112 As the right child a new tag with number `tag_id' to `node' is added,
1113 and the left child is the old root. */
1114 static reg_errcode_t
1115 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1117 tre_catenation_t *c;
1119 c = tre_mem_alloc(mem, sizeof(*c));
1122 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1123 if (c->right == NULL)
1125 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1126 if (c->left == NULL)
1129 c->left->obj = node->obj;
1130 c->left->type = node->type;
1131 c->left->nullable = -1;
1132 c->left->submatch_id = -1;
1133 c->left->firstpos = NULL;
1134 c->left->lastpos = NULL;
1135 c->left->num_tags = 0;
1137 node->type = CATENATION;
1143 ADDTAGS_AFTER_ITERATION,
1144 ADDTAGS_AFTER_UNION_LEFT,
1145 ADDTAGS_AFTER_UNION_RIGHT,
1146 ADDTAGS_AFTER_CAT_LEFT,
1147 ADDTAGS_AFTER_CAT_RIGHT,
1148 ADDTAGS_SET_SUBMATCH_END
1149 } tre_addtags_symbol_t;
1158 /* Go through `regset' and set submatch data for submatches that are
1161 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1165 for (i = 0; regset[i] >= 0; i++)
1167 int id = regset[i] / 2;
1168 int start = !(regset[i] % 2);
1170 tnfa->submatch_data[id].so_tag = tag;
1172 tnfa->submatch_data[id].eo_tag = tag;
1178 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1179 subexpressions marked for submatch addressing can be traced. */
1180 static reg_errcode_t
1181 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1184 reg_errcode_t status = REG_OK;
1185 tre_addtags_symbol_t symbol;
1186 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1187 int bottom = tre_stack_num_objects(stack);
1188 /* True for first pass (counting number of needed tags) */
1189 int first_pass = (mem == NULL || tnfa == NULL);
1190 int *regset, *orig_regset;
1191 int num_tags = 0; /* Total number of tags. */
1192 int num_minimals = 0; /* Number of special minimal tags. */
1193 int tag = 0; /* The tag that is to be added next. */
1194 int next_tag = 1; /* Next tag to use after this one. */
1195 int *parents; /* Stack of submatches the current submatch is
1197 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1198 tre_tag_states_t *saved_states;
1200 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1204 tnfa->minimal_tags[0] = -1;
1207 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1211 orig_regset = regset;
1213 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1214 if (parents == NULL)
1221 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1222 if (saved_states == NULL)
1231 for (i = 0; i <= tnfa->num_submatches; i++)
1232 saved_states[i].tag = -1;
1235 STACK_PUSH(stack, voidptr, node);
1236 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1238 while (tre_stack_num_objects(stack) > bottom)
1240 if (status != REG_OK)
1243 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1247 case ADDTAGS_SET_SUBMATCH_END:
1249 int id = tre_stack_pop_int(stack);
1252 /* Add end of this submatch to regset. */
1253 for (i = 0; regset[i] >= 0; i++);
1254 regset[i] = id * 2 + 1;
1257 /* Pop this submatch from the parents stack. */
1258 for (i = 0; parents[i] >= 0; i++);
1259 parents[i - 1] = -1;
1263 case ADDTAGS_RECURSE:
1264 node = tre_stack_pop_voidptr(stack);
1266 if (node->submatch_id >= 0)
1268 int id = node->submatch_id;
1272 /* Add start of this submatch to regset. */
1273 for (i = 0; regset[i] >= 0; i++);
1279 for (i = 0; parents[i] >= 0; i++);
1280 tnfa->submatch_data[id].parents = NULL;
1283 int *p = xmalloc(sizeof(*p) * (i + 1));
1286 status = REG_ESPACE;
1289 assert(tnfa->submatch_data[id].parents == NULL);
1290 tnfa->submatch_data[id].parents = p;
1291 for (i = 0; parents[i] >= 0; i++)
1297 /* Add end of this submatch to regset after processing this
1299 STACK_PUSHX(stack, int, node->submatch_id);
1300 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1307 tre_literal_t *lit = node->obj;
1309 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1314 /* Regset is not empty, so add a tag before the
1315 literal or backref. */
1318 status = tre_add_tag_left(mem, node, tag);
1319 tnfa->tag_directions[tag] = direction;
1320 if (minimal_tag >= 0)
1322 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1323 tnfa->minimal_tags[i] = tag;
1324 tnfa->minimal_tags[i + 1] = minimal_tag;
1325 tnfa->minimal_tags[i + 2] = -1;
1329 tre_purge_regset(regset, tnfa, tag);
1344 assert(!IS_TAG(lit));
1350 tre_catenation_t *cat = node->obj;
1351 tre_ast_node_t *left = cat->left;
1352 tre_ast_node_t *right = cat->right;
1353 int reserved_tag = -1;
1356 /* After processing right child. */
1357 STACK_PUSHX(stack, voidptr, node);
1358 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1360 /* Process right child. */
1361 STACK_PUSHX(stack, voidptr, right);
1362 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1364 /* After processing left child. */
1365 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1366 if (left->num_tags > 0 && right->num_tags > 0)
1368 /* Reserve the next tag to the right child. */
1369 reserved_tag = next_tag;
1372 STACK_PUSHX(stack, int, reserved_tag);
1373 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1375 /* Process left child. */
1376 STACK_PUSHX(stack, voidptr, left);
1377 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1383 tre_iteration_t *iter = node->obj;
1387 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1391 STACK_PUSHX(stack, int, tag);
1392 STACK_PUSHX(stack, int, iter->minimal);
1394 STACK_PUSHX(stack, voidptr, node);
1395 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1397 STACK_PUSHX(stack, voidptr, iter->arg);
1398 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1400 /* Regset is not empty, so add a tag here. */
1401 if (regset[0] >= 0 || iter->minimal)
1406 status = tre_add_tag_left(mem, node, tag);
1408 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1410 tnfa->tag_directions[tag] = direction;
1411 if (minimal_tag >= 0)
1413 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1414 tnfa->minimal_tags[i] = tag;
1415 tnfa->minimal_tags[i + 1] = minimal_tag;
1416 tnfa->minimal_tags[i + 2] = -1;
1420 tre_purge_regset(regset, tnfa, tag);
1428 direction = TRE_TAG_MINIMIZE;
1433 tre_union_t *uni = node->obj;
1434 tre_ast_node_t *left = uni->left;
1435 tre_ast_node_t *right = uni->right;
1441 left_tag = next_tag;
1442 right_tag = next_tag + 1;
1447 right_tag = next_tag;
1450 /* After processing right child. */
1451 STACK_PUSHX(stack, int, right_tag);
1452 STACK_PUSHX(stack, int, left_tag);
1453 STACK_PUSHX(stack, voidptr, regset);
1454 STACK_PUSHX(stack, int, regset[0] >= 0);
1455 STACK_PUSHX(stack, voidptr, node);
1456 STACK_PUSHX(stack, voidptr, right);
1457 STACK_PUSHX(stack, voidptr, left);
1458 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1460 /* Process right child. */
1461 STACK_PUSHX(stack, voidptr, right);
1462 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1464 /* After processing left child. */
1465 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1467 /* Process left child. */
1468 STACK_PUSHX(stack, voidptr, left);
1469 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1471 /* Regset is not empty, so add a tag here. */
1477 status = tre_add_tag_left(mem, node, tag);
1478 tnfa->tag_directions[tag] = direction;
1479 if (minimal_tag >= 0)
1481 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1482 tnfa->minimal_tags[i] = tag;
1483 tnfa->minimal_tags[i + 1] = minimal_tag;
1484 tnfa->minimal_tags[i + 2] = -1;
1488 tre_purge_regset(regset, tnfa, tag);
1497 if (node->num_submatches > 0)
1499 /* The next two tags are reserved for markers. */
1509 if (node->submatch_id >= 0)
1512 /* Push this submatch on the parents stack. */
1513 for (i = 0; parents[i] >= 0; i++);
1514 parents[i] = node->submatch_id;
1515 parents[i + 1] = -1;
1518 break; /* end case: ADDTAGS_RECURSE */
1520 case ADDTAGS_AFTER_ITERATION:
1524 node = tre_stack_pop_voidptr(stack);
1527 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1528 + tre_stack_pop_int(stack);
1533 minimal = tre_stack_pop_int(stack);
1534 enter_tag = tre_stack_pop_int(stack);
1536 minimal_tag = enter_tag;
1542 direction = TRE_TAG_MINIMIZE;
1544 direction = TRE_TAG_MAXIMIZE;
1549 case ADDTAGS_AFTER_CAT_LEFT:
1551 int new_tag = tre_stack_pop_int(stack);
1552 next_tag = tre_stack_pop_int(stack);
1560 case ADDTAGS_AFTER_CAT_RIGHT:
1561 node = tre_stack_pop_voidptr(stack);
1563 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1564 + ((tre_catenation_t *)node->obj)->right->num_tags;
1567 case ADDTAGS_AFTER_UNION_LEFT:
1568 /* Lift the bottom of the `regset' array so that when processing
1569 the right operand the items currently in the array are
1570 invisible. The original bottom was saved at ADDTAGS_UNION and
1571 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1572 while (*regset >= 0)
1576 case ADDTAGS_AFTER_UNION_RIGHT:
1578 int added_tags, tag_left, tag_right;
1579 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1580 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1581 node = tre_stack_pop_voidptr(stack);
1582 added_tags = tre_stack_pop_int(stack);
1585 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1586 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1587 + ((node->num_submatches > 0) ? 2 : 0);
1589 regset = tre_stack_pop_voidptr(stack);
1590 tag_left = tre_stack_pop_int(stack);
1591 tag_right = tre_stack_pop_int(stack);
1593 /* Add tags after both children, the left child gets a smaller
1594 tag than the right child. This guarantees that we prefer
1595 the left child over the right child. */
1596 /* XXX - This is not always necessary (if the children have
1597 tags which must be seen for every match of that child). */
1598 /* XXX - Check if this is the only place where tre_add_tag_right
1599 is used. If so, use tre_add_tag_left (putting the tag before
1600 the child as opposed after the child) and throw away
1601 tre_add_tag_right. */
1602 if (node->num_submatches > 0)
1606 status = tre_add_tag_right(mem, left, tag_left);
1607 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1608 if (status == REG_OK)
1609 status = tre_add_tag_right(mem, right, tag_right);
1610 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1614 direction = TRE_TAG_MAXIMIZE;
1622 } /* end switch(symbol) */
1623 } /* end while(tre_stack_num_objects(stack) > bottom) */
1626 tre_purge_regset(regset, tnfa, tag);
1628 if (!first_pass && minimal_tag >= 0)
1631 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1632 tnfa->minimal_tags[i] = tag;
1633 tnfa->minimal_tags[i + 1] = minimal_tag;
1634 tnfa->minimal_tags[i + 2] = -1;
1639 assert(tree->num_tags == num_tags);
1640 tnfa->end_tag = num_tags;
1641 tnfa->num_tags = num_tags;
1642 tnfa->num_minimals = num_minimals;
1645 xfree(saved_states);
1652 AST to TNFA compilation routines.
1658 } tre_copyast_symbol_t;
1660 /* Flags for tre_copy_ast(). */
1661 #define COPY_REMOVE_TAGS 1
1662 #define COPY_MAXIMIZE_FIRST_TAG 2
1664 static reg_errcode_t
1665 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1666 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1667 tre_ast_node_t **copy, int *max_pos)
1669 reg_errcode_t status = REG_OK;
1670 int bottom = tre_stack_num_objects(stack);
1673 tre_ast_node_t **result = copy;
1674 tre_copyast_symbol_t symbol;
1676 STACK_PUSH(stack, voidptr, ast);
1677 STACK_PUSH(stack, int, COPY_RECURSE);
1679 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1681 tre_ast_node_t *node;
1682 if (status != REG_OK)
1685 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1688 case COPY_SET_RESULT_PTR:
1689 result = tre_stack_pop_voidptr(stack);
1692 node = tre_stack_pop_voidptr(stack);
1697 tre_literal_t *lit = node->obj;
1698 int pos = lit->position;
1699 int min = lit->code_min;
1700 int max = lit->code_max;
1701 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1703 /* XXX - e.g. [ab] has only one position but two
1704 nodes, so we are creating holes in the state space
1705 here. Not fatal, just wastes memory. */
1709 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1711 /* Change this tag to empty. */
1715 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1718 /* Maximize the first tag. */
1719 tag_directions[max] = TRE_TAG_MAXIMIZE;
1722 *result = tre_ast_new_literal(mem, min, max, pos);
1723 if (*result == NULL)
1724 status = REG_ESPACE;
1726 tre_literal_t *p = (*result)->obj;
1727 p->class = lit->class;
1728 p->neg_classes = lit->neg_classes;
1737 tre_union_t *uni = node->obj;
1739 *result = tre_ast_new_union(mem, uni->left, uni->right);
1740 if (*result == NULL)
1742 status = REG_ESPACE;
1745 tmp = (*result)->obj;
1746 result = &tmp->left;
1747 STACK_PUSHX(stack, voidptr, uni->right);
1748 STACK_PUSHX(stack, int, COPY_RECURSE);
1749 STACK_PUSHX(stack, voidptr, &tmp->right);
1750 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1751 STACK_PUSHX(stack, voidptr, uni->left);
1752 STACK_PUSHX(stack, int, COPY_RECURSE);
1757 tre_catenation_t *cat = node->obj;
1758 tre_catenation_t *tmp;
1759 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1760 if (*result == NULL)
1762 status = REG_ESPACE;
1765 tmp = (*result)->obj;
1768 result = &tmp->left;
1770 STACK_PUSHX(stack, voidptr, cat->right);
1771 STACK_PUSHX(stack, int, COPY_RECURSE);
1772 STACK_PUSHX(stack, voidptr, &tmp->right);
1773 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1774 STACK_PUSHX(stack, voidptr, cat->left);
1775 STACK_PUSHX(stack, int, COPY_RECURSE);
1780 tre_iteration_t *iter = node->obj;
1781 STACK_PUSHX(stack, voidptr, iter->arg);
1782 STACK_PUSHX(stack, int, COPY_RECURSE);
1783 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1784 iter->max, iter->minimal);
1785 if (*result == NULL)
1787 status = REG_ESPACE;
1790 iter = (*result)->obj;
1791 result = &iter->arg;
1801 *pos_add += num_copied;
1808 } tre_expand_ast_symbol_t;
1810 /* Expands each iteration node that has a finite nonzero minimum or maximum
1811 iteration count to a catenated sequence of copies of the node. */
1812 static reg_errcode_t
1813 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1814 int *position, tre_tag_direction_t *tag_directions)
1816 reg_errcode_t status = REG_OK;
1817 int bottom = tre_stack_num_objects(stack);
1819 int pos_add_total = 0;
1823 STACK_PUSHR(stack, voidptr, ast);
1824 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1825 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1827 tre_ast_node_t *node;
1828 tre_expand_ast_symbol_t symbol;
1830 if (status != REG_OK)
1833 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1834 node = tre_stack_pop_voidptr(stack);
1837 case EXPAND_RECURSE:
1842 tre_literal_t *lit= node->obj;
1843 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1845 lit->position += pos_add;
1846 if (lit->position > max_pos)
1847 max_pos = lit->position;
1853 tre_union_t *uni = node->obj;
1854 STACK_PUSHX(stack, voidptr, uni->right);
1855 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1856 STACK_PUSHX(stack, voidptr, uni->left);
1857 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1862 tre_catenation_t *cat = node->obj;
1863 STACK_PUSHX(stack, voidptr, cat->right);
1864 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1865 STACK_PUSHX(stack, voidptr, cat->left);
1866 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1871 tre_iteration_t *iter = node->obj;
1872 STACK_PUSHX(stack, int, pos_add);
1873 STACK_PUSHX(stack, voidptr, node);
1874 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1875 STACK_PUSHX(stack, voidptr, iter->arg);
1876 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1877 /* If we are going to expand this node at EXPAND_AFTER_ITER
1878 then don't increase the `pos' fields of the nodes now, it
1879 will get done when expanding. */
1880 if (iter->min > 1 || iter->max > 1)
1890 case EXPAND_AFTER_ITER:
1892 tre_iteration_t *iter = node->obj;
1894 pos_add = tre_stack_pop_int(stack);
1895 pos_add_last = pos_add;
1896 if (iter->min > 1 || iter->max > 1)
1898 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1900 int pos_add_save = pos_add;
1902 /* Create a catenated sequence of copies of the node. */
1903 for (j = 0; j < iter->min; j++)
1905 tre_ast_node_t *copy;
1906 /* Remove tags from all but the last copy. */
1907 int flags = ((j + 1 < iter->min)
1909 : COPY_MAXIMIZE_FIRST_TAG);
1910 pos_add_save = pos_add;
1911 status = tre_copy_ast(mem, stack, iter->arg, flags,
1912 &pos_add, tag_directions, ©,
1914 if (status != REG_OK)
1917 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1924 if (iter->max == -1)
1926 /* No upper limit. */
1927 pos_add_save = pos_add;
1928 status = tre_copy_ast(mem, stack, iter->arg, 0,
1929 &pos_add, NULL, &seq2, &max_pos);
1930 if (status != REG_OK)
1932 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1938 for (j = iter->min; j < iter->max; j++)
1940 tre_ast_node_t *tmp, *copy;
1941 pos_add_save = pos_add;
1942 status = tre_copy_ast(mem, stack, iter->arg, 0,
1943 &pos_add, NULL, ©, &max_pos);
1944 if (status != REG_OK)
1947 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1952 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1955 seq2 = tre_ast_new_union(mem, tmp, seq2);
1961 pos_add = pos_add_save;
1964 else if (seq2 != NULL)
1965 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1968 node->obj = seq1->obj;
1969 node->type = seq1->type;
1973 pos_add_total += pos_add - pos_add_last;
1974 if (iter_depth == 0)
1975 pos_add = pos_add_total;
1985 *position += pos_add_total;
1987 /* `max_pos' should never be larger than `*position' if the above
1988 code works, but just an extra safeguard let's make sure
1989 `*position' is set large enough so enough memory will be
1990 allocated for the transition table. */
1991 if (max_pos > *position)
1992 *position = max_pos;
1997 static tre_pos_and_tags_t *
1998 tre_set_empty(tre_mem_t mem)
2000 tre_pos_and_tags_t *new_set;
2002 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2003 if (new_set == NULL)
2006 new_set[0].position = -1;
2007 new_set[0].code_min = -1;
2008 new_set[0].code_max = -1;
2013 static tre_pos_and_tags_t *
2014 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2015 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2017 tre_pos_and_tags_t *new_set;
2019 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2020 if (new_set == NULL)
2023 new_set[0].position = position;
2024 new_set[0].code_min = code_min;
2025 new_set[0].code_max = code_max;
2026 new_set[0].class = class;
2027 new_set[0].neg_classes = neg_classes;
2028 new_set[0].backref = backref;
2029 new_set[1].position = -1;
2030 new_set[1].code_min = -1;
2031 new_set[1].code_max = -1;
2036 static tre_pos_and_tags_t *
2037 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2038 int *tags, int assertions)
2041 tre_pos_and_tags_t *new_set;
2045 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2046 for (s1 = 0; set1[s1].position >= 0; s1++);
2047 for (s2 = 0; set2[s2].position >= 0; s2++);
2048 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2052 for (s1 = 0; set1[s1].position >= 0; s1++)
2054 new_set[s1].position = set1[s1].position;
2055 new_set[s1].code_min = set1[s1].code_min;
2056 new_set[s1].code_max = set1[s1].code_max;
2057 new_set[s1].assertions = set1[s1].assertions | assertions;
2058 new_set[s1].class = set1[s1].class;
2059 new_set[s1].neg_classes = set1[s1].neg_classes;
2060 new_set[s1].backref = set1[s1].backref;
2061 if (set1[s1].tags == NULL && tags == NULL)
2062 new_set[s1].tags = NULL;
2065 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2066 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2067 * (i + num_tags + 1)));
2068 if (new_tags == NULL)
2070 for (j = 0; j < i; j++)
2071 new_tags[j] = set1[s1].tags[j];
2072 for (i = 0; i < num_tags; i++)
2073 new_tags[j + i] = tags[i];
2074 new_tags[j + i] = -1;
2075 new_set[s1].tags = new_tags;
2079 for (s2 = 0; set2[s2].position >= 0; s2++)
2081 new_set[s1 + s2].position = set2[s2].position;
2082 new_set[s1 + s2].code_min = set2[s2].code_min;
2083 new_set[s1 + s2].code_max = set2[s2].code_max;
2084 /* XXX - why not | assertions here as well? */
2085 new_set[s1 + s2].assertions = set2[s2].assertions;
2086 new_set[s1 + s2].class = set2[s2].class;
2087 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2088 new_set[s1 + s2].backref = set2[s2].backref;
2089 if (set2[s2].tags == NULL)
2090 new_set[s1 + s2].tags = NULL;
2093 for (i = 0; set2[s2].tags[i] >= 0; i++);
2094 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2095 if (new_tags == NULL)
2097 for (j = 0; j < i; j++)
2098 new_tags[j] = set2[s2].tags[j];
2100 new_set[s1 + s2].tags = new_tags;
2103 new_set[s1 + s2].position = -1;
2107 /* Finds the empty path through `node' which is the one that should be
2108 taken according to POSIX.2 rules, and adds the tags on that path to
2109 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2110 set to the number of tags seen on the path. */
2111 static reg_errcode_t
2112 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2113 int *assertions, int *num_tags_seen)
2117 tre_catenation_t *cat;
2118 tre_iteration_t *iter;
2120 int bottom = tre_stack_num_objects(stack);
2121 reg_errcode_t status = REG_OK;
2125 status = tre_stack_push_voidptr(stack, node);
2127 /* Walk through the tree recursively. */
2128 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2130 node = tre_stack_pop_voidptr(stack);
2135 lit = (tre_literal_t *)node->obj;
2136 switch (lit->code_min)
2139 if (lit->code_max >= 0)
2143 /* Add the tag to `tags'. */
2144 for (i = 0; tags[i] >= 0; i++)
2145 if (tags[i] == lit->code_max)
2149 tags[i] = lit->code_max;
2158 assert(lit->code_max >= 1
2159 || lit->code_max <= ASSERT_LAST);
2160 if (assertions != NULL)
2161 *assertions |= lit->code_max;
2172 /* Subexpressions starting earlier take priority over ones
2173 starting later, so we prefer the left subexpression over the
2174 right subexpression. */
2175 uni = (tre_union_t *)node->obj;
2176 if (uni->left->nullable)
2177 STACK_PUSHX(stack, voidptr, uni->left)
2178 else if (uni->right->nullable)
2179 STACK_PUSHX(stack, voidptr, uni->right)
2185 /* The path must go through both children. */
2186 cat = (tre_catenation_t *)node->obj;
2187 assert(cat->left->nullable);
2188 assert(cat->right->nullable);
2189 STACK_PUSHX(stack, voidptr, cat->left);
2190 STACK_PUSHX(stack, voidptr, cat->right);
2194 /* A match with an empty string is preferred over no match at
2195 all, so we go through the argument if possible. */
2196 iter = (tre_iteration_t *)node->obj;
2197 if (iter->arg->nullable)
2198 STACK_PUSHX(stack, voidptr, iter->arg);
2214 NFL_POST_CATENATION,
2216 } tre_nfl_stack_symbol_t;
2219 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2220 the nodes of the AST `tree'. */
2221 static reg_errcode_t
2222 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2224 int bottom = tre_stack_num_objects(stack);
2226 STACK_PUSHR(stack, voidptr, tree);
2227 STACK_PUSHR(stack, int, NFL_RECURSE);
2229 while (tre_stack_num_objects(stack) > bottom)
2231 tre_nfl_stack_symbol_t symbol;
2232 tre_ast_node_t *node;
2234 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2235 node = tre_stack_pop_voidptr(stack);
2243 tre_literal_t *lit = (tre_literal_t *)node->obj;
2244 if (IS_BACKREF(lit))
2246 /* Back references: nullable = false, firstpos = {i},
2249 node->firstpos = tre_set_one(mem, lit->position, 0,
2250 TRE_CHAR_MAX, 0, NULL, -1);
2251 if (!node->firstpos)
2253 node->lastpos = tre_set_one(mem, lit->position, 0,
2254 TRE_CHAR_MAX, 0, NULL,
2255 (int)lit->code_max);
2259 else if (lit->code_min < 0)
2261 /* Tags, empty strings, params, and zero width assertions:
2262 nullable = true, firstpos = {}, and lastpos = {}. */
2264 node->firstpos = tre_set_empty(mem);
2265 if (!node->firstpos)
2267 node->lastpos = tre_set_empty(mem);
2273 /* Literal at position i: nullable = false, firstpos = {i},
2277 tre_set_one(mem, lit->position, (int)lit->code_min,
2278 (int)lit->code_max, 0, NULL, -1);
2279 if (!node->firstpos)
2281 node->lastpos = tre_set_one(mem, lit->position,
2284 lit->class, lit->neg_classes,
2293 /* Compute the attributes for the two subtrees, and after that
2295 STACK_PUSHR(stack, voidptr, node);
2296 STACK_PUSHR(stack, int, NFL_POST_UNION);
2297 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2298 STACK_PUSHR(stack, int, NFL_RECURSE);
2299 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2300 STACK_PUSHR(stack, int, NFL_RECURSE);
2304 /* Compute the attributes for the two subtrees, and after that
2306 STACK_PUSHR(stack, voidptr, node);
2307 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2308 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2309 STACK_PUSHR(stack, int, NFL_RECURSE);
2310 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2311 STACK_PUSHR(stack, int, NFL_RECURSE);
2315 /* Compute the attributes for the subtree, and after that for
2317 STACK_PUSHR(stack, voidptr, node);
2318 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2319 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2320 STACK_PUSHR(stack, int, NFL_RECURSE);
2323 break; /* end case: NFL_RECURSE */
2325 case NFL_POST_UNION:
2327 tre_union_t *uni = (tre_union_t *)node->obj;
2328 node->nullable = uni->left->nullable || uni->right->nullable;
2329 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2330 uni->right->firstpos, NULL, 0);
2331 if (!node->firstpos)
2333 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2334 uni->right->lastpos, NULL, 0);
2340 case NFL_POST_ITERATION:
2342 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2344 if (iter->min == 0 || iter->arg->nullable)
2348 node->firstpos = iter->arg->firstpos;
2349 node->lastpos = iter->arg->lastpos;
2353 case NFL_POST_CATENATION:
2355 int num_tags, *tags, assertions;
2356 reg_errcode_t status;
2357 tre_catenation_t *cat = node->obj;
2358 node->nullable = cat->left->nullable && cat->right->nullable;
2360 /* Compute firstpos. */
2361 if (cat->left->nullable)
2363 /* The left side matches the empty string. Make a first pass
2364 with tre_match_empty() to get the number of tags and
2366 status = tre_match_empty(stack, cat->left,
2367 NULL, NULL, &num_tags);
2368 if (status != REG_OK)
2370 /* Allocate arrays for the tags and parameters. */
2371 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2376 /* Second pass with tre_mach_empty() to get the list of
2377 tags and parameters. */
2378 status = tre_match_empty(stack, cat->left, tags,
2380 if (status != REG_OK)
2386 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2389 if (!node->firstpos)
2394 node->firstpos = cat->left->firstpos;
2397 /* Compute lastpos. */
2398 if (cat->right->nullable)
2400 /* The right side matches the empty string. Make a first pass
2401 with tre_match_empty() to get the number of tags and
2403 status = tre_match_empty(stack, cat->right,
2404 NULL, NULL, &num_tags);
2405 if (status != REG_OK)
2407 /* Allocate arrays for the tags and parameters. */
2408 tags = xmalloc(sizeof(int) * (num_tags + 1));
2413 /* Second pass with tre_mach_empty() to get the list of
2414 tags and parameters. */
2415 status = tre_match_empty(stack, cat->right, tags,
2417 if (status != REG_OK)
2423 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2431 node->lastpos = cat->right->lastpos;
2446 /* Adds a transition from each position in `p1' to each position in `p2'. */
2447 static reg_errcode_t
2448 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2449 tre_tnfa_transition_t *transitions,
2450 int *counts, int *offs)
2452 tre_pos_and_tags_t *orig_p2 = p2;
2453 tre_tnfa_transition_t *trans;
2454 int i, j, k, l, dup, prev_p2_pos;
2456 if (transitions != NULL)
2457 while (p1->position >= 0)
2461 while (p2->position >= 0)
2463 /* Optimization: if this position was already handled, skip it. */
2464 if (p2->position == prev_p2_pos)
2469 prev_p2_pos = p2->position;
2470 /* Set `trans' to point to the next unused transition from
2471 position `p1->position'. */
2472 trans = transitions + offs[p1->position];
2473 while (trans->state != NULL)
2476 /* If we find a previous transition from `p1->position' to
2477 `p2->position', it is overwritten. This can happen only
2478 if there are nested loops in the regexp, like in "((a)*)*".
2479 In POSIX.2 repetition using the outer loop is always
2480 preferred over using the inner loop. Therefore the
2481 transition for the inner loop is useless and can be thrown
2483 /* XXX - The same position is used for all nodes in a bracket
2484 expression, so this optimization cannot be used (it will
2485 break bracket expressions) unless I figure out a way to
2487 if (trans->state_id == p2->position)
2495 if (trans->state == NULL)
2496 (trans + 1)->state = NULL;
2497 /* Use the character ranges, assertions, etc. from `p1' for
2498 the transition from `p1' to `p2'. */
2499 trans->code_min = p1->code_min;
2500 trans->code_max = p1->code_max;
2501 trans->state = transitions + offs[p2->position];
2502 trans->state_id = p2->position;
2503 trans->assertions = p1->assertions | p2->assertions
2504 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2505 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2506 if (p1->backref >= 0)
2508 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2509 assert(p2->backref < 0);
2510 trans->u.backref = p1->backref;
2511 trans->assertions |= ASSERT_BACKREF;
2514 trans->u.class = p1->class;
2515 if (p1->neg_classes != NULL)
2517 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2518 trans->neg_classes =
2519 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2520 if (trans->neg_classes == NULL)
2522 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2523 trans->neg_classes[i] = p1->neg_classes[i];
2524 trans->neg_classes[i] = (tre_ctype_t)0;
2527 trans->neg_classes = NULL;
2529 /* Find out how many tags this transition has. */
2531 if (p1->tags != NULL)
2532 while(p1->tags[i] >= 0)
2535 if (p2->tags != NULL)
2536 while(p2->tags[j] >= 0)
2539 /* If we are overwriting a transition, free the old tag array. */
2540 if (trans->tags != NULL)
2544 /* If there were any tags, allocate an array and fill it. */
2547 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2551 if (p1->tags != NULL)
2552 while(p1->tags[i] >= 0)
2554 trans->tags[i] = p1->tags[i];
2559 if (p2->tags != NULL)
2560 while (p2->tags[j] >= 0)
2562 /* Don't add duplicates. */
2564 for (k = 0; k < i; k++)
2565 if (trans->tags[k] == p2->tags[j])
2571 trans->tags[l++] = p2->tags[j];
2574 trans->tags[l] = -1;
2582 /* Compute a maximum limit for the number of transitions leaving
2584 while (p1->position >= 0)
2587 while (p2->position >= 0)
2589 counts[p1->position]++;
2597 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2598 labelled with one character range (there are no transitions on empty
2599 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2601 static reg_errcode_t
2602 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2603 int *counts, int *offs)
2606 tre_catenation_t *cat;
2607 tre_iteration_t *iter;
2608 reg_errcode_t errcode = REG_OK;
2610 /* XXX - recurse using a stack!. */
2616 uni = (tre_union_t *)node->obj;
2617 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2618 if (errcode != REG_OK)
2620 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2624 cat = (tre_catenation_t *)node->obj;
2625 /* Add a transition from each position in cat->left->lastpos
2626 to each position in cat->right->firstpos. */
2627 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2628 transitions, counts, offs);
2629 if (errcode != REG_OK)
2631 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2632 if (errcode != REG_OK)
2634 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2638 iter = (tre_iteration_t *)node->obj;
2639 assert(iter->max == -1 || iter->max == 1);
2641 if (iter->max == -1)
2643 assert(iter->min == 0 || iter->min == 1);
2644 /* Add a transition from each last position in the iterated
2645 expression to each first position. */
2646 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2647 transitions, counts, offs);
2648 if (errcode != REG_OK)
2651 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2658 #define ERROR_EXIT(err) \
2662 if (/*CONSTCOND*/1) \
2665 while (/*CONSTCOND*/0)
2669 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2672 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2673 tre_pos_and_tags_t *p;
2674 int *counts = NULL, *offs = NULL;
2676 tre_tnfa_transition_t *transitions, *initial;
2677 tre_tnfa_t *tnfa = NULL;
2678 tre_submatch_data_t *submatch_data;
2679 tre_tag_direction_t *tag_directions = NULL;
2680 reg_errcode_t errcode;
2683 /* Parse context. */
2684 tre_parse_ctx_t parse_ctx;
2686 /* Allocate a stack used throughout the compilation process for various
2688 stack = tre_stack_new(512, 10240, 128);
2691 /* Allocate a fast memory allocator. */
2692 mem = tre_mem_new();
2695 tre_stack_destroy(stack);
2699 /* Parse the regexp. */
2700 memset(&parse_ctx, 0, sizeof(parse_ctx));
2701 parse_ctx.mem = mem;
2702 parse_ctx.stack = stack;
2703 parse_ctx.re = regex;
2704 parse_ctx.cflags = cflags;
2705 parse_ctx.max_backref = -1;
2706 errcode = tre_parse(&parse_ctx);
2707 if (errcode != REG_OK)
2708 ERROR_EXIT(errcode);
2709 preg->re_nsub = parse_ctx.submatch_id - 1;
2713 tre_ast_print(tree);
2714 #endif /* TRE_DEBUG */
2716 /* Referring to nonexistent subexpressions is illegal. */
2717 if (parse_ctx.max_backref > (int)preg->re_nsub)
2718 ERROR_EXIT(REG_ESUBREG);
2720 /* Allocate the TNFA struct. */
2721 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2723 ERROR_EXIT(REG_ESPACE);
2724 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2725 tnfa->have_approx = 0;
2726 tnfa->num_submatches = parse_ctx.submatch_id;
2728 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2729 regexp does not have back references, this can be skipped. */
2730 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2733 /* Figure out how many tags we will need. */
2734 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2735 if (errcode != REG_OK)
2736 ERROR_EXIT(errcode);
2738 if (tnfa->num_tags > 0)
2740 tag_directions = xmalloc(sizeof(*tag_directions)
2741 * (tnfa->num_tags + 1));
2742 if (tag_directions == NULL)
2743 ERROR_EXIT(REG_ESPACE);
2744 tnfa->tag_directions = tag_directions;
2745 memset(tag_directions, -1,
2746 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2748 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2749 sizeof(*tnfa->minimal_tags));
2750 if (tnfa->minimal_tags == NULL)
2751 ERROR_EXIT(REG_ESPACE);
2753 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2754 sizeof(*submatch_data));
2755 if (submatch_data == NULL)
2756 ERROR_EXIT(REG_ESPACE);
2757 tnfa->submatch_data = submatch_data;
2759 errcode = tre_add_tags(mem, stack, tree, tnfa);
2760 if (errcode != REG_OK)
2761 ERROR_EXIT(errcode);
2765 /* Expand iteration nodes. */
2766 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2768 if (errcode != REG_OK)
2769 ERROR_EXIT(errcode);
2771 /* Add a dummy node for the final state.
2772 XXX - For certain patterns this dummy node can be optimized away,
2773 for example "a*" or "ab*". Figure out a simple way to detect
2774 this possibility. */
2776 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2777 if (tmp_ast_r == NULL)
2778 ERROR_EXIT(REG_ESPACE);
2780 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2782 ERROR_EXIT(REG_ESPACE);
2784 errcode = tre_compute_nfl(mem, stack, tree);
2785 if (errcode != REG_OK)
2786 ERROR_EXIT(errcode);
2788 counts = xmalloc(sizeof(int) * parse_ctx.position);
2790 ERROR_EXIT(REG_ESPACE);
2792 offs = xmalloc(sizeof(int) * parse_ctx.position);
2794 ERROR_EXIT(REG_ESPACE);
2796 for (i = 0; i < parse_ctx.position; i++)
2798 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2801 for (i = 0; i < parse_ctx.position; i++)
2804 add += counts[i] + 1;
2807 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2808 if (transitions == NULL)
2809 ERROR_EXIT(REG_ESPACE);
2810 tnfa->transitions = transitions;
2811 tnfa->num_transitions = add;
2813 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2814 if (errcode != REG_OK)
2815 ERROR_EXIT(errcode);
2817 tnfa->firstpos_chars = NULL;
2821 while (p->position >= 0)
2827 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2828 if (initial == NULL)
2829 ERROR_EXIT(REG_ESPACE);
2830 tnfa->initial = initial;
2833 for (p = tree->firstpos; p->position >= 0; p++)
2835 initial[i].state = transitions + offs[p->position];
2836 initial[i].state_id = p->position;
2837 initial[i].tags = NULL;
2838 /* Copy the arrays p->tags, and p->params, they are allocated
2839 from a tre_mem object. */
2843 for (j = 0; p->tags[j] >= 0; j++);
2844 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2845 if (!initial[i].tags)
2846 ERROR_EXIT(REG_ESPACE);
2847 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2849 initial[i].assertions = p->assertions;
2852 initial[i].state = NULL;
2854 tnfa->num_transitions = add;
2855 tnfa->final = transitions + offs[tree->lastpos[0].position];
2856 tnfa->num_states = parse_ctx.position;
2857 tnfa->cflags = cflags;
2859 tre_mem_destroy(mem);
2860 tre_stack_destroy(stack);
2864 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2868 /* Free everything that was allocated and return the error code. */
2869 tre_mem_destroy(mem);
2871 tre_stack_destroy(stack);
2876 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2885 regfree(regex_t *preg)
2889 tre_tnfa_transition_t *trans;
2891 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2895 for (i = 0; i < tnfa->num_transitions; i++)
2896 if (tnfa->transitions[i].state)
2898 if (tnfa->transitions[i].tags)
2899 xfree(tnfa->transitions[i].tags);
2900 if (tnfa->transitions[i].neg_classes)
2901 xfree(tnfa->transitions[i].neg_classes);
2903 if (tnfa->transitions)
2904 xfree(tnfa->transitions);
2908 for (trans = tnfa->initial; trans->state; trans++)
2913 xfree(tnfa->initial);
2916 if (tnfa->submatch_data)
2918 for (i = 0; i < tnfa->num_submatches; i++)
2919 if (tnfa->submatch_data[i].parents)
2920 xfree(tnfa->submatch_data[i].parents);
2921 xfree(tnfa->submatch_data);
2924 if (tnfa->tag_directions)
2925 xfree(tnfa->tag_directions);
2926 if (tnfa->firstpos_chars)
2927 xfree(tnfa->firstpos_chars);
2928 if (tnfa->minimal_tags)
2929 xfree(tnfa->minimal_tags);