2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
841 /* reject repetitions after empty expression in BRE */
845 /* extension: treat \| as alternation in BRE */
847 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
853 if (!ere && (unsigned)*s-'1' < 9) {
856 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
857 ctx->max_backref = MAX(val, ctx->max_backref);
859 /* extension: accept unknown escaped char
867 if (ctx->cflags & REG_NEWLINE) {
868 tre_ast_node_t *tmp1, *tmp2;
869 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
870 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
872 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
876 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
881 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
882 if (!ere && s != ctx->re)
884 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
888 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
891 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
899 /* reject repetitions after empty expression in ERE */
906 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
910 len = mbtowc(&wc, s, -1);
913 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
914 tre_ast_node_t *tmp1, *tmp2;
915 /* multiple opposite case characters are not supported */
916 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
917 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
919 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
923 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
937 #define PUSHPTR(err, s, v) do { \
938 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
942 #define PUSHINT(err, s, v) do { \
943 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
947 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
949 tre_ast_node_t *nbranch=0, *nunion=0;
950 int ere = ctx->cflags & REG_EXTENDED;
951 const char *s = ctx->re;
955 tre_stack_t *stack = ctx->stack;
957 PUSHINT(err, stack, subid++);
959 if ((!ere && *s == '\\' && s[1] == '(') ||
960 (ere && *s == '(')) {
961 PUSHPTR(err, stack, nunion);
962 PUSHPTR(err, stack, nbranch);
963 PUSHINT(err, stack, subid++);
968 nbranch = nunion = 0;
971 if ((!ere && *s == '\\' && s[1] == ')') ||
972 (ere && *s == ')' && depth)) {
973 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
977 err = parse_atom(ctx, s);
984 /* extension: repetitions are rejected after an empty node
985 eg. (+), |*, {2}, but assertions are not treated as empty
986 so ^* or $? are accepted currently. */
1000 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1003 /* extension: multiple consecutive *+?{,} is unspecified,
1004 but (a+)+ has to be supported so accepting a++ makes
1005 sense, note however that the RE_DUP_MAX limit can be
1006 circumvented: (a{255}){255} uses a lot of memory.. */
1009 if (ere || s[1] != '{')
1017 err = parse_dup(ctx, s+1);
1024 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1025 if ((ere && *s == '|') ||
1026 (ere && *s == ')' && depth) ||
1027 (!ere && *s == '\\' && s[1] == ')') ||
1028 /* extension: treat \| as alternation in BRE */
1029 (!ere && *s == '\\' && s[1] == '|') ||
1031 /* extension: empty branch is unspecified (), (|a), (a|)
1032 here they are not rejected but match on empty string */
1034 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1037 if (c == '\\' && s[1] == '|') {
1039 } else if (c == '|') {
1043 if (!depth) return REG_EPAREN;
1045 } else if (c == ')')
1048 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1051 if (!c && depth<0) {
1052 ctx->submatch_id = subid;
1057 nbranch = tre_stack_pop_voidptr(stack);
1058 nunion = tre_stack_pop_voidptr(stack);
1066 /***********************************************************************
1068 ***********************************************************************/
1073 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1078 Algorithms to setup tags so that submatch addressing can be done.
1082 /* Inserts a catenation node to the root of the tree given in `node'.
1083 As the left child a new tag with number `tag_id' to `node' is added,
1084 and the right child is the old root. */
1085 static reg_errcode_t
1086 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1088 tre_catenation_t *c;
1090 c = tre_mem_alloc(mem, sizeof(*c));
1093 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1094 if (c->left == NULL)
1096 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1097 if (c->right == NULL)
1100 c->right->obj = node->obj;
1101 c->right->type = node->type;
1102 c->right->nullable = -1;
1103 c->right->submatch_id = -1;
1104 c->right->firstpos = NULL;
1105 c->right->lastpos = NULL;
1106 c->right->num_tags = 0;
1108 node->type = CATENATION;
1112 /* Inserts a catenation node to the root of the tree given in `node'.
1113 As the right child a new tag with number `tag_id' to `node' is added,
1114 and the left child is the old root. */
1115 static reg_errcode_t
1116 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1118 tre_catenation_t *c;
1120 c = tre_mem_alloc(mem, sizeof(*c));
1123 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1124 if (c->right == NULL)
1126 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1127 if (c->left == NULL)
1130 c->left->obj = node->obj;
1131 c->left->type = node->type;
1132 c->left->nullable = -1;
1133 c->left->submatch_id = -1;
1134 c->left->firstpos = NULL;
1135 c->left->lastpos = NULL;
1136 c->left->num_tags = 0;
1138 node->type = CATENATION;
1144 ADDTAGS_AFTER_ITERATION,
1145 ADDTAGS_AFTER_UNION_LEFT,
1146 ADDTAGS_AFTER_UNION_RIGHT,
1147 ADDTAGS_AFTER_CAT_LEFT,
1148 ADDTAGS_AFTER_CAT_RIGHT,
1149 ADDTAGS_SET_SUBMATCH_END
1150 } tre_addtags_symbol_t;
1159 /* Go through `regset' and set submatch data for submatches that are
1162 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1166 for (i = 0; regset[i] >= 0; i++)
1168 int id = regset[i] / 2;
1169 int start = !(regset[i] % 2);
1171 tnfa->submatch_data[id].so_tag = tag;
1173 tnfa->submatch_data[id].eo_tag = tag;
1179 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1180 subexpressions marked for submatch addressing can be traced. */
1181 static reg_errcode_t
1182 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1185 reg_errcode_t status = REG_OK;
1186 tre_addtags_symbol_t symbol;
1187 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1188 int bottom = tre_stack_num_objects(stack);
1189 /* True for first pass (counting number of needed tags) */
1190 int first_pass = (mem == NULL || tnfa == NULL);
1191 int *regset, *orig_regset;
1192 int num_tags = 0; /* Total number of tags. */
1193 int num_minimals = 0; /* Number of special minimal tags. */
1194 int tag = 0; /* The tag that is to be added next. */
1195 int next_tag = 1; /* Next tag to use after this one. */
1196 int *parents; /* Stack of submatches the current submatch is
1198 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1199 tre_tag_states_t *saved_states;
1201 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1205 tnfa->minimal_tags[0] = -1;
1208 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1212 orig_regset = regset;
1214 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1215 if (parents == NULL)
1222 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1223 if (saved_states == NULL)
1232 for (i = 0; i <= tnfa->num_submatches; i++)
1233 saved_states[i].tag = -1;
1236 STACK_PUSH(stack, voidptr, node);
1237 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1239 while (tre_stack_num_objects(stack) > bottom)
1241 if (status != REG_OK)
1244 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1248 case ADDTAGS_SET_SUBMATCH_END:
1250 int id = tre_stack_pop_int(stack);
1253 /* Add end of this submatch to regset. */
1254 for (i = 0; regset[i] >= 0; i++);
1255 regset[i] = id * 2 + 1;
1258 /* Pop this submatch from the parents stack. */
1259 for (i = 0; parents[i] >= 0; i++);
1260 parents[i - 1] = -1;
1264 case ADDTAGS_RECURSE:
1265 node = tre_stack_pop_voidptr(stack);
1267 if (node->submatch_id >= 0)
1269 int id = node->submatch_id;
1273 /* Add start of this submatch to regset. */
1274 for (i = 0; regset[i] >= 0; i++);
1280 for (i = 0; parents[i] >= 0; i++);
1281 tnfa->submatch_data[id].parents = NULL;
1284 int *p = xmalloc(sizeof(*p) * (i + 1));
1287 status = REG_ESPACE;
1290 assert(tnfa->submatch_data[id].parents == NULL);
1291 tnfa->submatch_data[id].parents = p;
1292 for (i = 0; parents[i] >= 0; i++)
1298 /* Add end of this submatch to regset after processing this
1300 STACK_PUSHX(stack, int, node->submatch_id);
1301 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1308 tre_literal_t *lit = node->obj;
1310 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1315 /* Regset is not empty, so add a tag before the
1316 literal or backref. */
1319 status = tre_add_tag_left(mem, node, tag);
1320 tnfa->tag_directions[tag] = direction;
1321 if (minimal_tag >= 0)
1323 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1324 tnfa->minimal_tags[i] = tag;
1325 tnfa->minimal_tags[i + 1] = minimal_tag;
1326 tnfa->minimal_tags[i + 2] = -1;
1330 tre_purge_regset(regset, tnfa, tag);
1345 assert(!IS_TAG(lit));
1351 tre_catenation_t *cat = node->obj;
1352 tre_ast_node_t *left = cat->left;
1353 tre_ast_node_t *right = cat->right;
1354 int reserved_tag = -1;
1357 /* After processing right child. */
1358 STACK_PUSHX(stack, voidptr, node);
1359 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1361 /* Process right child. */
1362 STACK_PUSHX(stack, voidptr, right);
1363 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1365 /* After processing left child. */
1366 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1367 if (left->num_tags > 0 && right->num_tags > 0)
1369 /* Reserve the next tag to the right child. */
1370 reserved_tag = next_tag;
1373 STACK_PUSHX(stack, int, reserved_tag);
1374 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1376 /* Process left child. */
1377 STACK_PUSHX(stack, voidptr, left);
1378 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1384 tre_iteration_t *iter = node->obj;
1388 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1392 STACK_PUSHX(stack, int, tag);
1393 STACK_PUSHX(stack, int, iter->minimal);
1395 STACK_PUSHX(stack, voidptr, node);
1396 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1398 STACK_PUSHX(stack, voidptr, iter->arg);
1399 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1401 /* Regset is not empty, so add a tag here. */
1402 if (regset[0] >= 0 || iter->minimal)
1407 status = tre_add_tag_left(mem, node, tag);
1409 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1411 tnfa->tag_directions[tag] = direction;
1412 if (minimal_tag >= 0)
1414 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1415 tnfa->minimal_tags[i] = tag;
1416 tnfa->minimal_tags[i + 1] = minimal_tag;
1417 tnfa->minimal_tags[i + 2] = -1;
1421 tre_purge_regset(regset, tnfa, tag);
1429 direction = TRE_TAG_MINIMIZE;
1434 tre_union_t *uni = node->obj;
1435 tre_ast_node_t *left = uni->left;
1436 tre_ast_node_t *right = uni->right;
1442 left_tag = next_tag;
1443 right_tag = next_tag + 1;
1448 right_tag = next_tag;
1451 /* After processing right child. */
1452 STACK_PUSHX(stack, int, right_tag);
1453 STACK_PUSHX(stack, int, left_tag);
1454 STACK_PUSHX(stack, voidptr, regset);
1455 STACK_PUSHX(stack, int, regset[0] >= 0);
1456 STACK_PUSHX(stack, voidptr, node);
1457 STACK_PUSHX(stack, voidptr, right);
1458 STACK_PUSHX(stack, voidptr, left);
1459 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1461 /* Process right child. */
1462 STACK_PUSHX(stack, voidptr, right);
1463 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1465 /* After processing left child. */
1466 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1468 /* Process left child. */
1469 STACK_PUSHX(stack, voidptr, left);
1470 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1472 /* Regset is not empty, so add a tag here. */
1478 status = tre_add_tag_left(mem, node, tag);
1479 tnfa->tag_directions[tag] = direction;
1480 if (minimal_tag >= 0)
1482 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1483 tnfa->minimal_tags[i] = tag;
1484 tnfa->minimal_tags[i + 1] = minimal_tag;
1485 tnfa->minimal_tags[i + 2] = -1;
1489 tre_purge_regset(regset, tnfa, tag);
1498 if (node->num_submatches > 0)
1500 /* The next two tags are reserved for markers. */
1510 if (node->submatch_id >= 0)
1513 /* Push this submatch on the parents stack. */
1514 for (i = 0; parents[i] >= 0; i++);
1515 parents[i] = node->submatch_id;
1516 parents[i + 1] = -1;
1519 break; /* end case: ADDTAGS_RECURSE */
1521 case ADDTAGS_AFTER_ITERATION:
1525 node = tre_stack_pop_voidptr(stack);
1528 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1529 + tre_stack_pop_int(stack);
1534 minimal = tre_stack_pop_int(stack);
1535 enter_tag = tre_stack_pop_int(stack);
1537 minimal_tag = enter_tag;
1543 direction = TRE_TAG_MINIMIZE;
1545 direction = TRE_TAG_MAXIMIZE;
1550 case ADDTAGS_AFTER_CAT_LEFT:
1552 int new_tag = tre_stack_pop_int(stack);
1553 next_tag = tre_stack_pop_int(stack);
1561 case ADDTAGS_AFTER_CAT_RIGHT:
1562 node = tre_stack_pop_voidptr(stack);
1564 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1565 + ((tre_catenation_t *)node->obj)->right->num_tags;
1568 case ADDTAGS_AFTER_UNION_LEFT:
1569 /* Lift the bottom of the `regset' array so that when processing
1570 the right operand the items currently in the array are
1571 invisible. The original bottom was saved at ADDTAGS_UNION and
1572 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1573 while (*regset >= 0)
1577 case ADDTAGS_AFTER_UNION_RIGHT:
1579 int added_tags, tag_left, tag_right;
1580 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1581 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1582 node = tre_stack_pop_voidptr(stack);
1583 added_tags = tre_stack_pop_int(stack);
1586 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1587 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1588 + ((node->num_submatches > 0) ? 2 : 0);
1590 regset = tre_stack_pop_voidptr(stack);
1591 tag_left = tre_stack_pop_int(stack);
1592 tag_right = tre_stack_pop_int(stack);
1594 /* Add tags after both children, the left child gets a smaller
1595 tag than the right child. This guarantees that we prefer
1596 the left child over the right child. */
1597 /* XXX - This is not always necessary (if the children have
1598 tags which must be seen for every match of that child). */
1599 /* XXX - Check if this is the only place where tre_add_tag_right
1600 is used. If so, use tre_add_tag_left (putting the tag before
1601 the child as opposed after the child) and throw away
1602 tre_add_tag_right. */
1603 if (node->num_submatches > 0)
1607 status = tre_add_tag_right(mem, left, tag_left);
1608 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1609 if (status == REG_OK)
1610 status = tre_add_tag_right(mem, right, tag_right);
1611 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1615 direction = TRE_TAG_MAXIMIZE;
1623 } /* end switch(symbol) */
1624 } /* end while(tre_stack_num_objects(stack) > bottom) */
1627 tre_purge_regset(regset, tnfa, tag);
1629 if (!first_pass && minimal_tag >= 0)
1632 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1633 tnfa->minimal_tags[i] = tag;
1634 tnfa->minimal_tags[i + 1] = minimal_tag;
1635 tnfa->minimal_tags[i + 2] = -1;
1640 assert(tree->num_tags == num_tags);
1641 tnfa->end_tag = num_tags;
1642 tnfa->num_tags = num_tags;
1643 tnfa->num_minimals = num_minimals;
1646 xfree(saved_states);
1653 AST to TNFA compilation routines.
1659 } tre_copyast_symbol_t;
1661 /* Flags for tre_copy_ast(). */
1662 #define COPY_REMOVE_TAGS 1
1663 #define COPY_MAXIMIZE_FIRST_TAG 2
1665 static reg_errcode_t
1666 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1667 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1668 tre_ast_node_t **copy, int *max_pos)
1670 reg_errcode_t status = REG_OK;
1671 int bottom = tre_stack_num_objects(stack);
1674 tre_ast_node_t **result = copy;
1675 tre_copyast_symbol_t symbol;
1677 STACK_PUSH(stack, voidptr, ast);
1678 STACK_PUSH(stack, int, COPY_RECURSE);
1680 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1682 tre_ast_node_t *node;
1683 if (status != REG_OK)
1686 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1689 case COPY_SET_RESULT_PTR:
1690 result = tre_stack_pop_voidptr(stack);
1693 node = tre_stack_pop_voidptr(stack);
1698 tre_literal_t *lit = node->obj;
1699 int pos = lit->position;
1700 int min = lit->code_min;
1701 int max = lit->code_max;
1702 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1704 /* XXX - e.g. [ab] has only one position but two
1705 nodes, so we are creating holes in the state space
1706 here. Not fatal, just wastes memory. */
1710 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1712 /* Change this tag to empty. */
1716 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1719 /* Maximize the first tag. */
1720 tag_directions[max] = TRE_TAG_MAXIMIZE;
1723 *result = tre_ast_new_literal(mem, min, max, pos);
1724 if (*result == NULL)
1725 status = REG_ESPACE;
1727 tre_literal_t *p = (*result)->obj;
1728 p->class = lit->class;
1729 p->neg_classes = lit->neg_classes;
1738 tre_union_t *uni = node->obj;
1740 *result = tre_ast_new_union(mem, uni->left, uni->right);
1741 if (*result == NULL)
1743 status = REG_ESPACE;
1746 tmp = (*result)->obj;
1747 result = &tmp->left;
1748 STACK_PUSHX(stack, voidptr, uni->right);
1749 STACK_PUSHX(stack, int, COPY_RECURSE);
1750 STACK_PUSHX(stack, voidptr, &tmp->right);
1751 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1752 STACK_PUSHX(stack, voidptr, uni->left);
1753 STACK_PUSHX(stack, int, COPY_RECURSE);
1758 tre_catenation_t *cat = node->obj;
1759 tre_catenation_t *tmp;
1760 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1761 if (*result == NULL)
1763 status = REG_ESPACE;
1766 tmp = (*result)->obj;
1769 result = &tmp->left;
1771 STACK_PUSHX(stack, voidptr, cat->right);
1772 STACK_PUSHX(stack, int, COPY_RECURSE);
1773 STACK_PUSHX(stack, voidptr, &tmp->right);
1774 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1775 STACK_PUSHX(stack, voidptr, cat->left);
1776 STACK_PUSHX(stack, int, COPY_RECURSE);
1781 tre_iteration_t *iter = node->obj;
1782 STACK_PUSHX(stack, voidptr, iter->arg);
1783 STACK_PUSHX(stack, int, COPY_RECURSE);
1784 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1785 iter->max, iter->minimal);
1786 if (*result == NULL)
1788 status = REG_ESPACE;
1791 iter = (*result)->obj;
1792 result = &iter->arg;
1802 *pos_add += num_copied;
1809 } tre_expand_ast_symbol_t;
1811 /* Expands each iteration node that has a finite nonzero minimum or maximum
1812 iteration count to a catenated sequence of copies of the node. */
1813 static reg_errcode_t
1814 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1815 int *position, tre_tag_direction_t *tag_directions)
1817 reg_errcode_t status = REG_OK;
1818 int bottom = tre_stack_num_objects(stack);
1820 int pos_add_total = 0;
1824 STACK_PUSHR(stack, voidptr, ast);
1825 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1826 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1828 tre_ast_node_t *node;
1829 tre_expand_ast_symbol_t symbol;
1831 if (status != REG_OK)
1834 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1835 node = tre_stack_pop_voidptr(stack);
1838 case EXPAND_RECURSE:
1843 tre_literal_t *lit= node->obj;
1844 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1846 lit->position += pos_add;
1847 if (lit->position > max_pos)
1848 max_pos = lit->position;
1854 tre_union_t *uni = node->obj;
1855 STACK_PUSHX(stack, voidptr, uni->right);
1856 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1857 STACK_PUSHX(stack, voidptr, uni->left);
1858 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1863 tre_catenation_t *cat = node->obj;
1864 STACK_PUSHX(stack, voidptr, cat->right);
1865 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1866 STACK_PUSHX(stack, voidptr, cat->left);
1867 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1872 tre_iteration_t *iter = node->obj;
1873 STACK_PUSHX(stack, int, pos_add);
1874 STACK_PUSHX(stack, voidptr, node);
1875 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1876 STACK_PUSHX(stack, voidptr, iter->arg);
1877 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1878 /* If we are going to expand this node at EXPAND_AFTER_ITER
1879 then don't increase the `pos' fields of the nodes now, it
1880 will get done when expanding. */
1881 if (iter->min > 1 || iter->max > 1)
1891 case EXPAND_AFTER_ITER:
1893 tre_iteration_t *iter = node->obj;
1895 pos_add = tre_stack_pop_int(stack);
1896 pos_add_last = pos_add;
1897 if (iter->min > 1 || iter->max > 1)
1899 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1901 int pos_add_save = pos_add;
1903 /* Create a catenated sequence of copies of the node. */
1904 for (j = 0; j < iter->min; j++)
1906 tre_ast_node_t *copy;
1907 /* Remove tags from all but the last copy. */
1908 int flags = ((j + 1 < iter->min)
1910 : COPY_MAXIMIZE_FIRST_TAG);
1911 pos_add_save = pos_add;
1912 status = tre_copy_ast(mem, stack, iter->arg, flags,
1913 &pos_add, tag_directions, ©,
1915 if (status != REG_OK)
1918 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1925 if (iter->max == -1)
1927 /* No upper limit. */
1928 pos_add_save = pos_add;
1929 status = tre_copy_ast(mem, stack, iter->arg, 0,
1930 &pos_add, NULL, &seq2, &max_pos);
1931 if (status != REG_OK)
1933 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1939 for (j = iter->min; j < iter->max; j++)
1941 tre_ast_node_t *tmp, *copy;
1942 pos_add_save = pos_add;
1943 status = tre_copy_ast(mem, stack, iter->arg, 0,
1944 &pos_add, NULL, ©, &max_pos);
1945 if (status != REG_OK)
1948 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1953 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1956 seq2 = tre_ast_new_union(mem, tmp, seq2);
1962 pos_add = pos_add_save;
1965 else if (seq2 != NULL)
1966 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1969 node->obj = seq1->obj;
1970 node->type = seq1->type;
1974 pos_add_total += pos_add - pos_add_last;
1975 if (iter_depth == 0)
1976 pos_add = pos_add_total;
1986 *position += pos_add_total;
1988 /* `max_pos' should never be larger than `*position' if the above
1989 code works, but just an extra safeguard let's make sure
1990 `*position' is set large enough so enough memory will be
1991 allocated for the transition table. */
1992 if (max_pos > *position)
1993 *position = max_pos;
1998 static tre_pos_and_tags_t *
1999 tre_set_empty(tre_mem_t mem)
2001 tre_pos_and_tags_t *new_set;
2003 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2004 if (new_set == NULL)
2007 new_set[0].position = -1;
2008 new_set[0].code_min = -1;
2009 new_set[0].code_max = -1;
2014 static tre_pos_and_tags_t *
2015 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2016 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2018 tre_pos_and_tags_t *new_set;
2020 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2021 if (new_set == NULL)
2024 new_set[0].position = position;
2025 new_set[0].code_min = code_min;
2026 new_set[0].code_max = code_max;
2027 new_set[0].class = class;
2028 new_set[0].neg_classes = neg_classes;
2029 new_set[0].backref = backref;
2030 new_set[1].position = -1;
2031 new_set[1].code_min = -1;
2032 new_set[1].code_max = -1;
2037 static tre_pos_and_tags_t *
2038 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2039 int *tags, int assertions)
2042 tre_pos_and_tags_t *new_set;
2046 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2047 for (s1 = 0; set1[s1].position >= 0; s1++);
2048 for (s2 = 0; set2[s2].position >= 0; s2++);
2049 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2053 for (s1 = 0; set1[s1].position >= 0; s1++)
2055 new_set[s1].position = set1[s1].position;
2056 new_set[s1].code_min = set1[s1].code_min;
2057 new_set[s1].code_max = set1[s1].code_max;
2058 new_set[s1].assertions = set1[s1].assertions | assertions;
2059 new_set[s1].class = set1[s1].class;
2060 new_set[s1].neg_classes = set1[s1].neg_classes;
2061 new_set[s1].backref = set1[s1].backref;
2062 if (set1[s1].tags == NULL && tags == NULL)
2063 new_set[s1].tags = NULL;
2066 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2067 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2068 * (i + num_tags + 1)));
2069 if (new_tags == NULL)
2071 for (j = 0; j < i; j++)
2072 new_tags[j] = set1[s1].tags[j];
2073 for (i = 0; i < num_tags; i++)
2074 new_tags[j + i] = tags[i];
2075 new_tags[j + i] = -1;
2076 new_set[s1].tags = new_tags;
2080 for (s2 = 0; set2[s2].position >= 0; s2++)
2082 new_set[s1 + s2].position = set2[s2].position;
2083 new_set[s1 + s2].code_min = set2[s2].code_min;
2084 new_set[s1 + s2].code_max = set2[s2].code_max;
2085 /* XXX - why not | assertions here as well? */
2086 new_set[s1 + s2].assertions = set2[s2].assertions;
2087 new_set[s1 + s2].class = set2[s2].class;
2088 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2089 new_set[s1 + s2].backref = set2[s2].backref;
2090 if (set2[s2].tags == NULL)
2091 new_set[s1 + s2].tags = NULL;
2094 for (i = 0; set2[s2].tags[i] >= 0; i++);
2095 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2096 if (new_tags == NULL)
2098 for (j = 0; j < i; j++)
2099 new_tags[j] = set2[s2].tags[j];
2101 new_set[s1 + s2].tags = new_tags;
2104 new_set[s1 + s2].position = -1;
2108 /* Finds the empty path through `node' which is the one that should be
2109 taken according to POSIX.2 rules, and adds the tags on that path to
2110 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2111 set to the number of tags seen on the path. */
2112 static reg_errcode_t
2113 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2114 int *assertions, int *num_tags_seen)
2118 tre_catenation_t *cat;
2119 tre_iteration_t *iter;
2121 int bottom = tre_stack_num_objects(stack);
2122 reg_errcode_t status = REG_OK;
2126 status = tre_stack_push_voidptr(stack, node);
2128 /* Walk through the tree recursively. */
2129 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2131 node = tre_stack_pop_voidptr(stack);
2136 lit = (tre_literal_t *)node->obj;
2137 switch (lit->code_min)
2140 if (lit->code_max >= 0)
2144 /* Add the tag to `tags'. */
2145 for (i = 0; tags[i] >= 0; i++)
2146 if (tags[i] == lit->code_max)
2150 tags[i] = lit->code_max;
2159 assert(lit->code_max >= 1
2160 || lit->code_max <= ASSERT_LAST);
2161 if (assertions != NULL)
2162 *assertions |= lit->code_max;
2173 /* Subexpressions starting earlier take priority over ones
2174 starting later, so we prefer the left subexpression over the
2175 right subexpression. */
2176 uni = (tre_union_t *)node->obj;
2177 if (uni->left->nullable)
2178 STACK_PUSHX(stack, voidptr, uni->left)
2179 else if (uni->right->nullable)
2180 STACK_PUSHX(stack, voidptr, uni->right)
2186 /* The path must go through both children. */
2187 cat = (tre_catenation_t *)node->obj;
2188 assert(cat->left->nullable);
2189 assert(cat->right->nullable);
2190 STACK_PUSHX(stack, voidptr, cat->left);
2191 STACK_PUSHX(stack, voidptr, cat->right);
2195 /* A match with an empty string is preferred over no match at
2196 all, so we go through the argument if possible. */
2197 iter = (tre_iteration_t *)node->obj;
2198 if (iter->arg->nullable)
2199 STACK_PUSHX(stack, voidptr, iter->arg);
2215 NFL_POST_CATENATION,
2217 } tre_nfl_stack_symbol_t;
2220 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2221 the nodes of the AST `tree'. */
2222 static reg_errcode_t
2223 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2225 int bottom = tre_stack_num_objects(stack);
2227 STACK_PUSHR(stack, voidptr, tree);
2228 STACK_PUSHR(stack, int, NFL_RECURSE);
2230 while (tre_stack_num_objects(stack) > bottom)
2232 tre_nfl_stack_symbol_t symbol;
2233 tre_ast_node_t *node;
2235 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2236 node = tre_stack_pop_voidptr(stack);
2244 tre_literal_t *lit = (tre_literal_t *)node->obj;
2245 if (IS_BACKREF(lit))
2247 /* Back references: nullable = false, firstpos = {i},
2250 node->firstpos = tre_set_one(mem, lit->position, 0,
2251 TRE_CHAR_MAX, 0, NULL, -1);
2252 if (!node->firstpos)
2254 node->lastpos = tre_set_one(mem, lit->position, 0,
2255 TRE_CHAR_MAX, 0, NULL,
2256 (int)lit->code_max);
2260 else if (lit->code_min < 0)
2262 /* Tags, empty strings, params, and zero width assertions:
2263 nullable = true, firstpos = {}, and lastpos = {}. */
2265 node->firstpos = tre_set_empty(mem);
2266 if (!node->firstpos)
2268 node->lastpos = tre_set_empty(mem);
2274 /* Literal at position i: nullable = false, firstpos = {i},
2278 tre_set_one(mem, lit->position, (int)lit->code_min,
2279 (int)lit->code_max, 0, NULL, -1);
2280 if (!node->firstpos)
2282 node->lastpos = tre_set_one(mem, lit->position,
2285 lit->class, lit->neg_classes,
2294 /* Compute the attributes for the two subtrees, and after that
2296 STACK_PUSHR(stack, voidptr, node);
2297 STACK_PUSHR(stack, int, NFL_POST_UNION);
2298 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2299 STACK_PUSHR(stack, int, NFL_RECURSE);
2300 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2301 STACK_PUSHR(stack, int, NFL_RECURSE);
2305 /* Compute the attributes for the two subtrees, and after that
2307 STACK_PUSHR(stack, voidptr, node);
2308 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2309 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2310 STACK_PUSHR(stack, int, NFL_RECURSE);
2311 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2312 STACK_PUSHR(stack, int, NFL_RECURSE);
2316 /* Compute the attributes for the subtree, and after that for
2318 STACK_PUSHR(stack, voidptr, node);
2319 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2320 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2321 STACK_PUSHR(stack, int, NFL_RECURSE);
2324 break; /* end case: NFL_RECURSE */
2326 case NFL_POST_UNION:
2328 tre_union_t *uni = (tre_union_t *)node->obj;
2329 node->nullable = uni->left->nullable || uni->right->nullable;
2330 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2331 uni->right->firstpos, NULL, 0);
2332 if (!node->firstpos)
2334 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2335 uni->right->lastpos, NULL, 0);
2341 case NFL_POST_ITERATION:
2343 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2345 if (iter->min == 0 || iter->arg->nullable)
2349 node->firstpos = iter->arg->firstpos;
2350 node->lastpos = iter->arg->lastpos;
2354 case NFL_POST_CATENATION:
2356 int num_tags, *tags, assertions;
2357 reg_errcode_t status;
2358 tre_catenation_t *cat = node->obj;
2359 node->nullable = cat->left->nullable && cat->right->nullable;
2361 /* Compute firstpos. */
2362 if (cat->left->nullable)
2364 /* The left side matches the empty string. Make a first pass
2365 with tre_match_empty() to get the number of tags and
2367 status = tre_match_empty(stack, cat->left,
2368 NULL, NULL, &num_tags);
2369 if (status != REG_OK)
2371 /* Allocate arrays for the tags and parameters. */
2372 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2377 /* Second pass with tre_mach_empty() to get the list of
2378 tags and parameters. */
2379 status = tre_match_empty(stack, cat->left, tags,
2381 if (status != REG_OK)
2387 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2390 if (!node->firstpos)
2395 node->firstpos = cat->left->firstpos;
2398 /* Compute lastpos. */
2399 if (cat->right->nullable)
2401 /* The right side matches the empty string. Make a first pass
2402 with tre_match_empty() to get the number of tags and
2404 status = tre_match_empty(stack, cat->right,
2405 NULL, NULL, &num_tags);
2406 if (status != REG_OK)
2408 /* Allocate arrays for the tags and parameters. */
2409 tags = xmalloc(sizeof(int) * (num_tags + 1));
2414 /* Second pass with tre_mach_empty() to get the list of
2415 tags and parameters. */
2416 status = tre_match_empty(stack, cat->right, tags,
2418 if (status != REG_OK)
2424 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2432 node->lastpos = cat->right->lastpos;
2447 /* Adds a transition from each position in `p1' to each position in `p2'. */
2448 static reg_errcode_t
2449 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2450 tre_tnfa_transition_t *transitions,
2451 int *counts, int *offs)
2453 tre_pos_and_tags_t *orig_p2 = p2;
2454 tre_tnfa_transition_t *trans;
2455 int i, j, k, l, dup, prev_p2_pos;
2457 if (transitions != NULL)
2458 while (p1->position >= 0)
2462 while (p2->position >= 0)
2464 /* Optimization: if this position was already handled, skip it. */
2465 if (p2->position == prev_p2_pos)
2470 prev_p2_pos = p2->position;
2471 /* Set `trans' to point to the next unused transition from
2472 position `p1->position'. */
2473 trans = transitions + offs[p1->position];
2474 while (trans->state != NULL)
2477 /* If we find a previous transition from `p1->position' to
2478 `p2->position', it is overwritten. This can happen only
2479 if there are nested loops in the regexp, like in "((a)*)*".
2480 In POSIX.2 repetition using the outer loop is always
2481 preferred over using the inner loop. Therefore the
2482 transition for the inner loop is useless and can be thrown
2484 /* XXX - The same position is used for all nodes in a bracket
2485 expression, so this optimization cannot be used (it will
2486 break bracket expressions) unless I figure out a way to
2488 if (trans->state_id == p2->position)
2496 if (trans->state == NULL)
2497 (trans + 1)->state = NULL;
2498 /* Use the character ranges, assertions, etc. from `p1' for
2499 the transition from `p1' to `p2'. */
2500 trans->code_min = p1->code_min;
2501 trans->code_max = p1->code_max;
2502 trans->state = transitions + offs[p2->position];
2503 trans->state_id = p2->position;
2504 trans->assertions = p1->assertions | p2->assertions
2505 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2506 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2507 if (p1->backref >= 0)
2509 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2510 assert(p2->backref < 0);
2511 trans->u.backref = p1->backref;
2512 trans->assertions |= ASSERT_BACKREF;
2515 trans->u.class = p1->class;
2516 if (p1->neg_classes != NULL)
2518 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2519 trans->neg_classes =
2520 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2521 if (trans->neg_classes == NULL)
2523 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2524 trans->neg_classes[i] = p1->neg_classes[i];
2525 trans->neg_classes[i] = (tre_ctype_t)0;
2528 trans->neg_classes = NULL;
2530 /* Find out how many tags this transition has. */
2532 if (p1->tags != NULL)
2533 while(p1->tags[i] >= 0)
2536 if (p2->tags != NULL)
2537 while(p2->tags[j] >= 0)
2540 /* If we are overwriting a transition, free the old tag array. */
2541 if (trans->tags != NULL)
2545 /* If there were any tags, allocate an array and fill it. */
2548 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2552 if (p1->tags != NULL)
2553 while(p1->tags[i] >= 0)
2555 trans->tags[i] = p1->tags[i];
2560 if (p2->tags != NULL)
2561 while (p2->tags[j] >= 0)
2563 /* Don't add duplicates. */
2565 for (k = 0; k < i; k++)
2566 if (trans->tags[k] == p2->tags[j])
2572 trans->tags[l++] = p2->tags[j];
2575 trans->tags[l] = -1;
2583 /* Compute a maximum limit for the number of transitions leaving
2585 while (p1->position >= 0)
2588 while (p2->position >= 0)
2590 counts[p1->position]++;
2598 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2599 labelled with one character range (there are no transitions on empty
2600 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2602 static reg_errcode_t
2603 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2604 int *counts, int *offs)
2607 tre_catenation_t *cat;
2608 tre_iteration_t *iter;
2609 reg_errcode_t errcode = REG_OK;
2611 /* XXX - recurse using a stack!. */
2617 uni = (tre_union_t *)node->obj;
2618 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2619 if (errcode != REG_OK)
2621 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2625 cat = (tre_catenation_t *)node->obj;
2626 /* Add a transition from each position in cat->left->lastpos
2627 to each position in cat->right->firstpos. */
2628 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2629 transitions, counts, offs);
2630 if (errcode != REG_OK)
2632 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2633 if (errcode != REG_OK)
2635 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2639 iter = (tre_iteration_t *)node->obj;
2640 assert(iter->max == -1 || iter->max == 1);
2642 if (iter->max == -1)
2644 assert(iter->min == 0 || iter->min == 1);
2645 /* Add a transition from each last position in the iterated
2646 expression to each first position. */
2647 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2648 transitions, counts, offs);
2649 if (errcode != REG_OK)
2652 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2659 #define ERROR_EXIT(err) \
2663 if (/*CONSTCOND*/1) \
2666 while (/*CONSTCOND*/0)
2670 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2673 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2674 tre_pos_and_tags_t *p;
2675 int *counts = NULL, *offs = NULL;
2677 tre_tnfa_transition_t *transitions, *initial;
2678 tre_tnfa_t *tnfa = NULL;
2679 tre_submatch_data_t *submatch_data;
2680 tre_tag_direction_t *tag_directions = NULL;
2681 reg_errcode_t errcode;
2684 /* Parse context. */
2685 tre_parse_ctx_t parse_ctx;
2687 /* Allocate a stack used throughout the compilation process for various
2689 stack = tre_stack_new(512, 10240, 128);
2692 /* Allocate a fast memory allocator. */
2693 mem = tre_mem_new();
2696 tre_stack_destroy(stack);
2700 /* Parse the regexp. */
2701 memset(&parse_ctx, 0, sizeof(parse_ctx));
2702 parse_ctx.mem = mem;
2703 parse_ctx.stack = stack;
2704 parse_ctx.re = regex;
2705 parse_ctx.cflags = cflags;
2706 parse_ctx.max_backref = -1;
2707 errcode = tre_parse(&parse_ctx);
2708 if (errcode != REG_OK)
2709 ERROR_EXIT(errcode);
2710 preg->re_nsub = parse_ctx.submatch_id - 1;
2714 tre_ast_print(tree);
2715 #endif /* TRE_DEBUG */
2717 /* Referring to nonexistent subexpressions is illegal. */
2718 if (parse_ctx.max_backref > (int)preg->re_nsub)
2719 ERROR_EXIT(REG_ESUBREG);
2721 /* Allocate the TNFA struct. */
2722 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2724 ERROR_EXIT(REG_ESPACE);
2725 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2726 tnfa->have_approx = 0;
2727 tnfa->num_submatches = parse_ctx.submatch_id;
2729 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2730 regexp does not have back references, this can be skipped. */
2731 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2734 /* Figure out how many tags we will need. */
2735 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2736 if (errcode != REG_OK)
2737 ERROR_EXIT(errcode);
2739 if (tnfa->num_tags > 0)
2741 tag_directions = xmalloc(sizeof(*tag_directions)
2742 * (tnfa->num_tags + 1));
2743 if (tag_directions == NULL)
2744 ERROR_EXIT(REG_ESPACE);
2745 tnfa->tag_directions = tag_directions;
2746 memset(tag_directions, -1,
2747 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2749 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2750 sizeof(*tnfa->minimal_tags));
2751 if (tnfa->minimal_tags == NULL)
2752 ERROR_EXIT(REG_ESPACE);
2754 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2755 sizeof(*submatch_data));
2756 if (submatch_data == NULL)
2757 ERROR_EXIT(REG_ESPACE);
2758 tnfa->submatch_data = submatch_data;
2760 errcode = tre_add_tags(mem, stack, tree, tnfa);
2761 if (errcode != REG_OK)
2762 ERROR_EXIT(errcode);
2766 /* Expand iteration nodes. */
2767 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2769 if (errcode != REG_OK)
2770 ERROR_EXIT(errcode);
2772 /* Add a dummy node for the final state.
2773 XXX - For certain patterns this dummy node can be optimized away,
2774 for example "a*" or "ab*". Figure out a simple way to detect
2775 this possibility. */
2777 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2778 if (tmp_ast_r == NULL)
2779 ERROR_EXIT(REG_ESPACE);
2781 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2783 ERROR_EXIT(REG_ESPACE);
2785 errcode = tre_compute_nfl(mem, stack, tree);
2786 if (errcode != REG_OK)
2787 ERROR_EXIT(errcode);
2789 counts = xmalloc(sizeof(int) * parse_ctx.position);
2791 ERROR_EXIT(REG_ESPACE);
2793 offs = xmalloc(sizeof(int) * parse_ctx.position);
2795 ERROR_EXIT(REG_ESPACE);
2797 for (i = 0; i < parse_ctx.position; i++)
2799 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2802 for (i = 0; i < parse_ctx.position; i++)
2805 add += counts[i] + 1;
2808 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2809 if (transitions == NULL)
2810 ERROR_EXIT(REG_ESPACE);
2811 tnfa->transitions = transitions;
2812 tnfa->num_transitions = add;
2814 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2815 if (errcode != REG_OK)
2816 ERROR_EXIT(errcode);
2818 tnfa->firstpos_chars = NULL;
2822 while (p->position >= 0)
2828 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2829 if (initial == NULL)
2830 ERROR_EXIT(REG_ESPACE);
2831 tnfa->initial = initial;
2834 for (p = tree->firstpos; p->position >= 0; p++)
2836 initial[i].state = transitions + offs[p->position];
2837 initial[i].state_id = p->position;
2838 initial[i].tags = NULL;
2839 /* Copy the arrays p->tags, and p->params, they are allocated
2840 from a tre_mem object. */
2844 for (j = 0; p->tags[j] >= 0; j++);
2845 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2846 if (!initial[i].tags)
2847 ERROR_EXIT(REG_ESPACE);
2848 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2850 initial[i].assertions = p->assertions;
2853 initial[i].state = NULL;
2855 tnfa->num_transitions = add;
2856 tnfa->final = transitions + offs[tree->lastpos[0].position];
2857 tnfa->num_states = parse_ctx.position;
2858 tnfa->cflags = cflags;
2860 tre_mem_destroy(mem);
2861 tre_stack_destroy(stack);
2865 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2869 /* Free everything that was allocated and return the error code. */
2870 tre_mem_destroy(mem);
2872 tre_stack_destroy(stack);
2877 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2886 regfree(regex_t *preg)
2890 tre_tnfa_transition_t *trans;
2892 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2896 for (i = 0; i < tnfa->num_transitions; i++)
2897 if (tnfa->transitions[i].state)
2899 if (tnfa->transitions[i].tags)
2900 xfree(tnfa->transitions[i].tags);
2901 if (tnfa->transitions[i].neg_classes)
2902 xfree(tnfa->transitions[i].neg_classes);
2904 if (tnfa->transitions)
2905 xfree(tnfa->transitions);
2909 for (trans = tnfa->initial; trans->state; trans++)
2914 xfree(tnfa->initial);
2917 if (tnfa->submatch_data)
2919 for (i = 0; i < tnfa->num_submatches; i++)
2920 if (tnfa->submatch_data[i].parents)
2921 xfree(tnfa->submatch_data[i].parents);
2922 xfree(tnfa->submatch_data);
2925 if (tnfa->tag_directions)
2926 xfree(tnfa->tag_directions);
2927 if (tnfa->firstpos_chars)
2928 xfree(tnfa->firstpos_chars);
2929 if (tnfa->minimal_tags)
2930 xfree(tnfa->minimal_tags);