2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!ere && *s++ != '\\') ||
735 static int hexval(unsigned c)
737 if (c-'0'<10) return c-'0';
739 if (c-'a'<6) return c-'a'+10;
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
745 if (node->submatch_id >= 0) {
746 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
749 n = tre_ast_new_catenation(ctx->mem, n, node);
752 n->num_submatches = node->num_submatches;
755 node->submatch_id = subid;
756 node->num_submatches++;
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
764 Branch = Atom | Branch Atom
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
771 Regex = Branch | Regex '|' Branch
772 Branch = Atom | Branch Atom
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
781 int len, ere = ctx->cflags & REG_EXTENDED;
783 tre_ast_node_t *node;
787 return parse_bracket(ctx, s+1);
789 p = tre_expand_macro(s+1);
791 /* assume \X expansion is a single atom */
792 reg_errcode_t err = parse_atom(ctx, p);
796 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
801 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
804 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
820 for (i=0; i<len && v<0x110000; i++) {
831 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
842 /* extension: treat \| as alternation in BRE */
844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
850 if (!ere && (unsigned)*s-'1' < 9) {
853 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
854 ctx->max_backref = MAX(val, ctx->max_backref);
856 /* extension: accept unknown escaped char
864 if (ctx->cflags & REG_NEWLINE) {
865 tre_ast_node_t *tmp1, *tmp2;
866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
873 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
878 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879 if (!ere && s != ctx->re)
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
885 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
895 /* reject repetitions after empty expression in ERE */
902 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
906 len = mbtowc(&wc, s, -1);
909 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
910 tre_ast_node_t *tmp1, *tmp2;
911 /* multiple opposite case characters are not supported */
912 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
913 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
915 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
919 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
933 #define PUSHPTR(err, s, v) do { \
934 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
938 #define PUSHINT(err, s, v) do { \
939 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
943 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
945 tre_ast_node_t *nbranch=0, *nunion=0;
946 int ere = ctx->cflags & REG_EXTENDED;
947 const char *s = ctx->re;
951 tre_stack_t *stack = ctx->stack;
953 PUSHINT(err, stack, subid++);
955 if ((!ere && *s == '\\' && s[1] == '(') ||
956 (ere && *s == '(')) {
957 PUSHPTR(err, stack, nunion);
958 PUSHPTR(err, stack, nbranch);
959 PUSHINT(err, stack, subid++);
964 nbranch = nunion = 0;
967 if ((!ere && *s == '\\' && s[1] == ')') ||
968 (ere && *s == ')' && depth)) {
969 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
973 err = parse_atom(ctx, s);
983 if (*s!='\\' && *s!='*') {
986 if (*s!='+' && *s!='?' && *s!='{')
991 /* extension: treat \+, \? as repetitions in BRE */
992 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
997 /* extension: multiple consecutive *+?{,} is unspecified,
998 but (a+)+ has to be supported so accepting a++ makes
999 sense, note however that the RE_DUP_MAX limit can be
1000 circumvented: (a{255}){255} uses a lot of memory.. */
1002 s = parse_dup(s+1, ere, &min, &max);
1015 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1017 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1022 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1023 if ((ere && *s == '|') ||
1024 (ere && *s == ')' && depth) ||
1025 (!ere && *s == '\\' && s[1] == ')') ||
1026 /* extension: treat \| as alternation in BRE */
1027 (!ere && *s == '\\' && s[1] == '|') ||
1029 /* extension: empty branch is unspecified (), (|a), (a|)
1030 here they are not rejected but match on empty string */
1032 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1035 if (c == '\\' && s[1] == '|') {
1037 } else if (c == '|') {
1041 if (!depth) return REG_EPAREN;
1043 } else if (c == ')')
1046 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1049 if (!c && depth<0) {
1050 ctx->submatch_id = subid;
1055 nbranch = tre_stack_pop_voidptr(stack);
1056 nunion = tre_stack_pop_voidptr(stack);
1064 /***********************************************************************
1066 ***********************************************************************/
1071 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1076 Algorithms to setup tags so that submatch addressing can be done.
1080 /* Inserts a catenation node to the root of the tree given in `node'.
1081 As the left child a new tag with number `tag_id' to `node' is added,
1082 and the right child is the old root. */
1083 static reg_errcode_t
1084 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1086 tre_catenation_t *c;
1088 c = tre_mem_alloc(mem, sizeof(*c));
1091 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1092 if (c->left == NULL)
1094 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1095 if (c->right == NULL)
1098 c->right->obj = node->obj;
1099 c->right->type = node->type;
1100 c->right->nullable = -1;
1101 c->right->submatch_id = -1;
1102 c->right->firstpos = NULL;
1103 c->right->lastpos = NULL;
1104 c->right->num_tags = 0;
1106 node->type = CATENATION;
1110 /* Inserts a catenation node to the root of the tree given in `node'.
1111 As the right child a new tag with number `tag_id' to `node' is added,
1112 and the left child is the old root. */
1113 static reg_errcode_t
1114 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1116 tre_catenation_t *c;
1118 c = tre_mem_alloc(mem, sizeof(*c));
1121 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1122 if (c->right == NULL)
1124 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1125 if (c->left == NULL)
1128 c->left->obj = node->obj;
1129 c->left->type = node->type;
1130 c->left->nullable = -1;
1131 c->left->submatch_id = -1;
1132 c->left->firstpos = NULL;
1133 c->left->lastpos = NULL;
1134 c->left->num_tags = 0;
1136 node->type = CATENATION;
1142 ADDTAGS_AFTER_ITERATION,
1143 ADDTAGS_AFTER_UNION_LEFT,
1144 ADDTAGS_AFTER_UNION_RIGHT,
1145 ADDTAGS_AFTER_CAT_LEFT,
1146 ADDTAGS_AFTER_CAT_RIGHT,
1147 ADDTAGS_SET_SUBMATCH_END
1148 } tre_addtags_symbol_t;
1157 /* Go through `regset' and set submatch data for submatches that are
1160 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1164 for (i = 0; regset[i] >= 0; i++)
1166 int id = regset[i] / 2;
1167 int start = !(regset[i] % 2);
1169 tnfa->submatch_data[id].so_tag = tag;
1171 tnfa->submatch_data[id].eo_tag = tag;
1177 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1178 subexpressions marked for submatch addressing can be traced. */
1179 static reg_errcode_t
1180 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1183 reg_errcode_t status = REG_OK;
1184 tre_addtags_symbol_t symbol;
1185 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1186 int bottom = tre_stack_num_objects(stack);
1187 /* True for first pass (counting number of needed tags) */
1188 int first_pass = (mem == NULL || tnfa == NULL);
1189 int *regset, *orig_regset;
1190 int num_tags = 0; /* Total number of tags. */
1191 int num_minimals = 0; /* Number of special minimal tags. */
1192 int tag = 0; /* The tag that is to be added next. */
1193 int next_tag = 1; /* Next tag to use after this one. */
1194 int *parents; /* Stack of submatches the current submatch is
1196 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1197 tre_tag_states_t *saved_states;
1199 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1203 tnfa->minimal_tags[0] = -1;
1206 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1210 orig_regset = regset;
1212 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1213 if (parents == NULL)
1220 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1221 if (saved_states == NULL)
1230 for (i = 0; i <= tnfa->num_submatches; i++)
1231 saved_states[i].tag = -1;
1234 STACK_PUSH(stack, voidptr, node);
1235 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1237 while (tre_stack_num_objects(stack) > bottom)
1239 if (status != REG_OK)
1242 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1246 case ADDTAGS_SET_SUBMATCH_END:
1248 int id = tre_stack_pop_int(stack);
1251 /* Add end of this submatch to regset. */
1252 for (i = 0; regset[i] >= 0; i++);
1253 regset[i] = id * 2 + 1;
1256 /* Pop this submatch from the parents stack. */
1257 for (i = 0; parents[i] >= 0; i++);
1258 parents[i - 1] = -1;
1262 case ADDTAGS_RECURSE:
1263 node = tre_stack_pop_voidptr(stack);
1265 if (node->submatch_id >= 0)
1267 int id = node->submatch_id;
1271 /* Add start of this submatch to regset. */
1272 for (i = 0; regset[i] >= 0; i++);
1278 for (i = 0; parents[i] >= 0; i++);
1279 tnfa->submatch_data[id].parents = NULL;
1282 int *p = xmalloc(sizeof(*p) * (i + 1));
1285 status = REG_ESPACE;
1288 assert(tnfa->submatch_data[id].parents == NULL);
1289 tnfa->submatch_data[id].parents = p;
1290 for (i = 0; parents[i] >= 0; i++)
1296 /* Add end of this submatch to regset after processing this
1298 STACK_PUSHX(stack, int, node->submatch_id);
1299 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1306 tre_literal_t *lit = node->obj;
1308 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1313 /* Regset is not empty, so add a tag before the
1314 literal or backref. */
1317 status = tre_add_tag_left(mem, node, tag);
1318 tnfa->tag_directions[tag] = direction;
1319 if (minimal_tag >= 0)
1321 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1322 tnfa->minimal_tags[i] = tag;
1323 tnfa->minimal_tags[i + 1] = minimal_tag;
1324 tnfa->minimal_tags[i + 2] = -1;
1328 tre_purge_regset(regset, tnfa, tag);
1343 assert(!IS_TAG(lit));
1349 tre_catenation_t *cat = node->obj;
1350 tre_ast_node_t *left = cat->left;
1351 tre_ast_node_t *right = cat->right;
1352 int reserved_tag = -1;
1355 /* After processing right child. */
1356 STACK_PUSHX(stack, voidptr, node);
1357 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1359 /* Process right child. */
1360 STACK_PUSHX(stack, voidptr, right);
1361 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1363 /* After processing left child. */
1364 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1365 if (left->num_tags > 0 && right->num_tags > 0)
1367 /* Reserve the next tag to the right child. */
1368 reserved_tag = next_tag;
1371 STACK_PUSHX(stack, int, reserved_tag);
1372 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1374 /* Process left child. */
1375 STACK_PUSHX(stack, voidptr, left);
1376 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1382 tre_iteration_t *iter = node->obj;
1386 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1390 STACK_PUSHX(stack, int, tag);
1391 STACK_PUSHX(stack, int, iter->minimal);
1393 STACK_PUSHX(stack, voidptr, node);
1394 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1396 STACK_PUSHX(stack, voidptr, iter->arg);
1397 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1399 /* Regset is not empty, so add a tag here. */
1400 if (regset[0] >= 0 || iter->minimal)
1405 status = tre_add_tag_left(mem, node, tag);
1407 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1409 tnfa->tag_directions[tag] = direction;
1410 if (minimal_tag >= 0)
1412 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1413 tnfa->minimal_tags[i] = tag;
1414 tnfa->minimal_tags[i + 1] = minimal_tag;
1415 tnfa->minimal_tags[i + 2] = -1;
1419 tre_purge_regset(regset, tnfa, tag);
1427 direction = TRE_TAG_MINIMIZE;
1432 tre_union_t *uni = node->obj;
1433 tre_ast_node_t *left = uni->left;
1434 tre_ast_node_t *right = uni->right;
1440 left_tag = next_tag;
1441 right_tag = next_tag + 1;
1446 right_tag = next_tag;
1449 /* After processing right child. */
1450 STACK_PUSHX(stack, int, right_tag);
1451 STACK_PUSHX(stack, int, left_tag);
1452 STACK_PUSHX(stack, voidptr, regset);
1453 STACK_PUSHX(stack, int, regset[0] >= 0);
1454 STACK_PUSHX(stack, voidptr, node);
1455 STACK_PUSHX(stack, voidptr, right);
1456 STACK_PUSHX(stack, voidptr, left);
1457 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1459 /* Process right child. */
1460 STACK_PUSHX(stack, voidptr, right);
1461 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1463 /* After processing left child. */
1464 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1466 /* Process left child. */
1467 STACK_PUSHX(stack, voidptr, left);
1468 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1470 /* Regset is not empty, so add a tag here. */
1476 status = tre_add_tag_left(mem, node, tag);
1477 tnfa->tag_directions[tag] = direction;
1478 if (minimal_tag >= 0)
1480 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1481 tnfa->minimal_tags[i] = tag;
1482 tnfa->minimal_tags[i + 1] = minimal_tag;
1483 tnfa->minimal_tags[i + 2] = -1;
1487 tre_purge_regset(regset, tnfa, tag);
1496 if (node->num_submatches > 0)
1498 /* The next two tags are reserved for markers. */
1508 if (node->submatch_id >= 0)
1511 /* Push this submatch on the parents stack. */
1512 for (i = 0; parents[i] >= 0; i++);
1513 parents[i] = node->submatch_id;
1514 parents[i + 1] = -1;
1517 break; /* end case: ADDTAGS_RECURSE */
1519 case ADDTAGS_AFTER_ITERATION:
1523 node = tre_stack_pop_voidptr(stack);
1526 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1527 + tre_stack_pop_int(stack);
1532 minimal = tre_stack_pop_int(stack);
1533 enter_tag = tre_stack_pop_int(stack);
1535 minimal_tag = enter_tag;
1541 direction = TRE_TAG_MINIMIZE;
1543 direction = TRE_TAG_MAXIMIZE;
1548 case ADDTAGS_AFTER_CAT_LEFT:
1550 int new_tag = tre_stack_pop_int(stack);
1551 next_tag = tre_stack_pop_int(stack);
1559 case ADDTAGS_AFTER_CAT_RIGHT:
1560 node = tre_stack_pop_voidptr(stack);
1562 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1563 + ((tre_catenation_t *)node->obj)->right->num_tags;
1566 case ADDTAGS_AFTER_UNION_LEFT:
1567 /* Lift the bottom of the `regset' array so that when processing
1568 the right operand the items currently in the array are
1569 invisible. The original bottom was saved at ADDTAGS_UNION and
1570 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1571 while (*regset >= 0)
1575 case ADDTAGS_AFTER_UNION_RIGHT:
1577 int added_tags, tag_left, tag_right;
1578 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1579 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1580 node = tre_stack_pop_voidptr(stack);
1581 added_tags = tre_stack_pop_int(stack);
1584 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1585 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1586 + ((node->num_submatches > 0) ? 2 : 0);
1588 regset = tre_stack_pop_voidptr(stack);
1589 tag_left = tre_stack_pop_int(stack);
1590 tag_right = tre_stack_pop_int(stack);
1592 /* Add tags after both children, the left child gets a smaller
1593 tag than the right child. This guarantees that we prefer
1594 the left child over the right child. */
1595 /* XXX - This is not always necessary (if the children have
1596 tags which must be seen for every match of that child). */
1597 /* XXX - Check if this is the only place where tre_add_tag_right
1598 is used. If so, use tre_add_tag_left (putting the tag before
1599 the child as opposed after the child) and throw away
1600 tre_add_tag_right. */
1601 if (node->num_submatches > 0)
1605 status = tre_add_tag_right(mem, left, tag_left);
1606 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1607 if (status == REG_OK)
1608 status = tre_add_tag_right(mem, right, tag_right);
1609 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1613 direction = TRE_TAG_MAXIMIZE;
1621 } /* end switch(symbol) */
1622 } /* end while(tre_stack_num_objects(stack) > bottom) */
1625 tre_purge_regset(regset, tnfa, tag);
1627 if (!first_pass && minimal_tag >= 0)
1630 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1631 tnfa->minimal_tags[i] = tag;
1632 tnfa->minimal_tags[i + 1] = minimal_tag;
1633 tnfa->minimal_tags[i + 2] = -1;
1638 assert(tree->num_tags == num_tags);
1639 tnfa->end_tag = num_tags;
1640 tnfa->num_tags = num_tags;
1641 tnfa->num_minimals = num_minimals;
1644 xfree(saved_states);
1651 AST to TNFA compilation routines.
1657 } tre_copyast_symbol_t;
1659 /* Flags for tre_copy_ast(). */
1660 #define COPY_REMOVE_TAGS 1
1661 #define COPY_MAXIMIZE_FIRST_TAG 2
1663 static reg_errcode_t
1664 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1665 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1666 tre_ast_node_t **copy, int *max_pos)
1668 reg_errcode_t status = REG_OK;
1669 int bottom = tre_stack_num_objects(stack);
1672 tre_ast_node_t **result = copy;
1673 tre_copyast_symbol_t symbol;
1675 STACK_PUSH(stack, voidptr, ast);
1676 STACK_PUSH(stack, int, COPY_RECURSE);
1678 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1680 tre_ast_node_t *node;
1681 if (status != REG_OK)
1684 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1687 case COPY_SET_RESULT_PTR:
1688 result = tre_stack_pop_voidptr(stack);
1691 node = tre_stack_pop_voidptr(stack);
1696 tre_literal_t *lit = node->obj;
1697 int pos = lit->position;
1698 int min = lit->code_min;
1699 int max = lit->code_max;
1700 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1702 /* XXX - e.g. [ab] has only one position but two
1703 nodes, so we are creating holes in the state space
1704 here. Not fatal, just wastes memory. */
1708 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1710 /* Change this tag to empty. */
1714 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1717 /* Maximize the first tag. */
1718 tag_directions[max] = TRE_TAG_MAXIMIZE;
1721 *result = tre_ast_new_literal(mem, min, max, pos);
1722 if (*result == NULL)
1723 status = REG_ESPACE;
1725 tre_literal_t *p = (*result)->obj;
1726 p->class = lit->class;
1727 p->neg_classes = lit->neg_classes;
1736 tre_union_t *uni = node->obj;
1738 *result = tre_ast_new_union(mem, uni->left, uni->right);
1739 if (*result == NULL)
1741 status = REG_ESPACE;
1744 tmp = (*result)->obj;
1745 result = &tmp->left;
1746 STACK_PUSHX(stack, voidptr, uni->right);
1747 STACK_PUSHX(stack, int, COPY_RECURSE);
1748 STACK_PUSHX(stack, voidptr, &tmp->right);
1749 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1750 STACK_PUSHX(stack, voidptr, uni->left);
1751 STACK_PUSHX(stack, int, COPY_RECURSE);
1756 tre_catenation_t *cat = node->obj;
1757 tre_catenation_t *tmp;
1758 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1759 if (*result == NULL)
1761 status = REG_ESPACE;
1764 tmp = (*result)->obj;
1767 result = &tmp->left;
1769 STACK_PUSHX(stack, voidptr, cat->right);
1770 STACK_PUSHX(stack, int, COPY_RECURSE);
1771 STACK_PUSHX(stack, voidptr, &tmp->right);
1772 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1773 STACK_PUSHX(stack, voidptr, cat->left);
1774 STACK_PUSHX(stack, int, COPY_RECURSE);
1779 tre_iteration_t *iter = node->obj;
1780 STACK_PUSHX(stack, voidptr, iter->arg);
1781 STACK_PUSHX(stack, int, COPY_RECURSE);
1782 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1783 iter->max, iter->minimal);
1784 if (*result == NULL)
1786 status = REG_ESPACE;
1789 iter = (*result)->obj;
1790 result = &iter->arg;
1800 *pos_add += num_copied;
1807 } tre_expand_ast_symbol_t;
1809 /* Expands each iteration node that has a finite nonzero minimum or maximum
1810 iteration count to a catenated sequence of copies of the node. */
1811 static reg_errcode_t
1812 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1813 int *position, tre_tag_direction_t *tag_directions)
1815 reg_errcode_t status = REG_OK;
1816 int bottom = tre_stack_num_objects(stack);
1818 int pos_add_total = 0;
1822 STACK_PUSHR(stack, voidptr, ast);
1823 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1824 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1826 tre_ast_node_t *node;
1827 tre_expand_ast_symbol_t symbol;
1829 if (status != REG_OK)
1832 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1833 node = tre_stack_pop_voidptr(stack);
1836 case EXPAND_RECURSE:
1841 tre_literal_t *lit= node->obj;
1842 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1844 lit->position += pos_add;
1845 if (lit->position > max_pos)
1846 max_pos = lit->position;
1852 tre_union_t *uni = node->obj;
1853 STACK_PUSHX(stack, voidptr, uni->right);
1854 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1855 STACK_PUSHX(stack, voidptr, uni->left);
1856 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1861 tre_catenation_t *cat = node->obj;
1862 STACK_PUSHX(stack, voidptr, cat->right);
1863 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1864 STACK_PUSHX(stack, voidptr, cat->left);
1865 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870 tre_iteration_t *iter = node->obj;
1871 STACK_PUSHX(stack, int, pos_add);
1872 STACK_PUSHX(stack, voidptr, node);
1873 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1874 STACK_PUSHX(stack, voidptr, iter->arg);
1875 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1876 /* If we are going to expand this node at EXPAND_AFTER_ITER
1877 then don't increase the `pos' fields of the nodes now, it
1878 will get done when expanding. */
1879 if (iter->min > 1 || iter->max > 1)
1889 case EXPAND_AFTER_ITER:
1891 tre_iteration_t *iter = node->obj;
1893 pos_add = tre_stack_pop_int(stack);
1894 pos_add_last = pos_add;
1895 if (iter->min > 1 || iter->max > 1)
1897 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1899 int pos_add_save = pos_add;
1901 /* Create a catenated sequence of copies of the node. */
1902 for (j = 0; j < iter->min; j++)
1904 tre_ast_node_t *copy;
1905 /* Remove tags from all but the last copy. */
1906 int flags = ((j + 1 < iter->min)
1908 : COPY_MAXIMIZE_FIRST_TAG);
1909 pos_add_save = pos_add;
1910 status = tre_copy_ast(mem, stack, iter->arg, flags,
1911 &pos_add, tag_directions, ©,
1913 if (status != REG_OK)
1916 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1923 if (iter->max == -1)
1925 /* No upper limit. */
1926 pos_add_save = pos_add;
1927 status = tre_copy_ast(mem, stack, iter->arg, 0,
1928 &pos_add, NULL, &seq2, &max_pos);
1929 if (status != REG_OK)
1931 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1937 for (j = iter->min; j < iter->max; j++)
1939 tre_ast_node_t *tmp, *copy;
1940 pos_add_save = pos_add;
1941 status = tre_copy_ast(mem, stack, iter->arg, 0,
1942 &pos_add, NULL, ©, &max_pos);
1943 if (status != REG_OK)
1946 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1951 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1954 seq2 = tre_ast_new_union(mem, tmp, seq2);
1960 pos_add = pos_add_save;
1963 else if (seq2 != NULL)
1964 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1967 node->obj = seq1->obj;
1968 node->type = seq1->type;
1972 pos_add_total += pos_add - pos_add_last;
1973 if (iter_depth == 0)
1974 pos_add = pos_add_total;
1984 *position += pos_add_total;
1986 /* `max_pos' should never be larger than `*position' if the above
1987 code works, but just an extra safeguard let's make sure
1988 `*position' is set large enough so enough memory will be
1989 allocated for the transition table. */
1990 if (max_pos > *position)
1991 *position = max_pos;
1996 static tre_pos_and_tags_t *
1997 tre_set_empty(tre_mem_t mem)
1999 tre_pos_and_tags_t *new_set;
2001 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2002 if (new_set == NULL)
2005 new_set[0].position = -1;
2006 new_set[0].code_min = -1;
2007 new_set[0].code_max = -1;
2012 static tre_pos_and_tags_t *
2013 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2014 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2016 tre_pos_and_tags_t *new_set;
2018 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2019 if (new_set == NULL)
2022 new_set[0].position = position;
2023 new_set[0].code_min = code_min;
2024 new_set[0].code_max = code_max;
2025 new_set[0].class = class;
2026 new_set[0].neg_classes = neg_classes;
2027 new_set[0].backref = backref;
2028 new_set[1].position = -1;
2029 new_set[1].code_min = -1;
2030 new_set[1].code_max = -1;
2035 static tre_pos_and_tags_t *
2036 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2037 int *tags, int assertions)
2040 tre_pos_and_tags_t *new_set;
2044 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2045 for (s1 = 0; set1[s1].position >= 0; s1++);
2046 for (s2 = 0; set2[s2].position >= 0; s2++);
2047 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2051 for (s1 = 0; set1[s1].position >= 0; s1++)
2053 new_set[s1].position = set1[s1].position;
2054 new_set[s1].code_min = set1[s1].code_min;
2055 new_set[s1].code_max = set1[s1].code_max;
2056 new_set[s1].assertions = set1[s1].assertions | assertions;
2057 new_set[s1].class = set1[s1].class;
2058 new_set[s1].neg_classes = set1[s1].neg_classes;
2059 new_set[s1].backref = set1[s1].backref;
2060 if (set1[s1].tags == NULL && tags == NULL)
2061 new_set[s1].tags = NULL;
2064 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2065 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2066 * (i + num_tags + 1)));
2067 if (new_tags == NULL)
2069 for (j = 0; j < i; j++)
2070 new_tags[j] = set1[s1].tags[j];
2071 for (i = 0; i < num_tags; i++)
2072 new_tags[j + i] = tags[i];
2073 new_tags[j + i] = -1;
2074 new_set[s1].tags = new_tags;
2078 for (s2 = 0; set2[s2].position >= 0; s2++)
2080 new_set[s1 + s2].position = set2[s2].position;
2081 new_set[s1 + s2].code_min = set2[s2].code_min;
2082 new_set[s1 + s2].code_max = set2[s2].code_max;
2083 /* XXX - why not | assertions here as well? */
2084 new_set[s1 + s2].assertions = set2[s2].assertions;
2085 new_set[s1 + s2].class = set2[s2].class;
2086 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2087 new_set[s1 + s2].backref = set2[s2].backref;
2088 if (set2[s2].tags == NULL)
2089 new_set[s1 + s2].tags = NULL;
2092 for (i = 0; set2[s2].tags[i] >= 0; i++);
2093 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2094 if (new_tags == NULL)
2096 for (j = 0; j < i; j++)
2097 new_tags[j] = set2[s2].tags[j];
2099 new_set[s1 + s2].tags = new_tags;
2102 new_set[s1 + s2].position = -1;
2106 /* Finds the empty path through `node' which is the one that should be
2107 taken according to POSIX.2 rules, and adds the tags on that path to
2108 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2109 set to the number of tags seen on the path. */
2110 static reg_errcode_t
2111 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2112 int *assertions, int *num_tags_seen)
2116 tre_catenation_t *cat;
2117 tre_iteration_t *iter;
2119 int bottom = tre_stack_num_objects(stack);
2120 reg_errcode_t status = REG_OK;
2124 status = tre_stack_push_voidptr(stack, node);
2126 /* Walk through the tree recursively. */
2127 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2129 node = tre_stack_pop_voidptr(stack);
2134 lit = (tre_literal_t *)node->obj;
2135 switch (lit->code_min)
2138 if (lit->code_max >= 0)
2142 /* Add the tag to `tags'. */
2143 for (i = 0; tags[i] >= 0; i++)
2144 if (tags[i] == lit->code_max)
2148 tags[i] = lit->code_max;
2157 assert(lit->code_max >= 1
2158 || lit->code_max <= ASSERT_LAST);
2159 if (assertions != NULL)
2160 *assertions |= lit->code_max;
2171 /* Subexpressions starting earlier take priority over ones
2172 starting later, so we prefer the left subexpression over the
2173 right subexpression. */
2174 uni = (tre_union_t *)node->obj;
2175 if (uni->left->nullable)
2176 STACK_PUSHX(stack, voidptr, uni->left)
2177 else if (uni->right->nullable)
2178 STACK_PUSHX(stack, voidptr, uni->right)
2184 /* The path must go through both children. */
2185 cat = (tre_catenation_t *)node->obj;
2186 assert(cat->left->nullable);
2187 assert(cat->right->nullable);
2188 STACK_PUSHX(stack, voidptr, cat->left);
2189 STACK_PUSHX(stack, voidptr, cat->right);
2193 /* A match with an empty string is preferred over no match at
2194 all, so we go through the argument if possible. */
2195 iter = (tre_iteration_t *)node->obj;
2196 if (iter->arg->nullable)
2197 STACK_PUSHX(stack, voidptr, iter->arg);
2213 NFL_POST_CATENATION,
2215 } tre_nfl_stack_symbol_t;
2218 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2219 the nodes of the AST `tree'. */
2220 static reg_errcode_t
2221 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2223 int bottom = tre_stack_num_objects(stack);
2225 STACK_PUSHR(stack, voidptr, tree);
2226 STACK_PUSHR(stack, int, NFL_RECURSE);
2228 while (tre_stack_num_objects(stack) > bottom)
2230 tre_nfl_stack_symbol_t symbol;
2231 tre_ast_node_t *node;
2233 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2234 node = tre_stack_pop_voidptr(stack);
2242 tre_literal_t *lit = (tre_literal_t *)node->obj;
2243 if (IS_BACKREF(lit))
2245 /* Back references: nullable = false, firstpos = {i},
2248 node->firstpos = tre_set_one(mem, lit->position, 0,
2249 TRE_CHAR_MAX, 0, NULL, -1);
2250 if (!node->firstpos)
2252 node->lastpos = tre_set_one(mem, lit->position, 0,
2253 TRE_CHAR_MAX, 0, NULL,
2254 (int)lit->code_max);
2258 else if (lit->code_min < 0)
2260 /* Tags, empty strings, params, and zero width assertions:
2261 nullable = true, firstpos = {}, and lastpos = {}. */
2263 node->firstpos = tre_set_empty(mem);
2264 if (!node->firstpos)
2266 node->lastpos = tre_set_empty(mem);
2272 /* Literal at position i: nullable = false, firstpos = {i},
2276 tre_set_one(mem, lit->position, (int)lit->code_min,
2277 (int)lit->code_max, 0, NULL, -1);
2278 if (!node->firstpos)
2280 node->lastpos = tre_set_one(mem, lit->position,
2283 lit->class, lit->neg_classes,
2292 /* Compute the attributes for the two subtrees, and after that
2294 STACK_PUSHR(stack, voidptr, node);
2295 STACK_PUSHR(stack, int, NFL_POST_UNION);
2296 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2297 STACK_PUSHR(stack, int, NFL_RECURSE);
2298 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2299 STACK_PUSHR(stack, int, NFL_RECURSE);
2303 /* Compute the attributes for the two subtrees, and after that
2305 STACK_PUSHR(stack, voidptr, node);
2306 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2307 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2308 STACK_PUSHR(stack, int, NFL_RECURSE);
2309 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2310 STACK_PUSHR(stack, int, NFL_RECURSE);
2314 /* Compute the attributes for the subtree, and after that for
2316 STACK_PUSHR(stack, voidptr, node);
2317 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2318 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2319 STACK_PUSHR(stack, int, NFL_RECURSE);
2322 break; /* end case: NFL_RECURSE */
2324 case NFL_POST_UNION:
2326 tre_union_t *uni = (tre_union_t *)node->obj;
2327 node->nullable = uni->left->nullable || uni->right->nullable;
2328 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2329 uni->right->firstpos, NULL, 0);
2330 if (!node->firstpos)
2332 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2333 uni->right->lastpos, NULL, 0);
2339 case NFL_POST_ITERATION:
2341 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2343 if (iter->min == 0 || iter->arg->nullable)
2347 node->firstpos = iter->arg->firstpos;
2348 node->lastpos = iter->arg->lastpos;
2352 case NFL_POST_CATENATION:
2354 int num_tags, *tags, assertions;
2355 reg_errcode_t status;
2356 tre_catenation_t *cat = node->obj;
2357 node->nullable = cat->left->nullable && cat->right->nullable;
2359 /* Compute firstpos. */
2360 if (cat->left->nullable)
2362 /* The left side matches the empty string. Make a first pass
2363 with tre_match_empty() to get the number of tags and
2365 status = tre_match_empty(stack, cat->left,
2366 NULL, NULL, &num_tags);
2367 if (status != REG_OK)
2369 /* Allocate arrays for the tags and parameters. */
2370 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2375 /* Second pass with tre_mach_empty() to get the list of
2376 tags and parameters. */
2377 status = tre_match_empty(stack, cat->left, tags,
2379 if (status != REG_OK)
2385 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2388 if (!node->firstpos)
2393 node->firstpos = cat->left->firstpos;
2396 /* Compute lastpos. */
2397 if (cat->right->nullable)
2399 /* The right side matches the empty string. Make a first pass
2400 with tre_match_empty() to get the number of tags and
2402 status = tre_match_empty(stack, cat->right,
2403 NULL, NULL, &num_tags);
2404 if (status != REG_OK)
2406 /* Allocate arrays for the tags and parameters. */
2407 tags = xmalloc(sizeof(int) * (num_tags + 1));
2412 /* Second pass with tre_mach_empty() to get the list of
2413 tags and parameters. */
2414 status = tre_match_empty(stack, cat->right, tags,
2416 if (status != REG_OK)
2422 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2430 node->lastpos = cat->right->lastpos;
2445 /* Adds a transition from each position in `p1' to each position in `p2'. */
2446 static reg_errcode_t
2447 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2448 tre_tnfa_transition_t *transitions,
2449 int *counts, int *offs)
2451 tre_pos_and_tags_t *orig_p2 = p2;
2452 tre_tnfa_transition_t *trans;
2453 int i, j, k, l, dup, prev_p2_pos;
2455 if (transitions != NULL)
2456 while (p1->position >= 0)
2460 while (p2->position >= 0)
2462 /* Optimization: if this position was already handled, skip it. */
2463 if (p2->position == prev_p2_pos)
2468 prev_p2_pos = p2->position;
2469 /* Set `trans' to point to the next unused transition from
2470 position `p1->position'. */
2471 trans = transitions + offs[p1->position];
2472 while (trans->state != NULL)
2475 /* If we find a previous transition from `p1->position' to
2476 `p2->position', it is overwritten. This can happen only
2477 if there are nested loops in the regexp, like in "((a)*)*".
2478 In POSIX.2 repetition using the outer loop is always
2479 preferred over using the inner loop. Therefore the
2480 transition for the inner loop is useless and can be thrown
2482 /* XXX - The same position is used for all nodes in a bracket
2483 expression, so this optimization cannot be used (it will
2484 break bracket expressions) unless I figure out a way to
2486 if (trans->state_id == p2->position)
2494 if (trans->state == NULL)
2495 (trans + 1)->state = NULL;
2496 /* Use the character ranges, assertions, etc. from `p1' for
2497 the transition from `p1' to `p2'. */
2498 trans->code_min = p1->code_min;
2499 trans->code_max = p1->code_max;
2500 trans->state = transitions + offs[p2->position];
2501 trans->state_id = p2->position;
2502 trans->assertions = p1->assertions | p2->assertions
2503 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2504 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2505 if (p1->backref >= 0)
2507 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2508 assert(p2->backref < 0);
2509 trans->u.backref = p1->backref;
2510 trans->assertions |= ASSERT_BACKREF;
2513 trans->u.class = p1->class;
2514 if (p1->neg_classes != NULL)
2516 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2517 trans->neg_classes =
2518 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2519 if (trans->neg_classes == NULL)
2521 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2522 trans->neg_classes[i] = p1->neg_classes[i];
2523 trans->neg_classes[i] = (tre_ctype_t)0;
2526 trans->neg_classes = NULL;
2528 /* Find out how many tags this transition has. */
2530 if (p1->tags != NULL)
2531 while(p1->tags[i] >= 0)
2534 if (p2->tags != NULL)
2535 while(p2->tags[j] >= 0)
2538 /* If we are overwriting a transition, free the old tag array. */
2539 if (trans->tags != NULL)
2543 /* If there were any tags, allocate an array and fill it. */
2546 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2550 if (p1->tags != NULL)
2551 while(p1->tags[i] >= 0)
2553 trans->tags[i] = p1->tags[i];
2558 if (p2->tags != NULL)
2559 while (p2->tags[j] >= 0)
2561 /* Don't add duplicates. */
2563 for (k = 0; k < i; k++)
2564 if (trans->tags[k] == p2->tags[j])
2570 trans->tags[l++] = p2->tags[j];
2573 trans->tags[l] = -1;
2581 /* Compute a maximum limit for the number of transitions leaving
2583 while (p1->position >= 0)
2586 while (p2->position >= 0)
2588 counts[p1->position]++;
2596 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2597 labelled with one character range (there are no transitions on empty
2598 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2600 static reg_errcode_t
2601 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2602 int *counts, int *offs)
2605 tre_catenation_t *cat;
2606 tre_iteration_t *iter;
2607 reg_errcode_t errcode = REG_OK;
2609 /* XXX - recurse using a stack!. */
2615 uni = (tre_union_t *)node->obj;
2616 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2617 if (errcode != REG_OK)
2619 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2623 cat = (tre_catenation_t *)node->obj;
2624 /* Add a transition from each position in cat->left->lastpos
2625 to each position in cat->right->firstpos. */
2626 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2627 transitions, counts, offs);
2628 if (errcode != REG_OK)
2630 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2631 if (errcode != REG_OK)
2633 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2637 iter = (tre_iteration_t *)node->obj;
2638 assert(iter->max == -1 || iter->max == 1);
2640 if (iter->max == -1)
2642 assert(iter->min == 0 || iter->min == 1);
2643 /* Add a transition from each last position in the iterated
2644 expression to each first position. */
2645 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2646 transitions, counts, offs);
2647 if (errcode != REG_OK)
2650 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2657 #define ERROR_EXIT(err) \
2661 if (/*CONSTCOND*/1) \
2664 while (/*CONSTCOND*/0)
2668 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2671 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2672 tre_pos_and_tags_t *p;
2673 int *counts = NULL, *offs = NULL;
2675 tre_tnfa_transition_t *transitions, *initial;
2676 tre_tnfa_t *tnfa = NULL;
2677 tre_submatch_data_t *submatch_data;
2678 tre_tag_direction_t *tag_directions = NULL;
2679 reg_errcode_t errcode;
2682 /* Parse context. */
2683 tre_parse_ctx_t parse_ctx;
2685 /* Allocate a stack used throughout the compilation process for various
2687 stack = tre_stack_new(512, 1024000, 128);
2690 /* Allocate a fast memory allocator. */
2691 mem = tre_mem_new();
2694 tre_stack_destroy(stack);
2698 /* Parse the regexp. */
2699 memset(&parse_ctx, 0, sizeof(parse_ctx));
2700 parse_ctx.mem = mem;
2701 parse_ctx.stack = stack;
2702 parse_ctx.re = regex;
2703 parse_ctx.cflags = cflags;
2704 parse_ctx.max_backref = -1;
2705 errcode = tre_parse(&parse_ctx);
2706 if (errcode != REG_OK)
2707 ERROR_EXIT(errcode);
2708 preg->re_nsub = parse_ctx.submatch_id - 1;
2712 tre_ast_print(tree);
2713 #endif /* TRE_DEBUG */
2715 /* Referring to nonexistent subexpressions is illegal. */
2716 if (parse_ctx.max_backref > (int)preg->re_nsub)
2717 ERROR_EXIT(REG_ESUBREG);
2719 /* Allocate the TNFA struct. */
2720 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2722 ERROR_EXIT(REG_ESPACE);
2723 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2724 tnfa->have_approx = 0;
2725 tnfa->num_submatches = parse_ctx.submatch_id;
2727 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2728 regexp does not have back references, this can be skipped. */
2729 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2732 /* Figure out how many tags we will need. */
2733 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2734 if (errcode != REG_OK)
2735 ERROR_EXIT(errcode);
2737 if (tnfa->num_tags > 0)
2739 tag_directions = xmalloc(sizeof(*tag_directions)
2740 * (tnfa->num_tags + 1));
2741 if (tag_directions == NULL)
2742 ERROR_EXIT(REG_ESPACE);
2743 tnfa->tag_directions = tag_directions;
2744 memset(tag_directions, -1,
2745 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2747 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2748 sizeof(*tnfa->minimal_tags));
2749 if (tnfa->minimal_tags == NULL)
2750 ERROR_EXIT(REG_ESPACE);
2752 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2753 sizeof(*submatch_data));
2754 if (submatch_data == NULL)
2755 ERROR_EXIT(REG_ESPACE);
2756 tnfa->submatch_data = submatch_data;
2758 errcode = tre_add_tags(mem, stack, tree, tnfa);
2759 if (errcode != REG_OK)
2760 ERROR_EXIT(errcode);
2764 /* Expand iteration nodes. */
2765 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2767 if (errcode != REG_OK)
2768 ERROR_EXIT(errcode);
2770 /* Add a dummy node for the final state.
2771 XXX - For certain patterns this dummy node can be optimized away,
2772 for example "a*" or "ab*". Figure out a simple way to detect
2773 this possibility. */
2775 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2776 if (tmp_ast_r == NULL)
2777 ERROR_EXIT(REG_ESPACE);
2779 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2781 ERROR_EXIT(REG_ESPACE);
2783 errcode = tre_compute_nfl(mem, stack, tree);
2784 if (errcode != REG_OK)
2785 ERROR_EXIT(errcode);
2787 counts = xmalloc(sizeof(int) * parse_ctx.position);
2789 ERROR_EXIT(REG_ESPACE);
2791 offs = xmalloc(sizeof(int) * parse_ctx.position);
2793 ERROR_EXIT(REG_ESPACE);
2795 for (i = 0; i < parse_ctx.position; i++)
2797 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2800 for (i = 0; i < parse_ctx.position; i++)
2803 add += counts[i] + 1;
2806 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2807 if (transitions == NULL)
2808 ERROR_EXIT(REG_ESPACE);
2809 tnfa->transitions = transitions;
2810 tnfa->num_transitions = add;
2812 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2813 if (errcode != REG_OK)
2814 ERROR_EXIT(errcode);
2816 tnfa->firstpos_chars = NULL;
2820 while (p->position >= 0)
2826 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2827 if (initial == NULL)
2828 ERROR_EXIT(REG_ESPACE);
2829 tnfa->initial = initial;
2832 for (p = tree->firstpos; p->position >= 0; p++)
2834 initial[i].state = transitions + offs[p->position];
2835 initial[i].state_id = p->position;
2836 initial[i].tags = NULL;
2837 /* Copy the arrays p->tags, and p->params, they are allocated
2838 from a tre_mem object. */
2842 for (j = 0; p->tags[j] >= 0; j++);
2843 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2844 if (!initial[i].tags)
2845 ERROR_EXIT(REG_ESPACE);
2846 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2848 initial[i].assertions = p->assertions;
2851 initial[i].state = NULL;
2853 tnfa->num_transitions = add;
2854 tnfa->final = transitions + offs[tree->lastpos[0].position];
2855 tnfa->num_states = parse_ctx.position;
2856 tnfa->cflags = cflags;
2858 tre_mem_destroy(mem);
2859 tre_stack_destroy(stack);
2863 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2867 /* Free everything that was allocated and return the error code. */
2868 tre_mem_destroy(mem);
2870 tre_stack_destroy(stack);
2875 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2884 regfree(regex_t *preg)
2888 tre_tnfa_transition_t *trans;
2890 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2894 for (i = 0; i < tnfa->num_transitions; i++)
2895 if (tnfa->transitions[i].state)
2897 if (tnfa->transitions[i].tags)
2898 xfree(tnfa->transitions[i].tags);
2899 if (tnfa->transitions[i].neg_classes)
2900 xfree(tnfa->transitions[i].neg_classes);
2902 if (tnfa->transitions)
2903 xfree(tnfa->transitions);
2907 for (trans = tnfa->initial; trans->state; trans++)
2912 xfree(tnfa->initial);
2915 if (tnfa->submatch_data)
2917 for (i = 0; i < tnfa->num_submatches; i++)
2918 if (tnfa->submatch_data[i].parents)
2919 xfree(tnfa->submatch_data[i].parents);
2920 xfree(tnfa->submatch_data);
2923 if (tnfa->tag_directions)
2924 xfree(tnfa->tag_directions);
2925 if (tnfa->firstpos_chars)
2926 xfree(tnfa->firstpos_chars);
2927 if (tnfa->minimal_tags)
2928 xfree(tnfa->minimal_tags);