2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
841 /* reject repetitions after empty expression in BRE */
845 if (!ere && (unsigned)*s-'1' < 9) {
848 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
849 ctx->max_backref = MAX(val, ctx->max_backref);
851 /* extension: accept unknown escaped char
859 if (ctx->cflags & REG_NEWLINE) {
860 tre_ast_node_t *tmp1, *tmp2;
861 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
862 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
864 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
868 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
873 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
874 if (!ere && s != ctx->re)
876 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
880 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
883 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
891 /* reject repetitions after empty expression in ERE */
898 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
902 len = mbtowc(&wc, s, -1);
905 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
906 tre_ast_node_t *tmp1, *tmp2;
907 /* multiple opposite case characters are not supported */
908 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
909 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
911 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
915 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
928 #define PUSHPTR(err, s, v) do { \
929 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
933 #define PUSHINT(err, s, v) do { \
934 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
938 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
940 tre_ast_node_t *nbranch=0, *nunion=0;
941 int ere = ctx->cflags & REG_EXTENDED;
942 const char *s = ctx->re;
946 tre_stack_t *stack = ctx->stack;
948 PUSHINT(err, stack, subid++);
950 if ((!ere && *s == '\\' && s[1] == '(') ||
951 (ere && *s == '(')) {
952 PUSHPTR(err, stack, nunion);
953 PUSHPTR(err, stack, nbranch);
954 PUSHINT(err, stack, subid++);
959 nbranch = nunion = 0;
962 if ((!ere && *s == '\\' && s[1] == ')') ||
963 (ere && *s == ')' && depth)) {
964 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
968 err = parse_atom(ctx, s);
975 /* extension: repetitions are rejected after an empty node
976 eg. (+), |*, {2}, but assertions are not treated as empty
977 so ^* or $? are accepted currently. */
991 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
994 /* extension: multiple consecutive *+?{,} is unspecified,
995 but (a+)+ has to be supported so accepting a++ makes
996 sense, note however that the RE_DUP_MAX limit can be
997 circumvented: (a{255}){255} uses a lot of memory.. */
1000 if (ere || s[1] != '{')
1008 err = parse_dup(ctx, s+1);
1015 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1016 if ((ere && *s == '|') ||
1017 (ere && *s == ')' && depth) ||
1018 (!ere && *s == '\\' && s[1] == ')') ||
1020 /* extension: empty branch is unspecified (), (|a), (a|)
1021 here they are not rejected but match on empty string */
1023 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1027 if (!depth) return REG_EPAREN;
1029 } else if (c == ')')
1032 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1035 if (!c && depth<0) {
1036 ctx->submatch_id = subid;
1041 nbranch = tre_stack_pop_voidptr(stack);
1042 nunion = tre_stack_pop_voidptr(stack);
1051 /***********************************************************************
1053 ***********************************************************************/
1058 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1063 Algorithms to setup tags so that submatch addressing can be done.
1067 /* Inserts a catenation node to the root of the tree given in `node'.
1068 As the left child a new tag with number `tag_id' to `node' is added,
1069 and the right child is the old root. */
1070 static reg_errcode_t
1071 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1073 tre_catenation_t *c;
1075 c = tre_mem_alloc(mem, sizeof(*c));
1078 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1079 if (c->left == NULL)
1081 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1082 if (c->right == NULL)
1085 c->right->obj = node->obj;
1086 c->right->type = node->type;
1087 c->right->nullable = -1;
1088 c->right->submatch_id = -1;
1089 c->right->firstpos = NULL;
1090 c->right->lastpos = NULL;
1091 c->right->num_tags = 0;
1093 node->type = CATENATION;
1097 /* Inserts a catenation node to the root of the tree given in `node'.
1098 As the right child a new tag with number `tag_id' to `node' is added,
1099 and the left child is the old root. */
1100 static reg_errcode_t
1101 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1103 tre_catenation_t *c;
1105 c = tre_mem_alloc(mem, sizeof(*c));
1108 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1109 if (c->right == NULL)
1111 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1112 if (c->left == NULL)
1115 c->left->obj = node->obj;
1116 c->left->type = node->type;
1117 c->left->nullable = -1;
1118 c->left->submatch_id = -1;
1119 c->left->firstpos = NULL;
1120 c->left->lastpos = NULL;
1121 c->left->num_tags = 0;
1123 node->type = CATENATION;
1129 ADDTAGS_AFTER_ITERATION,
1130 ADDTAGS_AFTER_UNION_LEFT,
1131 ADDTAGS_AFTER_UNION_RIGHT,
1132 ADDTAGS_AFTER_CAT_LEFT,
1133 ADDTAGS_AFTER_CAT_RIGHT,
1134 ADDTAGS_SET_SUBMATCH_END
1135 } tre_addtags_symbol_t;
1144 /* Go through `regset' and set submatch data for submatches that are
1147 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1151 for (i = 0; regset[i] >= 0; i++)
1153 int id = regset[i] / 2;
1154 int start = !(regset[i] % 2);
1156 tnfa->submatch_data[id].so_tag = tag;
1158 tnfa->submatch_data[id].eo_tag = tag;
1164 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1165 subexpressions marked for submatch addressing can be traced. */
1166 static reg_errcode_t
1167 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1170 reg_errcode_t status = REG_OK;
1171 tre_addtags_symbol_t symbol;
1172 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1173 int bottom = tre_stack_num_objects(stack);
1174 /* True for first pass (counting number of needed tags) */
1175 int first_pass = (mem == NULL || tnfa == NULL);
1176 int *regset, *orig_regset;
1177 int num_tags = 0; /* Total number of tags. */
1178 int num_minimals = 0; /* Number of special minimal tags. */
1179 int tag = 0; /* The tag that is to be added next. */
1180 int next_tag = 1; /* Next tag to use after this one. */
1181 int *parents; /* Stack of submatches the current submatch is
1183 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1184 tre_tag_states_t *saved_states;
1186 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1190 tnfa->minimal_tags[0] = -1;
1193 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1197 orig_regset = regset;
1199 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1200 if (parents == NULL)
1207 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1208 if (saved_states == NULL)
1217 for (i = 0; i <= tnfa->num_submatches; i++)
1218 saved_states[i].tag = -1;
1221 STACK_PUSH(stack, voidptr, node);
1222 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1224 while (tre_stack_num_objects(stack) > bottom)
1226 if (status != REG_OK)
1229 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1233 case ADDTAGS_SET_SUBMATCH_END:
1235 int id = tre_stack_pop_int(stack);
1238 /* Add end of this submatch to regset. */
1239 for (i = 0; regset[i] >= 0; i++);
1240 regset[i] = id * 2 + 1;
1243 /* Pop this submatch from the parents stack. */
1244 for (i = 0; parents[i] >= 0; i++);
1245 parents[i - 1] = -1;
1249 case ADDTAGS_RECURSE:
1250 node = tre_stack_pop_voidptr(stack);
1252 if (node->submatch_id >= 0)
1254 int id = node->submatch_id;
1258 /* Add start of this submatch to regset. */
1259 for (i = 0; regset[i] >= 0; i++);
1265 for (i = 0; parents[i] >= 0; i++);
1266 tnfa->submatch_data[id].parents = NULL;
1269 int *p = xmalloc(sizeof(*p) * (i + 1));
1272 status = REG_ESPACE;
1275 assert(tnfa->submatch_data[id].parents == NULL);
1276 tnfa->submatch_data[id].parents = p;
1277 for (i = 0; parents[i] >= 0; i++)
1283 /* Add end of this submatch to regset after processing this
1285 STACK_PUSHX(stack, int, node->submatch_id);
1286 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1293 tre_literal_t *lit = node->obj;
1295 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1300 /* Regset is not empty, so add a tag before the
1301 literal or backref. */
1304 status = tre_add_tag_left(mem, node, tag);
1305 tnfa->tag_directions[tag] = direction;
1306 if (minimal_tag >= 0)
1308 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1309 tnfa->minimal_tags[i] = tag;
1310 tnfa->minimal_tags[i + 1] = minimal_tag;
1311 tnfa->minimal_tags[i + 2] = -1;
1315 tre_purge_regset(regset, tnfa, tag);
1330 assert(!IS_TAG(lit));
1336 tre_catenation_t *cat = node->obj;
1337 tre_ast_node_t *left = cat->left;
1338 tre_ast_node_t *right = cat->right;
1339 int reserved_tag = -1;
1342 /* After processing right child. */
1343 STACK_PUSHX(stack, voidptr, node);
1344 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1346 /* Process right child. */
1347 STACK_PUSHX(stack, voidptr, right);
1348 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1350 /* After processing left child. */
1351 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1352 if (left->num_tags > 0 && right->num_tags > 0)
1354 /* Reserve the next tag to the right child. */
1355 reserved_tag = next_tag;
1358 STACK_PUSHX(stack, int, reserved_tag);
1359 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1361 /* Process left child. */
1362 STACK_PUSHX(stack, voidptr, left);
1363 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1369 tre_iteration_t *iter = node->obj;
1373 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1377 STACK_PUSHX(stack, int, tag);
1378 STACK_PUSHX(stack, int, iter->minimal);
1380 STACK_PUSHX(stack, voidptr, node);
1381 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1383 STACK_PUSHX(stack, voidptr, iter->arg);
1384 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1386 /* Regset is not empty, so add a tag here. */
1387 if (regset[0] >= 0 || iter->minimal)
1392 status = tre_add_tag_left(mem, node, tag);
1394 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1396 tnfa->tag_directions[tag] = direction;
1397 if (minimal_tag >= 0)
1399 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1400 tnfa->minimal_tags[i] = tag;
1401 tnfa->minimal_tags[i + 1] = minimal_tag;
1402 tnfa->minimal_tags[i + 2] = -1;
1406 tre_purge_regset(regset, tnfa, tag);
1414 direction = TRE_TAG_MINIMIZE;
1419 tre_union_t *uni = node->obj;
1420 tre_ast_node_t *left = uni->left;
1421 tre_ast_node_t *right = uni->right;
1427 left_tag = next_tag;
1428 right_tag = next_tag + 1;
1433 right_tag = next_tag;
1436 /* After processing right child. */
1437 STACK_PUSHX(stack, int, right_tag);
1438 STACK_PUSHX(stack, int, left_tag);
1439 STACK_PUSHX(stack, voidptr, regset);
1440 STACK_PUSHX(stack, int, regset[0] >= 0);
1441 STACK_PUSHX(stack, voidptr, node);
1442 STACK_PUSHX(stack, voidptr, right);
1443 STACK_PUSHX(stack, voidptr, left);
1444 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1446 /* Process right child. */
1447 STACK_PUSHX(stack, voidptr, right);
1448 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1450 /* After processing left child. */
1451 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1453 /* Process left child. */
1454 STACK_PUSHX(stack, voidptr, left);
1455 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1457 /* Regset is not empty, so add a tag here. */
1463 status = tre_add_tag_left(mem, node, tag);
1464 tnfa->tag_directions[tag] = direction;
1465 if (minimal_tag >= 0)
1467 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1468 tnfa->minimal_tags[i] = tag;
1469 tnfa->minimal_tags[i + 1] = minimal_tag;
1470 tnfa->minimal_tags[i + 2] = -1;
1474 tre_purge_regset(regset, tnfa, tag);
1483 if (node->num_submatches > 0)
1485 /* The next two tags are reserved for markers. */
1495 if (node->submatch_id >= 0)
1498 /* Push this submatch on the parents stack. */
1499 for (i = 0; parents[i] >= 0; i++);
1500 parents[i] = node->submatch_id;
1501 parents[i + 1] = -1;
1504 break; /* end case: ADDTAGS_RECURSE */
1506 case ADDTAGS_AFTER_ITERATION:
1510 node = tre_stack_pop_voidptr(stack);
1513 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1514 + tre_stack_pop_int(stack);
1519 minimal = tre_stack_pop_int(stack);
1520 enter_tag = tre_stack_pop_int(stack);
1522 minimal_tag = enter_tag;
1528 direction = TRE_TAG_MINIMIZE;
1530 direction = TRE_TAG_MAXIMIZE;
1535 case ADDTAGS_AFTER_CAT_LEFT:
1537 int new_tag = tre_stack_pop_int(stack);
1538 next_tag = tre_stack_pop_int(stack);
1546 case ADDTAGS_AFTER_CAT_RIGHT:
1547 node = tre_stack_pop_voidptr(stack);
1549 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1550 + ((tre_catenation_t *)node->obj)->right->num_tags;
1553 case ADDTAGS_AFTER_UNION_LEFT:
1554 /* Lift the bottom of the `regset' array so that when processing
1555 the right operand the items currently in the array are
1556 invisible. The original bottom was saved at ADDTAGS_UNION and
1557 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1558 while (*regset >= 0)
1562 case ADDTAGS_AFTER_UNION_RIGHT:
1564 int added_tags, tag_left, tag_right;
1565 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1566 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1567 node = tre_stack_pop_voidptr(stack);
1568 added_tags = tre_stack_pop_int(stack);
1571 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1572 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1573 + ((node->num_submatches > 0) ? 2 : 0);
1575 regset = tre_stack_pop_voidptr(stack);
1576 tag_left = tre_stack_pop_int(stack);
1577 tag_right = tre_stack_pop_int(stack);
1579 /* Add tags after both children, the left child gets a smaller
1580 tag than the right child. This guarantees that we prefer
1581 the left child over the right child. */
1582 /* XXX - This is not always necessary (if the children have
1583 tags which must be seen for every match of that child). */
1584 /* XXX - Check if this is the only place where tre_add_tag_right
1585 is used. If so, use tre_add_tag_left (putting the tag before
1586 the child as opposed after the child) and throw away
1587 tre_add_tag_right. */
1588 if (node->num_submatches > 0)
1592 status = tre_add_tag_right(mem, left, tag_left);
1593 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1594 if (status == REG_OK)
1595 status = tre_add_tag_right(mem, right, tag_right);
1596 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1600 direction = TRE_TAG_MAXIMIZE;
1608 } /* end switch(symbol) */
1609 } /* end while(tre_stack_num_objects(stack) > bottom) */
1612 tre_purge_regset(regset, tnfa, tag);
1614 if (!first_pass && minimal_tag >= 0)
1617 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1618 tnfa->minimal_tags[i] = tag;
1619 tnfa->minimal_tags[i + 1] = minimal_tag;
1620 tnfa->minimal_tags[i + 2] = -1;
1625 assert(tree->num_tags == num_tags);
1626 tnfa->end_tag = num_tags;
1627 tnfa->num_tags = num_tags;
1628 tnfa->num_minimals = num_minimals;
1631 xfree(saved_states);
1638 AST to TNFA compilation routines.
1644 } tre_copyast_symbol_t;
1646 /* Flags for tre_copy_ast(). */
1647 #define COPY_REMOVE_TAGS 1
1648 #define COPY_MAXIMIZE_FIRST_TAG 2
1650 static reg_errcode_t
1651 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1652 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1653 tre_ast_node_t **copy, int *max_pos)
1655 reg_errcode_t status = REG_OK;
1656 int bottom = tre_stack_num_objects(stack);
1659 tre_ast_node_t **result = copy;
1660 tre_copyast_symbol_t symbol;
1662 STACK_PUSH(stack, voidptr, ast);
1663 STACK_PUSH(stack, int, COPY_RECURSE);
1665 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1667 tre_ast_node_t *node;
1668 if (status != REG_OK)
1671 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1674 case COPY_SET_RESULT_PTR:
1675 result = tre_stack_pop_voidptr(stack);
1678 node = tre_stack_pop_voidptr(stack);
1683 tre_literal_t *lit = node->obj;
1684 int pos = lit->position;
1685 int min = lit->code_min;
1686 int max = lit->code_max;
1687 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1689 /* XXX - e.g. [ab] has only one position but two
1690 nodes, so we are creating holes in the state space
1691 here. Not fatal, just wastes memory. */
1695 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1697 /* Change this tag to empty. */
1701 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1704 /* Maximize the first tag. */
1705 tag_directions[max] = TRE_TAG_MAXIMIZE;
1708 *result = tre_ast_new_literal(mem, min, max, pos);
1709 if (*result == NULL)
1710 status = REG_ESPACE;
1712 tre_literal_t *p = (*result)->obj;
1713 p->class = lit->class;
1714 p->neg_classes = lit->neg_classes;
1723 tre_union_t *uni = node->obj;
1725 *result = tre_ast_new_union(mem, uni->left, uni->right);
1726 if (*result == NULL)
1728 status = REG_ESPACE;
1731 tmp = (*result)->obj;
1732 result = &tmp->left;
1733 STACK_PUSHX(stack, voidptr, uni->right);
1734 STACK_PUSHX(stack, int, COPY_RECURSE);
1735 STACK_PUSHX(stack, voidptr, &tmp->right);
1736 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1737 STACK_PUSHX(stack, voidptr, uni->left);
1738 STACK_PUSHX(stack, int, COPY_RECURSE);
1743 tre_catenation_t *cat = node->obj;
1744 tre_catenation_t *tmp;
1745 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1746 if (*result == NULL)
1748 status = REG_ESPACE;
1751 tmp = (*result)->obj;
1754 result = &tmp->left;
1756 STACK_PUSHX(stack, voidptr, cat->right);
1757 STACK_PUSHX(stack, int, COPY_RECURSE);
1758 STACK_PUSHX(stack, voidptr, &tmp->right);
1759 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1760 STACK_PUSHX(stack, voidptr, cat->left);
1761 STACK_PUSHX(stack, int, COPY_RECURSE);
1766 tre_iteration_t *iter = node->obj;
1767 STACK_PUSHX(stack, voidptr, iter->arg);
1768 STACK_PUSHX(stack, int, COPY_RECURSE);
1769 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1770 iter->max, iter->minimal);
1771 if (*result == NULL)
1773 status = REG_ESPACE;
1776 iter = (*result)->obj;
1777 result = &iter->arg;
1787 *pos_add += num_copied;
1794 } tre_expand_ast_symbol_t;
1796 /* Expands each iteration node that has a finite nonzero minimum or maximum
1797 iteration count to a catenated sequence of copies of the node. */
1798 static reg_errcode_t
1799 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1800 int *position, tre_tag_direction_t *tag_directions)
1802 reg_errcode_t status = REG_OK;
1803 int bottom = tre_stack_num_objects(stack);
1805 int pos_add_total = 0;
1809 STACK_PUSHR(stack, voidptr, ast);
1810 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1811 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1813 tre_ast_node_t *node;
1814 tre_expand_ast_symbol_t symbol;
1816 if (status != REG_OK)
1819 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1820 node = tre_stack_pop_voidptr(stack);
1823 case EXPAND_RECURSE:
1828 tre_literal_t *lit= node->obj;
1829 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1831 lit->position += pos_add;
1832 if (lit->position > max_pos)
1833 max_pos = lit->position;
1839 tre_union_t *uni = node->obj;
1840 STACK_PUSHX(stack, voidptr, uni->right);
1841 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1842 STACK_PUSHX(stack, voidptr, uni->left);
1843 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1848 tre_catenation_t *cat = node->obj;
1849 STACK_PUSHX(stack, voidptr, cat->right);
1850 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1851 STACK_PUSHX(stack, voidptr, cat->left);
1852 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1857 tre_iteration_t *iter = node->obj;
1858 STACK_PUSHX(stack, int, pos_add);
1859 STACK_PUSHX(stack, voidptr, node);
1860 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1861 STACK_PUSHX(stack, voidptr, iter->arg);
1862 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1863 /* If we are going to expand this node at EXPAND_AFTER_ITER
1864 then don't increase the `pos' fields of the nodes now, it
1865 will get done when expanding. */
1866 if (iter->min > 1 || iter->max > 1)
1876 case EXPAND_AFTER_ITER:
1878 tre_iteration_t *iter = node->obj;
1880 pos_add = tre_stack_pop_int(stack);
1881 pos_add_last = pos_add;
1882 if (iter->min > 1 || iter->max > 1)
1884 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1886 int pos_add_save = pos_add;
1888 /* Create a catenated sequence of copies of the node. */
1889 for (j = 0; j < iter->min; j++)
1891 tre_ast_node_t *copy;
1892 /* Remove tags from all but the last copy. */
1893 int flags = ((j + 1 < iter->min)
1895 : COPY_MAXIMIZE_FIRST_TAG);
1896 pos_add_save = pos_add;
1897 status = tre_copy_ast(mem, stack, iter->arg, flags,
1898 &pos_add, tag_directions, ©,
1900 if (status != REG_OK)
1903 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1910 if (iter->max == -1)
1912 /* No upper limit. */
1913 pos_add_save = pos_add;
1914 status = tre_copy_ast(mem, stack, iter->arg, 0,
1915 &pos_add, NULL, &seq2, &max_pos);
1916 if (status != REG_OK)
1918 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1924 for (j = iter->min; j < iter->max; j++)
1926 tre_ast_node_t *tmp, *copy;
1927 pos_add_save = pos_add;
1928 status = tre_copy_ast(mem, stack, iter->arg, 0,
1929 &pos_add, NULL, ©, &max_pos);
1930 if (status != REG_OK)
1933 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1938 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1941 seq2 = tre_ast_new_union(mem, tmp, seq2);
1947 pos_add = pos_add_save;
1950 else if (seq2 != NULL)
1951 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1954 node->obj = seq1->obj;
1955 node->type = seq1->type;
1959 pos_add_total += pos_add - pos_add_last;
1960 if (iter_depth == 0)
1961 pos_add = pos_add_total;
1971 *position += pos_add_total;
1973 /* `max_pos' should never be larger than `*position' if the above
1974 code works, but just an extra safeguard let's make sure
1975 `*position' is set large enough so enough memory will be
1976 allocated for the transition table. */
1977 if (max_pos > *position)
1978 *position = max_pos;
1983 static tre_pos_and_tags_t *
1984 tre_set_empty(tre_mem_t mem)
1986 tre_pos_and_tags_t *new_set;
1988 new_set = tre_mem_calloc(mem, sizeof(*new_set));
1989 if (new_set == NULL)
1992 new_set[0].position = -1;
1993 new_set[0].code_min = -1;
1994 new_set[0].code_max = -1;
1999 static tre_pos_and_tags_t *
2000 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2001 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2003 tre_pos_and_tags_t *new_set;
2005 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2006 if (new_set == NULL)
2009 new_set[0].position = position;
2010 new_set[0].code_min = code_min;
2011 new_set[0].code_max = code_max;
2012 new_set[0].class = class;
2013 new_set[0].neg_classes = neg_classes;
2014 new_set[0].backref = backref;
2015 new_set[1].position = -1;
2016 new_set[1].code_min = -1;
2017 new_set[1].code_max = -1;
2022 static tre_pos_and_tags_t *
2023 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2024 int *tags, int assertions)
2027 tre_pos_and_tags_t *new_set;
2031 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2032 for (s1 = 0; set1[s1].position >= 0; s1++);
2033 for (s2 = 0; set2[s2].position >= 0; s2++);
2034 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2038 for (s1 = 0; set1[s1].position >= 0; s1++)
2040 new_set[s1].position = set1[s1].position;
2041 new_set[s1].code_min = set1[s1].code_min;
2042 new_set[s1].code_max = set1[s1].code_max;
2043 new_set[s1].assertions = set1[s1].assertions | assertions;
2044 new_set[s1].class = set1[s1].class;
2045 new_set[s1].neg_classes = set1[s1].neg_classes;
2046 new_set[s1].backref = set1[s1].backref;
2047 if (set1[s1].tags == NULL && tags == NULL)
2048 new_set[s1].tags = NULL;
2051 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2052 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2053 * (i + num_tags + 1)));
2054 if (new_tags == NULL)
2056 for (j = 0; j < i; j++)
2057 new_tags[j] = set1[s1].tags[j];
2058 for (i = 0; i < num_tags; i++)
2059 new_tags[j + i] = tags[i];
2060 new_tags[j + i] = -1;
2061 new_set[s1].tags = new_tags;
2065 for (s2 = 0; set2[s2].position >= 0; s2++)
2067 new_set[s1 + s2].position = set2[s2].position;
2068 new_set[s1 + s2].code_min = set2[s2].code_min;
2069 new_set[s1 + s2].code_max = set2[s2].code_max;
2070 /* XXX - why not | assertions here as well? */
2071 new_set[s1 + s2].assertions = set2[s2].assertions;
2072 new_set[s1 + s2].class = set2[s2].class;
2073 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2074 new_set[s1 + s2].backref = set2[s2].backref;
2075 if (set2[s2].tags == NULL)
2076 new_set[s1 + s2].tags = NULL;
2079 for (i = 0; set2[s2].tags[i] >= 0; i++);
2080 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2081 if (new_tags == NULL)
2083 for (j = 0; j < i; j++)
2084 new_tags[j] = set2[s2].tags[j];
2086 new_set[s1 + s2].tags = new_tags;
2089 new_set[s1 + s2].position = -1;
2093 /* Finds the empty path through `node' which is the one that should be
2094 taken according to POSIX.2 rules, and adds the tags on that path to
2095 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2096 set to the number of tags seen on the path. */
2097 static reg_errcode_t
2098 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2099 int *assertions, int *num_tags_seen)
2103 tre_catenation_t *cat;
2104 tre_iteration_t *iter;
2106 int bottom = tre_stack_num_objects(stack);
2107 reg_errcode_t status = REG_OK;
2111 status = tre_stack_push_voidptr(stack, node);
2113 /* Walk through the tree recursively. */
2114 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2116 node = tre_stack_pop_voidptr(stack);
2121 lit = (tre_literal_t *)node->obj;
2122 switch (lit->code_min)
2125 if (lit->code_max >= 0)
2129 /* Add the tag to `tags'. */
2130 for (i = 0; tags[i] >= 0; i++)
2131 if (tags[i] == lit->code_max)
2135 tags[i] = lit->code_max;
2144 assert(lit->code_max >= 1
2145 || lit->code_max <= ASSERT_LAST);
2146 if (assertions != NULL)
2147 *assertions |= lit->code_max;
2158 /* Subexpressions starting earlier take priority over ones
2159 starting later, so we prefer the left subexpression over the
2160 right subexpression. */
2161 uni = (tre_union_t *)node->obj;
2162 if (uni->left->nullable)
2163 STACK_PUSHX(stack, voidptr, uni->left)
2164 else if (uni->right->nullable)
2165 STACK_PUSHX(stack, voidptr, uni->right)
2171 /* The path must go through both children. */
2172 cat = (tre_catenation_t *)node->obj;
2173 assert(cat->left->nullable);
2174 assert(cat->right->nullable);
2175 STACK_PUSHX(stack, voidptr, cat->left);
2176 STACK_PUSHX(stack, voidptr, cat->right);
2180 /* A match with an empty string is preferred over no match at
2181 all, so we go through the argument if possible. */
2182 iter = (tre_iteration_t *)node->obj;
2183 if (iter->arg->nullable)
2184 STACK_PUSHX(stack, voidptr, iter->arg);
2200 NFL_POST_CATENATION,
2202 } tre_nfl_stack_symbol_t;
2205 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2206 the nodes of the AST `tree'. */
2207 static reg_errcode_t
2208 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2210 int bottom = tre_stack_num_objects(stack);
2212 STACK_PUSHR(stack, voidptr, tree);
2213 STACK_PUSHR(stack, int, NFL_RECURSE);
2215 while (tre_stack_num_objects(stack) > bottom)
2217 tre_nfl_stack_symbol_t symbol;
2218 tre_ast_node_t *node;
2220 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2221 node = tre_stack_pop_voidptr(stack);
2229 tre_literal_t *lit = (tre_literal_t *)node->obj;
2230 if (IS_BACKREF(lit))
2232 /* Back references: nullable = false, firstpos = {i},
2235 node->firstpos = tre_set_one(mem, lit->position, 0,
2236 TRE_CHAR_MAX, 0, NULL, -1);
2237 if (!node->firstpos)
2239 node->lastpos = tre_set_one(mem, lit->position, 0,
2240 TRE_CHAR_MAX, 0, NULL,
2241 (int)lit->code_max);
2245 else if (lit->code_min < 0)
2247 /* Tags, empty strings, params, and zero width assertions:
2248 nullable = true, firstpos = {}, and lastpos = {}. */
2250 node->firstpos = tre_set_empty(mem);
2251 if (!node->firstpos)
2253 node->lastpos = tre_set_empty(mem);
2259 /* Literal at position i: nullable = false, firstpos = {i},
2263 tre_set_one(mem, lit->position, (int)lit->code_min,
2264 (int)lit->code_max, 0, NULL, -1);
2265 if (!node->firstpos)
2267 node->lastpos = tre_set_one(mem, lit->position,
2270 lit->class, lit->neg_classes,
2279 /* Compute the attributes for the two subtrees, and after that
2281 STACK_PUSHR(stack, voidptr, node);
2282 STACK_PUSHR(stack, int, NFL_POST_UNION);
2283 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2284 STACK_PUSHR(stack, int, NFL_RECURSE);
2285 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2286 STACK_PUSHR(stack, int, NFL_RECURSE);
2290 /* Compute the attributes for the two subtrees, and after that
2292 STACK_PUSHR(stack, voidptr, node);
2293 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2294 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2295 STACK_PUSHR(stack, int, NFL_RECURSE);
2296 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2297 STACK_PUSHR(stack, int, NFL_RECURSE);
2301 /* Compute the attributes for the subtree, and after that for
2303 STACK_PUSHR(stack, voidptr, node);
2304 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2305 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2306 STACK_PUSHR(stack, int, NFL_RECURSE);
2309 break; /* end case: NFL_RECURSE */
2311 case NFL_POST_UNION:
2313 tre_union_t *uni = (tre_union_t *)node->obj;
2314 node->nullable = uni->left->nullable || uni->right->nullable;
2315 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2316 uni->right->firstpos, NULL, 0);
2317 if (!node->firstpos)
2319 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2320 uni->right->lastpos, NULL, 0);
2326 case NFL_POST_ITERATION:
2328 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2330 if (iter->min == 0 || iter->arg->nullable)
2334 node->firstpos = iter->arg->firstpos;
2335 node->lastpos = iter->arg->lastpos;
2339 case NFL_POST_CATENATION:
2341 int num_tags, *tags, assertions;
2342 reg_errcode_t status;
2343 tre_catenation_t *cat = node->obj;
2344 node->nullable = cat->left->nullable && cat->right->nullable;
2346 /* Compute firstpos. */
2347 if (cat->left->nullable)
2349 /* The left side matches the empty string. Make a first pass
2350 with tre_match_empty() to get the number of tags and
2352 status = tre_match_empty(stack, cat->left,
2353 NULL, NULL, &num_tags);
2354 if (status != REG_OK)
2356 /* Allocate arrays for the tags and parameters. */
2357 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2362 /* Second pass with tre_mach_empty() to get the list of
2363 tags and parameters. */
2364 status = tre_match_empty(stack, cat->left, tags,
2366 if (status != REG_OK)
2372 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2375 if (!node->firstpos)
2380 node->firstpos = cat->left->firstpos;
2383 /* Compute lastpos. */
2384 if (cat->right->nullable)
2386 /* The right side matches the empty string. Make a first pass
2387 with tre_match_empty() to get the number of tags and
2389 status = tre_match_empty(stack, cat->right,
2390 NULL, NULL, &num_tags);
2391 if (status != REG_OK)
2393 /* Allocate arrays for the tags and parameters. */
2394 tags = xmalloc(sizeof(int) * (num_tags + 1));
2399 /* Second pass with tre_mach_empty() to get the list of
2400 tags and parameters. */
2401 status = tre_match_empty(stack, cat->right, tags,
2403 if (status != REG_OK)
2409 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2417 node->lastpos = cat->right->lastpos;
2432 /* Adds a transition from each position in `p1' to each position in `p2'. */
2433 static reg_errcode_t
2434 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2435 tre_tnfa_transition_t *transitions,
2436 int *counts, int *offs)
2438 tre_pos_and_tags_t *orig_p2 = p2;
2439 tre_tnfa_transition_t *trans;
2440 int i, j, k, l, dup, prev_p2_pos;
2442 if (transitions != NULL)
2443 while (p1->position >= 0)
2447 while (p2->position >= 0)
2449 /* Optimization: if this position was already handled, skip it. */
2450 if (p2->position == prev_p2_pos)
2455 prev_p2_pos = p2->position;
2456 /* Set `trans' to point to the next unused transition from
2457 position `p1->position'. */
2458 trans = transitions + offs[p1->position];
2459 while (trans->state != NULL)
2462 /* If we find a previous transition from `p1->position' to
2463 `p2->position', it is overwritten. This can happen only
2464 if there are nested loops in the regexp, like in "((a)*)*".
2465 In POSIX.2 repetition using the outer loop is always
2466 preferred over using the inner loop. Therefore the
2467 transition for the inner loop is useless and can be thrown
2469 /* XXX - The same position is used for all nodes in a bracket
2470 expression, so this optimization cannot be used (it will
2471 break bracket expressions) unless I figure out a way to
2473 if (trans->state_id == p2->position)
2481 if (trans->state == NULL)
2482 (trans + 1)->state = NULL;
2483 /* Use the character ranges, assertions, etc. from `p1' for
2484 the transition from `p1' to `p2'. */
2485 trans->code_min = p1->code_min;
2486 trans->code_max = p1->code_max;
2487 trans->state = transitions + offs[p2->position];
2488 trans->state_id = p2->position;
2489 trans->assertions = p1->assertions | p2->assertions
2490 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2491 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2492 if (p1->backref >= 0)
2494 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2495 assert(p2->backref < 0);
2496 trans->u.backref = p1->backref;
2497 trans->assertions |= ASSERT_BACKREF;
2500 trans->u.class = p1->class;
2501 if (p1->neg_classes != NULL)
2503 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2504 trans->neg_classes =
2505 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2506 if (trans->neg_classes == NULL)
2508 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2509 trans->neg_classes[i] = p1->neg_classes[i];
2510 trans->neg_classes[i] = (tre_ctype_t)0;
2513 trans->neg_classes = NULL;
2515 /* Find out how many tags this transition has. */
2517 if (p1->tags != NULL)
2518 while(p1->tags[i] >= 0)
2521 if (p2->tags != NULL)
2522 while(p2->tags[j] >= 0)
2525 /* If we are overwriting a transition, free the old tag array. */
2526 if (trans->tags != NULL)
2530 /* If there were any tags, allocate an array and fill it. */
2533 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2537 if (p1->tags != NULL)
2538 while(p1->tags[i] >= 0)
2540 trans->tags[i] = p1->tags[i];
2545 if (p2->tags != NULL)
2546 while (p2->tags[j] >= 0)
2548 /* Don't add duplicates. */
2550 for (k = 0; k < i; k++)
2551 if (trans->tags[k] == p2->tags[j])
2557 trans->tags[l++] = p2->tags[j];
2560 trans->tags[l] = -1;
2568 /* Compute a maximum limit for the number of transitions leaving
2570 while (p1->position >= 0)
2573 while (p2->position >= 0)
2575 counts[p1->position]++;
2583 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2584 labelled with one character range (there are no transitions on empty
2585 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2587 static reg_errcode_t
2588 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2589 int *counts, int *offs)
2592 tre_catenation_t *cat;
2593 tre_iteration_t *iter;
2594 reg_errcode_t errcode = REG_OK;
2596 /* XXX - recurse using a stack!. */
2602 uni = (tre_union_t *)node->obj;
2603 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2604 if (errcode != REG_OK)
2606 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2610 cat = (tre_catenation_t *)node->obj;
2611 /* Add a transition from each position in cat->left->lastpos
2612 to each position in cat->right->firstpos. */
2613 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2614 transitions, counts, offs);
2615 if (errcode != REG_OK)
2617 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2618 if (errcode != REG_OK)
2620 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2624 iter = (tre_iteration_t *)node->obj;
2625 assert(iter->max == -1 || iter->max == 1);
2627 if (iter->max == -1)
2629 assert(iter->min == 0 || iter->min == 1);
2630 /* Add a transition from each last position in the iterated
2631 expression to each first position. */
2632 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2633 transitions, counts, offs);
2634 if (errcode != REG_OK)
2637 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2644 #define ERROR_EXIT(err) \
2648 if (/*CONSTCOND*/1) \
2651 while (/*CONSTCOND*/0)
2655 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2658 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2659 tre_pos_and_tags_t *p;
2660 int *counts = NULL, *offs = NULL;
2662 tre_tnfa_transition_t *transitions, *initial;
2663 tre_tnfa_t *tnfa = NULL;
2664 tre_submatch_data_t *submatch_data;
2665 tre_tag_direction_t *tag_directions = NULL;
2666 reg_errcode_t errcode;
2669 /* Parse context. */
2670 tre_parse_ctx_t parse_ctx;
2672 /* Allocate a stack used throughout the compilation process for various
2674 stack = tre_stack_new(512, 10240, 128);
2677 /* Allocate a fast memory allocator. */
2678 mem = tre_mem_new();
2681 tre_stack_destroy(stack);
2685 /* Parse the regexp. */
2686 memset(&parse_ctx, 0, sizeof(parse_ctx));
2687 parse_ctx.mem = mem;
2688 parse_ctx.stack = stack;
2689 parse_ctx.re = regex;
2690 parse_ctx.cflags = cflags;
2691 parse_ctx.max_backref = -1;
2692 errcode = tre_parse(&parse_ctx);
2693 if (errcode != REG_OK)
2694 ERROR_EXIT(errcode);
2695 preg->re_nsub = parse_ctx.submatch_id - 1;
2699 tre_ast_print(tree);
2700 #endif /* TRE_DEBUG */
2702 /* Referring to nonexistent subexpressions is illegal. */
2703 if (parse_ctx.max_backref > (int)preg->re_nsub)
2704 ERROR_EXIT(REG_ESUBREG);
2706 /* Allocate the TNFA struct. */
2707 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2709 ERROR_EXIT(REG_ESPACE);
2710 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2711 tnfa->have_approx = 0;
2712 tnfa->num_submatches = parse_ctx.submatch_id;
2714 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2715 regexp does not have back references, this can be skipped. */
2716 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2719 /* Figure out how many tags we will need. */
2720 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2721 if (errcode != REG_OK)
2722 ERROR_EXIT(errcode);
2724 if (tnfa->num_tags > 0)
2726 tag_directions = xmalloc(sizeof(*tag_directions)
2727 * (tnfa->num_tags + 1));
2728 if (tag_directions == NULL)
2729 ERROR_EXIT(REG_ESPACE);
2730 tnfa->tag_directions = tag_directions;
2731 memset(tag_directions, -1,
2732 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2734 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2735 sizeof(*tnfa->minimal_tags));
2736 if (tnfa->minimal_tags == NULL)
2737 ERROR_EXIT(REG_ESPACE);
2739 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2740 sizeof(*submatch_data));
2741 if (submatch_data == NULL)
2742 ERROR_EXIT(REG_ESPACE);
2743 tnfa->submatch_data = submatch_data;
2745 errcode = tre_add_tags(mem, stack, tree, tnfa);
2746 if (errcode != REG_OK)
2747 ERROR_EXIT(errcode);
2751 /* Expand iteration nodes. */
2752 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2754 if (errcode != REG_OK)
2755 ERROR_EXIT(errcode);
2757 /* Add a dummy node for the final state.
2758 XXX - For certain patterns this dummy node can be optimized away,
2759 for example "a*" or "ab*". Figure out a simple way to detect
2760 this possibility. */
2762 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2763 if (tmp_ast_r == NULL)
2764 ERROR_EXIT(REG_ESPACE);
2766 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2768 ERROR_EXIT(REG_ESPACE);
2770 errcode = tre_compute_nfl(mem, stack, tree);
2771 if (errcode != REG_OK)
2772 ERROR_EXIT(errcode);
2774 counts = xmalloc(sizeof(int) * parse_ctx.position);
2776 ERROR_EXIT(REG_ESPACE);
2778 offs = xmalloc(sizeof(int) * parse_ctx.position);
2780 ERROR_EXIT(REG_ESPACE);
2782 for (i = 0; i < parse_ctx.position; i++)
2784 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2787 for (i = 0; i < parse_ctx.position; i++)
2790 add += counts[i] + 1;
2793 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2794 if (transitions == NULL)
2795 ERROR_EXIT(REG_ESPACE);
2796 tnfa->transitions = transitions;
2797 tnfa->num_transitions = add;
2799 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2800 if (errcode != REG_OK)
2801 ERROR_EXIT(errcode);
2803 tnfa->firstpos_chars = NULL;
2807 while (p->position >= 0)
2813 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2814 if (initial == NULL)
2815 ERROR_EXIT(REG_ESPACE);
2816 tnfa->initial = initial;
2819 for (p = tree->firstpos; p->position >= 0; p++)
2821 initial[i].state = transitions + offs[p->position];
2822 initial[i].state_id = p->position;
2823 initial[i].tags = NULL;
2824 /* Copy the arrays p->tags, and p->params, they are allocated
2825 from a tre_mem object. */
2829 for (j = 0; p->tags[j] >= 0; j++);
2830 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2831 if (!initial[i].tags)
2832 ERROR_EXIT(REG_ESPACE);
2833 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2835 initial[i].assertions = p->assertions;
2838 initial[i].state = NULL;
2840 tnfa->num_transitions = add;
2841 tnfa->final = transitions + offs[tree->lastpos[0].position];
2842 tnfa->num_states = parse_ctx.position;
2843 tnfa->cflags = cflags;
2845 tre_mem_destroy(mem);
2846 tre_stack_destroy(stack);
2850 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2854 /* Free everything that was allocated and return the error code. */
2855 tre_mem_destroy(mem);
2857 tre_stack_destroy(stack);
2862 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2871 regfree(regex_t *preg)
2875 tre_tnfa_transition_t *trans;
2877 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2881 for (i = 0; i < tnfa->num_transitions; i++)
2882 if (tnfa->transitions[i].state)
2884 if (tnfa->transitions[i].tags)
2885 xfree(tnfa->transitions[i].tags);
2886 if (tnfa->transitions[i].neg_classes)
2887 xfree(tnfa->transitions[i].neg_classes);
2889 if (tnfa->transitions)
2890 xfree(tnfa->transitions);
2894 for (trans = tnfa->initial; trans->state; trans++)
2899 xfree(tnfa->initial);
2902 if (tnfa->submatch_data)
2904 for (i = 0; i < tnfa->num_submatches; i++)
2905 if (tnfa->submatch_data[i].parents)
2906 xfree(tnfa->submatch_data[i].parents);
2907 xfree(tnfa->submatch_data);
2910 if (tnfa->tag_directions)
2911 xfree(tnfa->tag_directions);
2912 if (tnfa->firstpos_chars)
2913 xfree(tnfa->firstpos_chars);
2914 if (tnfa->minimal_tags)
2915 xfree(tnfa->minimal_tags);