2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
841 if (!ere && (unsigned)*s-'1' < 9) {
844 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
845 ctx->max_backref = MAX(val, ctx->max_backref);
847 /* extension: accept unknown escaped char
855 if (ctx->cflags & REG_NEWLINE) {
856 tre_ast_node_t *tmp1, *tmp2;
857 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
858 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
860 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
864 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
869 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
870 if (!ere && s != ctx->re)
872 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
876 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
879 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
890 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
894 len = mbtowc(&wc, s, -1);
897 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
898 tre_ast_node_t *tmp1, *tmp2;
899 /* multiple opposite case characters are not supported */
900 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
901 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
903 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
907 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
920 #define PUSHPTR(err, s, v) do { \
921 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
925 #define PUSHINT(err, s, v) do { \
926 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
930 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
932 tre_ast_node_t *nbranch=0, *nunion=0;
933 int ere = ctx->cflags & REG_EXTENDED;
934 const char *s = ctx->re;
938 tre_stack_t *stack = ctx->stack;
940 PUSHINT(err, stack, subid++);
942 if ((!ere && *s == '\\' && s[1] == '(') ||
943 (ere && *s == '(')) {
944 PUSHPTR(err, stack, nunion);
945 PUSHPTR(err, stack, nbranch);
946 PUSHINT(err, stack, subid++);
951 nbranch = nunion = 0;
954 if ((!ere && *s == '\\' && s[1] == ')') ||
955 (ere && *s == ')' && depth)) {
956 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
960 err = parse_atom(ctx, s);
967 /* extension: repetitions are accepted after an empty node
968 eg. (+), ^*, a$?, a|{2} */
982 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
985 /* extension: multiple consecutive *+?{,} is unspecified,
986 but (a+)+ has to be supported so accepting a++ makes
987 sense, note however that the RE_DUP_MAX limit can be
988 circumvented: (a{255}){255} uses a lot of memory.. */
991 if (ere || s[1] != '{')
999 err = parse_dup(ctx, s+1);
1006 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1007 if ((ere && *s == '|') ||
1008 (ere && *s == ')' && depth) ||
1009 (!ere && *s == '\\' && s[1] == ')') ||
1011 /* extension: empty branch is unspecified (), (|a), (a|)
1012 here they are not rejected but match on empty string */
1014 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1018 if (!depth) return REG_EPAREN;
1020 } else if (c == ')')
1023 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1026 if (!c && depth<0) {
1027 ctx->submatch_id = subid;
1032 nbranch = tre_stack_pop_voidptr(stack);
1033 nunion = tre_stack_pop_voidptr(stack);
1042 /***********************************************************************
1044 ***********************************************************************/
1049 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1054 Algorithms to setup tags so that submatch addressing can be done.
1058 /* Inserts a catenation node to the root of the tree given in `node'.
1059 As the left child a new tag with number `tag_id' to `node' is added,
1060 and the right child is the old root. */
1061 static reg_errcode_t
1062 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1064 tre_catenation_t *c;
1066 c = tre_mem_alloc(mem, sizeof(*c));
1069 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1070 if (c->left == NULL)
1072 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1073 if (c->right == NULL)
1076 c->right->obj = node->obj;
1077 c->right->type = node->type;
1078 c->right->nullable = -1;
1079 c->right->submatch_id = -1;
1080 c->right->firstpos = NULL;
1081 c->right->lastpos = NULL;
1082 c->right->num_tags = 0;
1084 node->type = CATENATION;
1088 /* Inserts a catenation node to the root of the tree given in `node'.
1089 As the right child a new tag with number `tag_id' to `node' is added,
1090 and the left child is the old root. */
1091 static reg_errcode_t
1092 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1094 tre_catenation_t *c;
1096 c = tre_mem_alloc(mem, sizeof(*c));
1099 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1100 if (c->right == NULL)
1102 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1103 if (c->left == NULL)
1106 c->left->obj = node->obj;
1107 c->left->type = node->type;
1108 c->left->nullable = -1;
1109 c->left->submatch_id = -1;
1110 c->left->firstpos = NULL;
1111 c->left->lastpos = NULL;
1112 c->left->num_tags = 0;
1114 node->type = CATENATION;
1120 ADDTAGS_AFTER_ITERATION,
1121 ADDTAGS_AFTER_UNION_LEFT,
1122 ADDTAGS_AFTER_UNION_RIGHT,
1123 ADDTAGS_AFTER_CAT_LEFT,
1124 ADDTAGS_AFTER_CAT_RIGHT,
1125 ADDTAGS_SET_SUBMATCH_END
1126 } tre_addtags_symbol_t;
1135 /* Go through `regset' and set submatch data for submatches that are
1138 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1142 for (i = 0; regset[i] >= 0; i++)
1144 int id = regset[i] / 2;
1145 int start = !(regset[i] % 2);
1147 tnfa->submatch_data[id].so_tag = tag;
1149 tnfa->submatch_data[id].eo_tag = tag;
1155 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1156 subexpressions marked for submatch addressing can be traced. */
1157 static reg_errcode_t
1158 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1161 reg_errcode_t status = REG_OK;
1162 tre_addtags_symbol_t symbol;
1163 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1164 int bottom = tre_stack_num_objects(stack);
1165 /* True for first pass (counting number of needed tags) */
1166 int first_pass = (mem == NULL || tnfa == NULL);
1167 int *regset, *orig_regset;
1168 int num_tags = 0; /* Total number of tags. */
1169 int num_minimals = 0; /* Number of special minimal tags. */
1170 int tag = 0; /* The tag that is to be added next. */
1171 int next_tag = 1; /* Next tag to use after this one. */
1172 int *parents; /* Stack of submatches the current submatch is
1174 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1175 tre_tag_states_t *saved_states;
1177 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1181 tnfa->minimal_tags[0] = -1;
1184 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1188 orig_regset = regset;
1190 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1191 if (parents == NULL)
1198 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1199 if (saved_states == NULL)
1208 for (i = 0; i <= tnfa->num_submatches; i++)
1209 saved_states[i].tag = -1;
1212 STACK_PUSH(stack, voidptr, node);
1213 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1215 while (tre_stack_num_objects(stack) > bottom)
1217 if (status != REG_OK)
1220 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1224 case ADDTAGS_SET_SUBMATCH_END:
1226 int id = tre_stack_pop_int(stack);
1229 /* Add end of this submatch to regset. */
1230 for (i = 0; regset[i] >= 0; i++);
1231 regset[i] = id * 2 + 1;
1234 /* Pop this submatch from the parents stack. */
1235 for (i = 0; parents[i] >= 0; i++);
1236 parents[i - 1] = -1;
1240 case ADDTAGS_RECURSE:
1241 node = tre_stack_pop_voidptr(stack);
1243 if (node->submatch_id >= 0)
1245 int id = node->submatch_id;
1249 /* Add start of this submatch to regset. */
1250 for (i = 0; regset[i] >= 0; i++);
1256 for (i = 0; parents[i] >= 0; i++);
1257 tnfa->submatch_data[id].parents = NULL;
1260 int *p = xmalloc(sizeof(*p) * (i + 1));
1263 status = REG_ESPACE;
1266 assert(tnfa->submatch_data[id].parents == NULL);
1267 tnfa->submatch_data[id].parents = p;
1268 for (i = 0; parents[i] >= 0; i++)
1274 /* Add end of this submatch to regset after processing this
1276 STACK_PUSHX(stack, int, node->submatch_id);
1277 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1284 tre_literal_t *lit = node->obj;
1286 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1291 /* Regset is not empty, so add a tag before the
1292 literal or backref. */
1295 status = tre_add_tag_left(mem, node, tag);
1296 tnfa->tag_directions[tag] = direction;
1297 if (minimal_tag >= 0)
1299 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1300 tnfa->minimal_tags[i] = tag;
1301 tnfa->minimal_tags[i + 1] = minimal_tag;
1302 tnfa->minimal_tags[i + 2] = -1;
1306 tre_purge_regset(regset, tnfa, tag);
1321 assert(!IS_TAG(lit));
1327 tre_catenation_t *cat = node->obj;
1328 tre_ast_node_t *left = cat->left;
1329 tre_ast_node_t *right = cat->right;
1330 int reserved_tag = -1;
1333 /* After processing right child. */
1334 STACK_PUSHX(stack, voidptr, node);
1335 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1337 /* Process right child. */
1338 STACK_PUSHX(stack, voidptr, right);
1339 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1341 /* After processing left child. */
1342 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1343 if (left->num_tags > 0 && right->num_tags > 0)
1345 /* Reserve the next tag to the right child. */
1346 reserved_tag = next_tag;
1349 STACK_PUSHX(stack, int, reserved_tag);
1350 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1352 /* Process left child. */
1353 STACK_PUSHX(stack, voidptr, left);
1354 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1360 tre_iteration_t *iter = node->obj;
1364 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1368 STACK_PUSHX(stack, int, tag);
1369 STACK_PUSHX(stack, int, iter->minimal);
1371 STACK_PUSHX(stack, voidptr, node);
1372 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1374 STACK_PUSHX(stack, voidptr, iter->arg);
1375 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1377 /* Regset is not empty, so add a tag here. */
1378 if (regset[0] >= 0 || iter->minimal)
1383 status = tre_add_tag_left(mem, node, tag);
1385 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1387 tnfa->tag_directions[tag] = direction;
1388 if (minimal_tag >= 0)
1390 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1391 tnfa->minimal_tags[i] = tag;
1392 tnfa->minimal_tags[i + 1] = minimal_tag;
1393 tnfa->minimal_tags[i + 2] = -1;
1397 tre_purge_regset(regset, tnfa, tag);
1405 direction = TRE_TAG_MINIMIZE;
1410 tre_union_t *uni = node->obj;
1411 tre_ast_node_t *left = uni->left;
1412 tre_ast_node_t *right = uni->right;
1418 left_tag = next_tag;
1419 right_tag = next_tag + 1;
1424 right_tag = next_tag;
1427 /* After processing right child. */
1428 STACK_PUSHX(stack, int, right_tag);
1429 STACK_PUSHX(stack, int, left_tag);
1430 STACK_PUSHX(stack, voidptr, regset);
1431 STACK_PUSHX(stack, int, regset[0] >= 0);
1432 STACK_PUSHX(stack, voidptr, node);
1433 STACK_PUSHX(stack, voidptr, right);
1434 STACK_PUSHX(stack, voidptr, left);
1435 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1437 /* Process right child. */
1438 STACK_PUSHX(stack, voidptr, right);
1439 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1441 /* After processing left child. */
1442 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1444 /* Process left child. */
1445 STACK_PUSHX(stack, voidptr, left);
1446 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1448 /* Regset is not empty, so add a tag here. */
1454 status = tre_add_tag_left(mem, node, tag);
1455 tnfa->tag_directions[tag] = direction;
1456 if (minimal_tag >= 0)
1458 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1459 tnfa->minimal_tags[i] = tag;
1460 tnfa->minimal_tags[i + 1] = minimal_tag;
1461 tnfa->minimal_tags[i + 2] = -1;
1465 tre_purge_regset(regset, tnfa, tag);
1474 if (node->num_submatches > 0)
1476 /* The next two tags are reserved for markers. */
1486 if (node->submatch_id >= 0)
1489 /* Push this submatch on the parents stack. */
1490 for (i = 0; parents[i] >= 0; i++);
1491 parents[i] = node->submatch_id;
1492 parents[i + 1] = -1;
1495 break; /* end case: ADDTAGS_RECURSE */
1497 case ADDTAGS_AFTER_ITERATION:
1501 node = tre_stack_pop_voidptr(stack);
1504 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1505 + tre_stack_pop_int(stack);
1510 minimal = tre_stack_pop_int(stack);
1511 enter_tag = tre_stack_pop_int(stack);
1513 minimal_tag = enter_tag;
1519 direction = TRE_TAG_MINIMIZE;
1521 direction = TRE_TAG_MAXIMIZE;
1526 case ADDTAGS_AFTER_CAT_LEFT:
1528 int new_tag = tre_stack_pop_int(stack);
1529 next_tag = tre_stack_pop_int(stack);
1537 case ADDTAGS_AFTER_CAT_RIGHT:
1538 node = tre_stack_pop_voidptr(stack);
1540 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1541 + ((tre_catenation_t *)node->obj)->right->num_tags;
1544 case ADDTAGS_AFTER_UNION_LEFT:
1545 /* Lift the bottom of the `regset' array so that when processing
1546 the right operand the items currently in the array are
1547 invisible. The original bottom was saved at ADDTAGS_UNION and
1548 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1549 while (*regset >= 0)
1553 case ADDTAGS_AFTER_UNION_RIGHT:
1555 int added_tags, tag_left, tag_right;
1556 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1557 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1558 node = tre_stack_pop_voidptr(stack);
1559 added_tags = tre_stack_pop_int(stack);
1562 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1563 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1564 + ((node->num_submatches > 0) ? 2 : 0);
1566 regset = tre_stack_pop_voidptr(stack);
1567 tag_left = tre_stack_pop_int(stack);
1568 tag_right = tre_stack_pop_int(stack);
1570 /* Add tags after both children, the left child gets a smaller
1571 tag than the right child. This guarantees that we prefer
1572 the left child over the right child. */
1573 /* XXX - This is not always necessary (if the children have
1574 tags which must be seen for every match of that child). */
1575 /* XXX - Check if this is the only place where tre_add_tag_right
1576 is used. If so, use tre_add_tag_left (putting the tag before
1577 the child as opposed after the child) and throw away
1578 tre_add_tag_right. */
1579 if (node->num_submatches > 0)
1583 status = tre_add_tag_right(mem, left, tag_left);
1584 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1585 if (status == REG_OK)
1586 status = tre_add_tag_right(mem, right, tag_right);
1587 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1591 direction = TRE_TAG_MAXIMIZE;
1599 } /* end switch(symbol) */
1600 } /* end while(tre_stack_num_objects(stack) > bottom) */
1603 tre_purge_regset(regset, tnfa, tag);
1605 if (!first_pass && minimal_tag >= 0)
1608 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1609 tnfa->minimal_tags[i] = tag;
1610 tnfa->minimal_tags[i + 1] = minimal_tag;
1611 tnfa->minimal_tags[i + 2] = -1;
1616 assert(tree->num_tags == num_tags);
1617 tnfa->end_tag = num_tags;
1618 tnfa->num_tags = num_tags;
1619 tnfa->num_minimals = num_minimals;
1622 xfree(saved_states);
1629 AST to TNFA compilation routines.
1635 } tre_copyast_symbol_t;
1637 /* Flags for tre_copy_ast(). */
1638 #define COPY_REMOVE_TAGS 1
1639 #define COPY_MAXIMIZE_FIRST_TAG 2
1641 static reg_errcode_t
1642 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1643 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1644 tre_ast_node_t **copy, int *max_pos)
1646 reg_errcode_t status = REG_OK;
1647 int bottom = tre_stack_num_objects(stack);
1650 tre_ast_node_t **result = copy;
1651 tre_copyast_symbol_t symbol;
1653 STACK_PUSH(stack, voidptr, ast);
1654 STACK_PUSH(stack, int, COPY_RECURSE);
1656 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1658 tre_ast_node_t *node;
1659 if (status != REG_OK)
1662 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1665 case COPY_SET_RESULT_PTR:
1666 result = tre_stack_pop_voidptr(stack);
1669 node = tre_stack_pop_voidptr(stack);
1674 tre_literal_t *lit = node->obj;
1675 int pos = lit->position;
1676 int min = lit->code_min;
1677 int max = lit->code_max;
1678 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1680 /* XXX - e.g. [ab] has only one position but two
1681 nodes, so we are creating holes in the state space
1682 here. Not fatal, just wastes memory. */
1686 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1688 /* Change this tag to empty. */
1692 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1695 /* Maximize the first tag. */
1696 tag_directions[max] = TRE_TAG_MAXIMIZE;
1699 *result = tre_ast_new_literal(mem, min, max, pos);
1700 if (*result == NULL)
1701 status = REG_ESPACE;
1703 tre_literal_t *p = (*result)->obj;
1704 p->class = lit->class;
1705 p->neg_classes = lit->neg_classes;
1714 tre_union_t *uni = node->obj;
1716 *result = tre_ast_new_union(mem, uni->left, uni->right);
1717 if (*result == NULL)
1719 status = REG_ESPACE;
1722 tmp = (*result)->obj;
1723 result = &tmp->left;
1724 STACK_PUSHX(stack, voidptr, uni->right);
1725 STACK_PUSHX(stack, int, COPY_RECURSE);
1726 STACK_PUSHX(stack, voidptr, &tmp->right);
1727 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1728 STACK_PUSHX(stack, voidptr, uni->left);
1729 STACK_PUSHX(stack, int, COPY_RECURSE);
1734 tre_catenation_t *cat = node->obj;
1735 tre_catenation_t *tmp;
1736 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1737 if (*result == NULL)
1739 status = REG_ESPACE;
1742 tmp = (*result)->obj;
1745 result = &tmp->left;
1747 STACK_PUSHX(stack, voidptr, cat->right);
1748 STACK_PUSHX(stack, int, COPY_RECURSE);
1749 STACK_PUSHX(stack, voidptr, &tmp->right);
1750 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1751 STACK_PUSHX(stack, voidptr, cat->left);
1752 STACK_PUSHX(stack, int, COPY_RECURSE);
1757 tre_iteration_t *iter = node->obj;
1758 STACK_PUSHX(stack, voidptr, iter->arg);
1759 STACK_PUSHX(stack, int, COPY_RECURSE);
1760 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1761 iter->max, iter->minimal);
1762 if (*result == NULL)
1764 status = REG_ESPACE;
1767 iter = (*result)->obj;
1768 result = &iter->arg;
1778 *pos_add += num_copied;
1785 } tre_expand_ast_symbol_t;
1787 /* Expands each iteration node that has a finite nonzero minimum or maximum
1788 iteration count to a catenated sequence of copies of the node. */
1789 static reg_errcode_t
1790 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1791 int *position, tre_tag_direction_t *tag_directions)
1793 reg_errcode_t status = REG_OK;
1794 int bottom = tre_stack_num_objects(stack);
1796 int pos_add_total = 0;
1800 STACK_PUSHR(stack, voidptr, ast);
1801 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1802 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1804 tre_ast_node_t *node;
1805 tre_expand_ast_symbol_t symbol;
1807 if (status != REG_OK)
1810 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1811 node = tre_stack_pop_voidptr(stack);
1814 case EXPAND_RECURSE:
1819 tre_literal_t *lit= node->obj;
1820 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1822 lit->position += pos_add;
1823 if (lit->position > max_pos)
1824 max_pos = lit->position;
1830 tre_union_t *uni = node->obj;
1831 STACK_PUSHX(stack, voidptr, uni->right);
1832 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1833 STACK_PUSHX(stack, voidptr, uni->left);
1834 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1839 tre_catenation_t *cat = node->obj;
1840 STACK_PUSHX(stack, voidptr, cat->right);
1841 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1842 STACK_PUSHX(stack, voidptr, cat->left);
1843 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1848 tre_iteration_t *iter = node->obj;
1849 STACK_PUSHX(stack, int, pos_add);
1850 STACK_PUSHX(stack, voidptr, node);
1851 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1852 STACK_PUSHX(stack, voidptr, iter->arg);
1853 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1854 /* If we are going to expand this node at EXPAND_AFTER_ITER
1855 then don't increase the `pos' fields of the nodes now, it
1856 will get done when expanding. */
1857 if (iter->min > 1 || iter->max > 1)
1867 case EXPAND_AFTER_ITER:
1869 tre_iteration_t *iter = node->obj;
1871 pos_add = tre_stack_pop_int(stack);
1872 pos_add_last = pos_add;
1873 if (iter->min > 1 || iter->max > 1)
1875 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1877 int pos_add_save = pos_add;
1879 /* Create a catenated sequence of copies of the node. */
1880 for (j = 0; j < iter->min; j++)
1882 tre_ast_node_t *copy;
1883 /* Remove tags from all but the last copy. */
1884 int flags = ((j + 1 < iter->min)
1886 : COPY_MAXIMIZE_FIRST_TAG);
1887 pos_add_save = pos_add;
1888 status = tre_copy_ast(mem, stack, iter->arg, flags,
1889 &pos_add, tag_directions, ©,
1891 if (status != REG_OK)
1894 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1901 if (iter->max == -1)
1903 /* No upper limit. */
1904 pos_add_save = pos_add;
1905 status = tre_copy_ast(mem, stack, iter->arg, 0,
1906 &pos_add, NULL, &seq2, &max_pos);
1907 if (status != REG_OK)
1909 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1915 for (j = iter->min; j < iter->max; j++)
1917 tre_ast_node_t *tmp, *copy;
1918 pos_add_save = pos_add;
1919 status = tre_copy_ast(mem, stack, iter->arg, 0,
1920 &pos_add, NULL, ©, &max_pos);
1921 if (status != REG_OK)
1924 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1929 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1932 seq2 = tre_ast_new_union(mem, tmp, seq2);
1938 pos_add = pos_add_save;
1941 else if (seq2 != NULL)
1942 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1945 node->obj = seq1->obj;
1946 node->type = seq1->type;
1950 pos_add_total += pos_add - pos_add_last;
1951 if (iter_depth == 0)
1952 pos_add = pos_add_total;
1962 *position += pos_add_total;
1964 /* `max_pos' should never be larger than `*position' if the above
1965 code works, but just an extra safeguard let's make sure
1966 `*position' is set large enough so enough memory will be
1967 allocated for the transition table. */
1968 if (max_pos > *position)
1969 *position = max_pos;
1974 static tre_pos_and_tags_t *
1975 tre_set_empty(tre_mem_t mem)
1977 tre_pos_and_tags_t *new_set;
1979 new_set = tre_mem_calloc(mem, sizeof(*new_set));
1980 if (new_set == NULL)
1983 new_set[0].position = -1;
1984 new_set[0].code_min = -1;
1985 new_set[0].code_max = -1;
1990 static tre_pos_and_tags_t *
1991 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1992 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1994 tre_pos_and_tags_t *new_set;
1996 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1997 if (new_set == NULL)
2000 new_set[0].position = position;
2001 new_set[0].code_min = code_min;
2002 new_set[0].code_max = code_max;
2003 new_set[0].class = class;
2004 new_set[0].neg_classes = neg_classes;
2005 new_set[0].backref = backref;
2006 new_set[1].position = -1;
2007 new_set[1].code_min = -1;
2008 new_set[1].code_max = -1;
2013 static tre_pos_and_tags_t *
2014 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2015 int *tags, int assertions)
2018 tre_pos_and_tags_t *new_set;
2022 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2023 for (s1 = 0; set1[s1].position >= 0; s1++);
2024 for (s2 = 0; set2[s2].position >= 0; s2++);
2025 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2029 for (s1 = 0; set1[s1].position >= 0; s1++)
2031 new_set[s1].position = set1[s1].position;
2032 new_set[s1].code_min = set1[s1].code_min;
2033 new_set[s1].code_max = set1[s1].code_max;
2034 new_set[s1].assertions = set1[s1].assertions | assertions;
2035 new_set[s1].class = set1[s1].class;
2036 new_set[s1].neg_classes = set1[s1].neg_classes;
2037 new_set[s1].backref = set1[s1].backref;
2038 if (set1[s1].tags == NULL && tags == NULL)
2039 new_set[s1].tags = NULL;
2042 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2043 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2044 * (i + num_tags + 1)));
2045 if (new_tags == NULL)
2047 for (j = 0; j < i; j++)
2048 new_tags[j] = set1[s1].tags[j];
2049 for (i = 0; i < num_tags; i++)
2050 new_tags[j + i] = tags[i];
2051 new_tags[j + i] = -1;
2052 new_set[s1].tags = new_tags;
2056 for (s2 = 0; set2[s2].position >= 0; s2++)
2058 new_set[s1 + s2].position = set2[s2].position;
2059 new_set[s1 + s2].code_min = set2[s2].code_min;
2060 new_set[s1 + s2].code_max = set2[s2].code_max;
2061 /* XXX - why not | assertions here as well? */
2062 new_set[s1 + s2].assertions = set2[s2].assertions;
2063 new_set[s1 + s2].class = set2[s2].class;
2064 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2065 new_set[s1 + s2].backref = set2[s2].backref;
2066 if (set2[s2].tags == NULL)
2067 new_set[s1 + s2].tags = NULL;
2070 for (i = 0; set2[s2].tags[i] >= 0; i++);
2071 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2072 if (new_tags == NULL)
2074 for (j = 0; j < i; j++)
2075 new_tags[j] = set2[s2].tags[j];
2077 new_set[s1 + s2].tags = new_tags;
2080 new_set[s1 + s2].position = -1;
2084 /* Finds the empty path through `node' which is the one that should be
2085 taken according to POSIX.2 rules, and adds the tags on that path to
2086 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2087 set to the number of tags seen on the path. */
2088 static reg_errcode_t
2089 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2090 int *assertions, int *num_tags_seen)
2094 tre_catenation_t *cat;
2095 tre_iteration_t *iter;
2097 int bottom = tre_stack_num_objects(stack);
2098 reg_errcode_t status = REG_OK;
2102 status = tre_stack_push_voidptr(stack, node);
2104 /* Walk through the tree recursively. */
2105 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2107 node = tre_stack_pop_voidptr(stack);
2112 lit = (tre_literal_t *)node->obj;
2113 switch (lit->code_min)
2116 if (lit->code_max >= 0)
2120 /* Add the tag to `tags'. */
2121 for (i = 0; tags[i] >= 0; i++)
2122 if (tags[i] == lit->code_max)
2126 tags[i] = lit->code_max;
2135 assert(lit->code_max >= 1
2136 || lit->code_max <= ASSERT_LAST);
2137 if (assertions != NULL)
2138 *assertions |= lit->code_max;
2149 /* Subexpressions starting earlier take priority over ones
2150 starting later, so we prefer the left subexpression over the
2151 right subexpression. */
2152 uni = (tre_union_t *)node->obj;
2153 if (uni->left->nullable)
2154 STACK_PUSHX(stack, voidptr, uni->left)
2155 else if (uni->right->nullable)
2156 STACK_PUSHX(stack, voidptr, uni->right)
2162 /* The path must go through both children. */
2163 cat = (tre_catenation_t *)node->obj;
2164 assert(cat->left->nullable);
2165 assert(cat->right->nullable);
2166 STACK_PUSHX(stack, voidptr, cat->left);
2167 STACK_PUSHX(stack, voidptr, cat->right);
2171 /* A match with an empty string is preferred over no match at
2172 all, so we go through the argument if possible. */
2173 iter = (tre_iteration_t *)node->obj;
2174 if (iter->arg->nullable)
2175 STACK_PUSHX(stack, voidptr, iter->arg);
2191 NFL_POST_CATENATION,
2193 } tre_nfl_stack_symbol_t;
2196 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2197 the nodes of the AST `tree'. */
2198 static reg_errcode_t
2199 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2201 int bottom = tre_stack_num_objects(stack);
2203 STACK_PUSHR(stack, voidptr, tree);
2204 STACK_PUSHR(stack, int, NFL_RECURSE);
2206 while (tre_stack_num_objects(stack) > bottom)
2208 tre_nfl_stack_symbol_t symbol;
2209 tre_ast_node_t *node;
2211 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2212 node = tre_stack_pop_voidptr(stack);
2220 tre_literal_t *lit = (tre_literal_t *)node->obj;
2221 if (IS_BACKREF(lit))
2223 /* Back references: nullable = false, firstpos = {i},
2226 node->firstpos = tre_set_one(mem, lit->position, 0,
2227 TRE_CHAR_MAX, 0, NULL, -1);
2228 if (!node->firstpos)
2230 node->lastpos = tre_set_one(mem, lit->position, 0,
2231 TRE_CHAR_MAX, 0, NULL,
2232 (int)lit->code_max);
2236 else if (lit->code_min < 0)
2238 /* Tags, empty strings, params, and zero width assertions:
2239 nullable = true, firstpos = {}, and lastpos = {}. */
2241 node->firstpos = tre_set_empty(mem);
2242 if (!node->firstpos)
2244 node->lastpos = tre_set_empty(mem);
2250 /* Literal at position i: nullable = false, firstpos = {i},
2254 tre_set_one(mem, lit->position, (int)lit->code_min,
2255 (int)lit->code_max, 0, NULL, -1);
2256 if (!node->firstpos)
2258 node->lastpos = tre_set_one(mem, lit->position,
2261 lit->class, lit->neg_classes,
2270 /* Compute the attributes for the two subtrees, and after that
2272 STACK_PUSHR(stack, voidptr, node);
2273 STACK_PUSHR(stack, int, NFL_POST_UNION);
2274 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2275 STACK_PUSHR(stack, int, NFL_RECURSE);
2276 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2277 STACK_PUSHR(stack, int, NFL_RECURSE);
2281 /* Compute the attributes for the two subtrees, and after that
2283 STACK_PUSHR(stack, voidptr, node);
2284 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2285 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2286 STACK_PUSHR(stack, int, NFL_RECURSE);
2287 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2288 STACK_PUSHR(stack, int, NFL_RECURSE);
2292 /* Compute the attributes for the subtree, and after that for
2294 STACK_PUSHR(stack, voidptr, node);
2295 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2296 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2297 STACK_PUSHR(stack, int, NFL_RECURSE);
2300 break; /* end case: NFL_RECURSE */
2302 case NFL_POST_UNION:
2304 tre_union_t *uni = (tre_union_t *)node->obj;
2305 node->nullable = uni->left->nullable || uni->right->nullable;
2306 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2307 uni->right->firstpos, NULL, 0);
2308 if (!node->firstpos)
2310 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2311 uni->right->lastpos, NULL, 0);
2317 case NFL_POST_ITERATION:
2319 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2321 if (iter->min == 0 || iter->arg->nullable)
2325 node->firstpos = iter->arg->firstpos;
2326 node->lastpos = iter->arg->lastpos;
2330 case NFL_POST_CATENATION:
2332 int num_tags, *tags, assertions;
2333 reg_errcode_t status;
2334 tre_catenation_t *cat = node->obj;
2335 node->nullable = cat->left->nullable && cat->right->nullable;
2337 /* Compute firstpos. */
2338 if (cat->left->nullable)
2340 /* The left side matches the empty string. Make a first pass
2341 with tre_match_empty() to get the number of tags and
2343 status = tre_match_empty(stack, cat->left,
2344 NULL, NULL, &num_tags);
2345 if (status != REG_OK)
2347 /* Allocate arrays for the tags and parameters. */
2348 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2353 /* Second pass with tre_mach_empty() to get the list of
2354 tags and parameters. */
2355 status = tre_match_empty(stack, cat->left, tags,
2357 if (status != REG_OK)
2363 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2366 if (!node->firstpos)
2371 node->firstpos = cat->left->firstpos;
2374 /* Compute lastpos. */
2375 if (cat->right->nullable)
2377 /* The right side matches the empty string. Make a first pass
2378 with tre_match_empty() to get the number of tags and
2380 status = tre_match_empty(stack, cat->right,
2381 NULL, NULL, &num_tags);
2382 if (status != REG_OK)
2384 /* Allocate arrays for the tags and parameters. */
2385 tags = xmalloc(sizeof(int) * (num_tags + 1));
2390 /* Second pass with tre_mach_empty() to get the list of
2391 tags and parameters. */
2392 status = tre_match_empty(stack, cat->right, tags,
2394 if (status != REG_OK)
2400 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2408 node->lastpos = cat->right->lastpos;
2423 /* Adds a transition from each position in `p1' to each position in `p2'. */
2424 static reg_errcode_t
2425 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2426 tre_tnfa_transition_t *transitions,
2427 int *counts, int *offs)
2429 tre_pos_and_tags_t *orig_p2 = p2;
2430 tre_tnfa_transition_t *trans;
2431 int i, j, k, l, dup, prev_p2_pos;
2433 if (transitions != NULL)
2434 while (p1->position >= 0)
2438 while (p2->position >= 0)
2440 /* Optimization: if this position was already handled, skip it. */
2441 if (p2->position == prev_p2_pos)
2446 prev_p2_pos = p2->position;
2447 /* Set `trans' to point to the next unused transition from
2448 position `p1->position'. */
2449 trans = transitions + offs[p1->position];
2450 while (trans->state != NULL)
2453 /* If we find a previous transition from `p1->position' to
2454 `p2->position', it is overwritten. This can happen only
2455 if there are nested loops in the regexp, like in "((a)*)*".
2456 In POSIX.2 repetition using the outer loop is always
2457 preferred over using the inner loop. Therefore the
2458 transition for the inner loop is useless and can be thrown
2460 /* XXX - The same position is used for all nodes in a bracket
2461 expression, so this optimization cannot be used (it will
2462 break bracket expressions) unless I figure out a way to
2464 if (trans->state_id == p2->position)
2472 if (trans->state == NULL)
2473 (trans + 1)->state = NULL;
2474 /* Use the character ranges, assertions, etc. from `p1' for
2475 the transition from `p1' to `p2'. */
2476 trans->code_min = p1->code_min;
2477 trans->code_max = p1->code_max;
2478 trans->state = transitions + offs[p2->position];
2479 trans->state_id = p2->position;
2480 trans->assertions = p1->assertions | p2->assertions
2481 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2482 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2483 if (p1->backref >= 0)
2485 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2486 assert(p2->backref < 0);
2487 trans->u.backref = p1->backref;
2488 trans->assertions |= ASSERT_BACKREF;
2491 trans->u.class = p1->class;
2492 if (p1->neg_classes != NULL)
2494 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2495 trans->neg_classes =
2496 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2497 if (trans->neg_classes == NULL)
2499 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2500 trans->neg_classes[i] = p1->neg_classes[i];
2501 trans->neg_classes[i] = (tre_ctype_t)0;
2504 trans->neg_classes = NULL;
2506 /* Find out how many tags this transition has. */
2508 if (p1->tags != NULL)
2509 while(p1->tags[i] >= 0)
2512 if (p2->tags != NULL)
2513 while(p2->tags[j] >= 0)
2516 /* If we are overwriting a transition, free the old tag array. */
2517 if (trans->tags != NULL)
2521 /* If there were any tags, allocate an array and fill it. */
2524 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2528 if (p1->tags != NULL)
2529 while(p1->tags[i] >= 0)
2531 trans->tags[i] = p1->tags[i];
2536 if (p2->tags != NULL)
2537 while (p2->tags[j] >= 0)
2539 /* Don't add duplicates. */
2541 for (k = 0; k < i; k++)
2542 if (trans->tags[k] == p2->tags[j])
2548 trans->tags[l++] = p2->tags[j];
2551 trans->tags[l] = -1;
2559 /* Compute a maximum limit for the number of transitions leaving
2561 while (p1->position >= 0)
2564 while (p2->position >= 0)
2566 counts[p1->position]++;
2574 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2575 labelled with one character range (there are no transitions on empty
2576 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2578 static reg_errcode_t
2579 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2580 int *counts, int *offs)
2583 tre_catenation_t *cat;
2584 tre_iteration_t *iter;
2585 reg_errcode_t errcode = REG_OK;
2587 /* XXX - recurse using a stack!. */
2593 uni = (tre_union_t *)node->obj;
2594 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2595 if (errcode != REG_OK)
2597 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2601 cat = (tre_catenation_t *)node->obj;
2602 /* Add a transition from each position in cat->left->lastpos
2603 to each position in cat->right->firstpos. */
2604 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2605 transitions, counts, offs);
2606 if (errcode != REG_OK)
2608 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2609 if (errcode != REG_OK)
2611 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2615 iter = (tre_iteration_t *)node->obj;
2616 assert(iter->max == -1 || iter->max == 1);
2618 if (iter->max == -1)
2620 assert(iter->min == 0 || iter->min == 1);
2621 /* Add a transition from each last position in the iterated
2622 expression to each first position. */
2623 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2624 transitions, counts, offs);
2625 if (errcode != REG_OK)
2628 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2635 #define ERROR_EXIT(err) \
2639 if (/*CONSTCOND*/1) \
2642 while (/*CONSTCOND*/0)
2646 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2649 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2650 tre_pos_and_tags_t *p;
2651 int *counts = NULL, *offs = NULL;
2653 tre_tnfa_transition_t *transitions, *initial;
2654 tre_tnfa_t *tnfa = NULL;
2655 tre_submatch_data_t *submatch_data;
2656 tre_tag_direction_t *tag_directions = NULL;
2657 reg_errcode_t errcode;
2660 /* Parse context. */
2661 tre_parse_ctx_t parse_ctx;
2663 /* Allocate a stack used throughout the compilation process for various
2665 stack = tre_stack_new(512, 10240, 128);
2668 /* Allocate a fast memory allocator. */
2669 mem = tre_mem_new();
2672 tre_stack_destroy(stack);
2676 /* Parse the regexp. */
2677 memset(&parse_ctx, 0, sizeof(parse_ctx));
2678 parse_ctx.mem = mem;
2679 parse_ctx.stack = stack;
2680 parse_ctx.re = regex;
2681 parse_ctx.cflags = cflags;
2682 parse_ctx.max_backref = -1;
2683 errcode = tre_parse(&parse_ctx);
2684 if (errcode != REG_OK)
2685 ERROR_EXIT(errcode);
2686 preg->re_nsub = parse_ctx.submatch_id - 1;
2690 tre_ast_print(tree);
2691 #endif /* TRE_DEBUG */
2693 /* Referring to nonexistent subexpressions is illegal. */
2694 if (parse_ctx.max_backref > (int)preg->re_nsub)
2695 ERROR_EXIT(REG_ESUBREG);
2697 /* Allocate the TNFA struct. */
2698 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2700 ERROR_EXIT(REG_ESPACE);
2701 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2702 tnfa->have_approx = 0;
2703 tnfa->num_submatches = parse_ctx.submatch_id;
2705 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2706 regexp does not have back references, this can be skipped. */
2707 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2710 /* Figure out how many tags we will need. */
2711 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2712 if (errcode != REG_OK)
2713 ERROR_EXIT(errcode);
2715 if (tnfa->num_tags > 0)
2717 tag_directions = xmalloc(sizeof(*tag_directions)
2718 * (tnfa->num_tags + 1));
2719 if (tag_directions == NULL)
2720 ERROR_EXIT(REG_ESPACE);
2721 tnfa->tag_directions = tag_directions;
2722 memset(tag_directions, -1,
2723 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2725 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2726 sizeof(*tnfa->minimal_tags));
2727 if (tnfa->minimal_tags == NULL)
2728 ERROR_EXIT(REG_ESPACE);
2730 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2731 sizeof(*submatch_data));
2732 if (submatch_data == NULL)
2733 ERROR_EXIT(REG_ESPACE);
2734 tnfa->submatch_data = submatch_data;
2736 errcode = tre_add_tags(mem, stack, tree, tnfa);
2737 if (errcode != REG_OK)
2738 ERROR_EXIT(errcode);
2742 /* Expand iteration nodes. */
2743 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2745 if (errcode != REG_OK)
2746 ERROR_EXIT(errcode);
2748 /* Add a dummy node for the final state.
2749 XXX - For certain patterns this dummy node can be optimized away,
2750 for example "a*" or "ab*". Figure out a simple way to detect
2751 this possibility. */
2753 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2754 if (tmp_ast_r == NULL)
2755 ERROR_EXIT(REG_ESPACE);
2757 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2759 ERROR_EXIT(REG_ESPACE);
2761 errcode = tre_compute_nfl(mem, stack, tree);
2762 if (errcode != REG_OK)
2763 ERROR_EXIT(errcode);
2765 counts = xmalloc(sizeof(int) * parse_ctx.position);
2767 ERROR_EXIT(REG_ESPACE);
2769 offs = xmalloc(sizeof(int) * parse_ctx.position);
2771 ERROR_EXIT(REG_ESPACE);
2773 for (i = 0; i < parse_ctx.position; i++)
2775 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2778 for (i = 0; i < parse_ctx.position; i++)
2781 add += counts[i] + 1;
2784 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2785 if (transitions == NULL)
2786 ERROR_EXIT(REG_ESPACE);
2787 tnfa->transitions = transitions;
2788 tnfa->num_transitions = add;
2790 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2791 if (errcode != REG_OK)
2792 ERROR_EXIT(errcode);
2794 tnfa->firstpos_chars = NULL;
2798 while (p->position >= 0)
2804 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2805 if (initial == NULL)
2806 ERROR_EXIT(REG_ESPACE);
2807 tnfa->initial = initial;
2810 for (p = tree->firstpos; p->position >= 0; p++)
2812 initial[i].state = transitions + offs[p->position];
2813 initial[i].state_id = p->position;
2814 initial[i].tags = NULL;
2815 /* Copy the arrays p->tags, and p->params, they are allocated
2816 from a tre_mem object. */
2820 for (j = 0; p->tags[j] >= 0; j++);
2821 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2822 if (!initial[i].tags)
2823 ERROR_EXIT(REG_ESPACE);
2824 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2826 initial[i].assertions = p->assertions;
2829 initial[i].state = NULL;
2831 tnfa->num_transitions = add;
2832 tnfa->final = transitions + offs[tree->lastpos[0].position];
2833 tnfa->num_states = parse_ctx.position;
2834 tnfa->cflags = cflags;
2836 tre_mem_destroy(mem);
2837 tre_stack_destroy(stack);
2841 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2845 /* Free everything that was allocated and return the error code. */
2846 tre_mem_destroy(mem);
2848 tre_stack_destroy(stack);
2853 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2862 regfree(regex_t *preg)
2866 tre_tnfa_transition_t *trans;
2868 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2872 for (i = 0; i < tnfa->num_transitions; i++)
2873 if (tnfa->transitions[i].state)
2875 if (tnfa->transitions[i].tags)
2876 xfree(tnfa->transitions[i].tags);
2877 if (tnfa->transitions[i].neg_classes)
2878 xfree(tnfa->transitions[i].neg_classes);
2880 if (tnfa->transitions)
2881 xfree(tnfa->transitions);
2885 for (trans = tnfa->initial; trans->state; trans++)
2890 xfree(tnfa->initial);
2893 if (tnfa->submatch_data)
2895 for (i = 0; i < tnfa->num_submatches; i++)
2896 if (tnfa->submatch_data[i].parents)
2897 xfree(tnfa->submatch_data[i].parents);
2898 xfree(tnfa->submatch_data);
2901 if (tnfa->tag_directions)
2902 xfree(tnfa->tag_directions);
2903 if (tnfa->firstpos_chars)
2904 xfree(tnfa->firstpos_chars);
2905 if (tnfa->minimal_tags)
2906 xfree(tnfa->minimal_tags);