2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position);
842 if (!ere && (unsigned)*s-'1' < 9) {
845 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position);
846 ctx->max_backref = MAX(val, ctx->max_backref);
848 /* extension: accept unknown escaped char
857 if (ctx->cflags & REG_NEWLINE) {
858 tre_ast_node_t *tmp1, *tmp2;
859 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
860 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
862 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
866 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
871 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
872 if (!ere && s != ctx->re)
874 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
878 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
892 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
896 len = mbtowc(&wc, s, -1);
899 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
900 tre_ast_node_t *tmp1, *tmp2;
901 /* multiple opposite case characters are not supported */
902 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
903 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
905 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
909 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
922 #define PUSHPTR(err, s, v) do { \
923 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
927 #define PUSHINT(err, s, v) do { \
928 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
932 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
934 tre_ast_node_t *nbranch=0, *nunion=0;
935 int ere = ctx->cflags & REG_EXTENDED;
936 const char *s = ctx->re;
940 tre_stack_t *stack = ctx->stack;
942 PUSHINT(err, stack, subid++);
944 if ((!ere && *s == '\\' && s[1] == '(') ||
945 (ere && *s == '(')) {
946 PUSHPTR(err, stack, nunion);
947 PUSHPTR(err, stack, nbranch);
948 PUSHINT(err, stack, subid++);
953 nbranch = nunion = 0;
956 if ((!ere && *s == '\\' && s[1] == ')') ||
957 (ere && *s == ')' && depth)) {
958 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
962 err = parse_atom(ctx, s);
969 /* extension: repetitions are accepted after an empty node
970 eg. (+), ^*, a$?, a|{2} */
984 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
987 /* extension: multiple consecutive *+?{,} is unspecified,
988 but (a+)+ has to be supported so accepting a++ makes
989 sense, note however that the RE_DUP_MAX limit can be
990 circumvented: (a{255}){255} uses a lot of memory.. */
993 if (ere || s[1] != '{')
1001 err = parse_dup(ctx, s+1);
1008 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1009 if ((ere && *s == '|') ||
1010 (ere && *s == ')' && depth) ||
1011 (!ere && *s == '\\' && s[1] == ')') ||
1013 /* extension: empty branch is unspecified (), (|a), (a|)
1014 here they are not rejected but match on empty string */
1016 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1020 if (!depth) return REG_EPAREN;
1022 } else if (c == ')')
1025 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1028 if (!c && depth<0) {
1029 ctx->submatch_id = subid;
1034 nbranch = tre_stack_pop_voidptr(stack);
1035 nunion = tre_stack_pop_voidptr(stack);
1044 /***********************************************************************
1046 ***********************************************************************/
1051 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1056 Algorithms to setup tags so that submatch addressing can be done.
1060 /* Inserts a catenation node to the root of the tree given in `node'.
1061 As the left child a new tag with number `tag_id' to `node' is added,
1062 and the right child is the old root. */
1063 static reg_errcode_t
1064 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1066 tre_catenation_t *c;
1068 c = tre_mem_alloc(mem, sizeof(*c));
1071 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1072 if (c->left == NULL)
1074 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1075 if (c->right == NULL)
1078 c->right->obj = node->obj;
1079 c->right->type = node->type;
1080 c->right->nullable = -1;
1081 c->right->submatch_id = -1;
1082 c->right->firstpos = NULL;
1083 c->right->lastpos = NULL;
1084 c->right->num_tags = 0;
1086 node->type = CATENATION;
1090 /* Inserts a catenation node to the root of the tree given in `node'.
1091 As the right child a new tag with number `tag_id' to `node' is added,
1092 and the left child is the old root. */
1093 static reg_errcode_t
1094 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1096 tre_catenation_t *c;
1098 c = tre_mem_alloc(mem, sizeof(*c));
1101 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1102 if (c->right == NULL)
1104 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1105 if (c->left == NULL)
1108 c->left->obj = node->obj;
1109 c->left->type = node->type;
1110 c->left->nullable = -1;
1111 c->left->submatch_id = -1;
1112 c->left->firstpos = NULL;
1113 c->left->lastpos = NULL;
1114 c->left->num_tags = 0;
1116 node->type = CATENATION;
1122 ADDTAGS_AFTER_ITERATION,
1123 ADDTAGS_AFTER_UNION_LEFT,
1124 ADDTAGS_AFTER_UNION_RIGHT,
1125 ADDTAGS_AFTER_CAT_LEFT,
1126 ADDTAGS_AFTER_CAT_RIGHT,
1127 ADDTAGS_SET_SUBMATCH_END
1128 } tre_addtags_symbol_t;
1137 /* Go through `regset' and set submatch data for submatches that are
1140 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1144 for (i = 0; regset[i] >= 0; i++)
1146 int id = regset[i] / 2;
1147 int start = !(regset[i] % 2);
1149 tnfa->submatch_data[id].so_tag = tag;
1151 tnfa->submatch_data[id].eo_tag = tag;
1157 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1158 subexpressions marked for submatch addressing can be traced. */
1159 static reg_errcode_t
1160 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1163 reg_errcode_t status = REG_OK;
1164 tre_addtags_symbol_t symbol;
1165 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1166 int bottom = tre_stack_num_objects(stack);
1167 /* True for first pass (counting number of needed tags) */
1168 int first_pass = (mem == NULL || tnfa == NULL);
1169 int *regset, *orig_regset;
1170 int num_tags = 0; /* Total number of tags. */
1171 int num_minimals = 0; /* Number of special minimal tags. */
1172 int tag = 0; /* The tag that is to be added next. */
1173 int next_tag = 1; /* Next tag to use after this one. */
1174 int *parents; /* Stack of submatches the current submatch is
1176 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1177 tre_tag_states_t *saved_states;
1179 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1183 tnfa->minimal_tags[0] = -1;
1186 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1190 orig_regset = regset;
1192 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1193 if (parents == NULL)
1200 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1201 if (saved_states == NULL)
1210 for (i = 0; i <= tnfa->num_submatches; i++)
1211 saved_states[i].tag = -1;
1214 STACK_PUSH(stack, voidptr, node);
1215 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1217 while (tre_stack_num_objects(stack) > bottom)
1219 if (status != REG_OK)
1222 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1226 case ADDTAGS_SET_SUBMATCH_END:
1228 int id = tre_stack_pop_int(stack);
1231 /* Add end of this submatch to regset. */
1232 for (i = 0; regset[i] >= 0; i++);
1233 regset[i] = id * 2 + 1;
1236 /* Pop this submatch from the parents stack. */
1237 for (i = 0; parents[i] >= 0; i++);
1238 parents[i - 1] = -1;
1242 case ADDTAGS_RECURSE:
1243 node = tre_stack_pop_voidptr(stack);
1245 if (node->submatch_id >= 0)
1247 int id = node->submatch_id;
1251 /* Add start of this submatch to regset. */
1252 for (i = 0; regset[i] >= 0; i++);
1258 for (i = 0; parents[i] >= 0; i++);
1259 tnfa->submatch_data[id].parents = NULL;
1262 int *p = xmalloc(sizeof(*p) * (i + 1));
1265 status = REG_ESPACE;
1268 assert(tnfa->submatch_data[id].parents == NULL);
1269 tnfa->submatch_data[id].parents = p;
1270 for (i = 0; parents[i] >= 0; i++)
1276 /* Add end of this submatch to regset after processing this
1278 STACK_PUSHX(stack, int, node->submatch_id);
1279 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1286 tre_literal_t *lit = node->obj;
1288 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1293 /* Regset is not empty, so add a tag before the
1294 literal or backref. */
1297 status = tre_add_tag_left(mem, node, tag);
1298 tnfa->tag_directions[tag] = direction;
1299 if (minimal_tag >= 0)
1301 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1302 tnfa->minimal_tags[i] = tag;
1303 tnfa->minimal_tags[i + 1] = minimal_tag;
1304 tnfa->minimal_tags[i + 2] = -1;
1308 tre_purge_regset(regset, tnfa, tag);
1323 assert(!IS_TAG(lit));
1329 tre_catenation_t *cat = node->obj;
1330 tre_ast_node_t *left = cat->left;
1331 tre_ast_node_t *right = cat->right;
1332 int reserved_tag = -1;
1335 /* After processing right child. */
1336 STACK_PUSHX(stack, voidptr, node);
1337 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1339 /* Process right child. */
1340 STACK_PUSHX(stack, voidptr, right);
1341 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1343 /* After processing left child. */
1344 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1345 if (left->num_tags > 0 && right->num_tags > 0)
1347 /* Reserve the next tag to the right child. */
1348 reserved_tag = next_tag;
1351 STACK_PUSHX(stack, int, reserved_tag);
1352 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1354 /* Process left child. */
1355 STACK_PUSHX(stack, voidptr, left);
1356 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1362 tre_iteration_t *iter = node->obj;
1366 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1370 STACK_PUSHX(stack, int, tag);
1371 STACK_PUSHX(stack, int, iter->minimal);
1373 STACK_PUSHX(stack, voidptr, node);
1374 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1376 STACK_PUSHX(stack, voidptr, iter->arg);
1377 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1379 /* Regset is not empty, so add a tag here. */
1380 if (regset[0] >= 0 || iter->minimal)
1385 status = tre_add_tag_left(mem, node, tag);
1387 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1389 tnfa->tag_directions[tag] = direction;
1390 if (minimal_tag >= 0)
1392 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1393 tnfa->minimal_tags[i] = tag;
1394 tnfa->minimal_tags[i + 1] = minimal_tag;
1395 tnfa->minimal_tags[i + 2] = -1;
1399 tre_purge_regset(regset, tnfa, tag);
1407 direction = TRE_TAG_MINIMIZE;
1412 tre_union_t *uni = node->obj;
1413 tre_ast_node_t *left = uni->left;
1414 tre_ast_node_t *right = uni->right;
1420 left_tag = next_tag;
1421 right_tag = next_tag + 1;
1426 right_tag = next_tag;
1429 /* After processing right child. */
1430 STACK_PUSHX(stack, int, right_tag);
1431 STACK_PUSHX(stack, int, left_tag);
1432 STACK_PUSHX(stack, voidptr, regset);
1433 STACK_PUSHX(stack, int, regset[0] >= 0);
1434 STACK_PUSHX(stack, voidptr, node);
1435 STACK_PUSHX(stack, voidptr, right);
1436 STACK_PUSHX(stack, voidptr, left);
1437 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1439 /* Process right child. */
1440 STACK_PUSHX(stack, voidptr, right);
1441 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1443 /* After processing left child. */
1444 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1446 /* Process left child. */
1447 STACK_PUSHX(stack, voidptr, left);
1448 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1450 /* Regset is not empty, so add a tag here. */
1456 status = tre_add_tag_left(mem, node, tag);
1457 tnfa->tag_directions[tag] = direction;
1458 if (minimal_tag >= 0)
1460 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1461 tnfa->minimal_tags[i] = tag;
1462 tnfa->minimal_tags[i + 1] = minimal_tag;
1463 tnfa->minimal_tags[i + 2] = -1;
1467 tre_purge_regset(regset, tnfa, tag);
1476 if (node->num_submatches > 0)
1478 /* The next two tags are reserved for markers. */
1488 if (node->submatch_id >= 0)
1491 /* Push this submatch on the parents stack. */
1492 for (i = 0; parents[i] >= 0; i++);
1493 parents[i] = node->submatch_id;
1494 parents[i + 1] = -1;
1497 break; /* end case: ADDTAGS_RECURSE */
1499 case ADDTAGS_AFTER_ITERATION:
1503 node = tre_stack_pop_voidptr(stack);
1506 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1507 + tre_stack_pop_int(stack);
1512 minimal = tre_stack_pop_int(stack);
1513 enter_tag = tre_stack_pop_int(stack);
1515 minimal_tag = enter_tag;
1521 direction = TRE_TAG_MINIMIZE;
1523 direction = TRE_TAG_MAXIMIZE;
1528 case ADDTAGS_AFTER_CAT_LEFT:
1530 int new_tag = tre_stack_pop_int(stack);
1531 next_tag = tre_stack_pop_int(stack);
1539 case ADDTAGS_AFTER_CAT_RIGHT:
1540 node = tre_stack_pop_voidptr(stack);
1542 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1543 + ((tre_catenation_t *)node->obj)->right->num_tags;
1546 case ADDTAGS_AFTER_UNION_LEFT:
1547 /* Lift the bottom of the `regset' array so that when processing
1548 the right operand the items currently in the array are
1549 invisible. The original bottom was saved at ADDTAGS_UNION and
1550 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1551 while (*regset >= 0)
1555 case ADDTAGS_AFTER_UNION_RIGHT:
1557 int added_tags, tag_left, tag_right;
1558 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1559 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1560 node = tre_stack_pop_voidptr(stack);
1561 added_tags = tre_stack_pop_int(stack);
1564 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1565 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1566 + ((node->num_submatches > 0) ? 2 : 0);
1568 regset = tre_stack_pop_voidptr(stack);
1569 tag_left = tre_stack_pop_int(stack);
1570 tag_right = tre_stack_pop_int(stack);
1572 /* Add tags after both children, the left child gets a smaller
1573 tag than the right child. This guarantees that we prefer
1574 the left child over the right child. */
1575 /* XXX - This is not always necessary (if the children have
1576 tags which must be seen for every match of that child). */
1577 /* XXX - Check if this is the only place where tre_add_tag_right
1578 is used. If so, use tre_add_tag_left (putting the tag before
1579 the child as opposed after the child) and throw away
1580 tre_add_tag_right. */
1581 if (node->num_submatches > 0)
1585 status = tre_add_tag_right(mem, left, tag_left);
1586 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1587 status = tre_add_tag_right(mem, right, tag_right);
1588 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1592 direction = TRE_TAG_MAXIMIZE;
1600 } /* end switch(symbol) */
1601 } /* end while(tre_stack_num_objects(stack) > bottom) */
1604 tre_purge_regset(regset, tnfa, tag);
1606 if (!first_pass && minimal_tag >= 0)
1609 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1610 tnfa->minimal_tags[i] = tag;
1611 tnfa->minimal_tags[i + 1] = minimal_tag;
1612 tnfa->minimal_tags[i + 2] = -1;
1617 assert(tree->num_tags == num_tags);
1618 tnfa->end_tag = num_tags;
1619 tnfa->num_tags = num_tags;
1620 tnfa->num_minimals = num_minimals;
1623 xfree(saved_states);
1630 AST to TNFA compilation routines.
1636 } tre_copyast_symbol_t;
1638 /* Flags for tre_copy_ast(). */
1639 #define COPY_REMOVE_TAGS 1
1640 #define COPY_MAXIMIZE_FIRST_TAG 2
1642 static reg_errcode_t
1643 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1644 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1645 tre_ast_node_t **copy, int *max_pos)
1647 reg_errcode_t status = REG_OK;
1648 int bottom = tre_stack_num_objects(stack);
1651 tre_ast_node_t **result = copy;
1652 tre_copyast_symbol_t symbol;
1654 STACK_PUSH(stack, voidptr, ast);
1655 STACK_PUSH(stack, int, COPY_RECURSE);
1657 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1659 tre_ast_node_t *node;
1660 if (status != REG_OK)
1663 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1666 case COPY_SET_RESULT_PTR:
1667 result = tre_stack_pop_voidptr(stack);
1670 node = tre_stack_pop_voidptr(stack);
1675 tre_literal_t *lit = node->obj;
1676 int pos = lit->position;
1677 int min = lit->code_min;
1678 int max = lit->code_max;
1679 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1681 /* XXX - e.g. [ab] has only one position but two
1682 nodes, so we are creating holes in the state space
1683 here. Not fatal, just wastes memory. */
1687 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1689 /* Change this tag to empty. */
1693 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1696 /* Maximize the first tag. */
1697 tag_directions[max] = TRE_TAG_MAXIMIZE;
1700 *result = tre_ast_new_literal(mem, min, max, pos);
1701 if (*result == NULL)
1702 status = REG_ESPACE;
1704 tre_literal_t *p = (*result)->obj;
1705 p->class = lit->class;
1706 p->neg_classes = lit->neg_classes;
1715 tre_union_t *uni = node->obj;
1717 *result = tre_ast_new_union(mem, uni->left, uni->right);
1718 if (*result == NULL)
1720 status = REG_ESPACE;
1723 tmp = (*result)->obj;
1724 result = &tmp->left;
1725 STACK_PUSHX(stack, voidptr, uni->right);
1726 STACK_PUSHX(stack, int, COPY_RECURSE);
1727 STACK_PUSHX(stack, voidptr, &tmp->right);
1728 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1729 STACK_PUSHX(stack, voidptr, uni->left);
1730 STACK_PUSHX(stack, int, COPY_RECURSE);
1735 tre_catenation_t *cat = node->obj;
1736 tre_catenation_t *tmp;
1737 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1738 if (*result == NULL)
1740 status = REG_ESPACE;
1743 tmp = (*result)->obj;
1746 result = &tmp->left;
1748 STACK_PUSHX(stack, voidptr, cat->right);
1749 STACK_PUSHX(stack, int, COPY_RECURSE);
1750 STACK_PUSHX(stack, voidptr, &tmp->right);
1751 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1752 STACK_PUSHX(stack, voidptr, cat->left);
1753 STACK_PUSHX(stack, int, COPY_RECURSE);
1758 tre_iteration_t *iter = node->obj;
1759 STACK_PUSHX(stack, voidptr, iter->arg);
1760 STACK_PUSHX(stack, int, COPY_RECURSE);
1761 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1762 iter->max, iter->minimal);
1763 if (*result == NULL)
1765 status = REG_ESPACE;
1768 iter = (*result)->obj;
1769 result = &iter->arg;
1779 *pos_add += num_copied;
1786 } tre_expand_ast_symbol_t;
1788 /* Expands each iteration node that has a finite nonzero minimum or maximum
1789 iteration count to a catenated sequence of copies of the node. */
1790 static reg_errcode_t
1791 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1792 int *position, tre_tag_direction_t *tag_directions)
1794 reg_errcode_t status = REG_OK;
1795 int bottom = tre_stack_num_objects(stack);
1797 int pos_add_total = 0;
1801 STACK_PUSHR(stack, voidptr, ast);
1802 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1803 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1805 tre_ast_node_t *node;
1806 tre_expand_ast_symbol_t symbol;
1808 if (status != REG_OK)
1811 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1812 node = tre_stack_pop_voidptr(stack);
1815 case EXPAND_RECURSE:
1820 tre_literal_t *lit= node->obj;
1821 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1823 lit->position += pos_add;
1824 if (lit->position > max_pos)
1825 max_pos = lit->position;
1831 tre_union_t *uni = node->obj;
1832 STACK_PUSHX(stack, voidptr, uni->right);
1833 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1834 STACK_PUSHX(stack, voidptr, uni->left);
1835 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1840 tre_catenation_t *cat = node->obj;
1841 STACK_PUSHX(stack, voidptr, cat->right);
1842 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1843 STACK_PUSHX(stack, voidptr, cat->left);
1844 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1849 tre_iteration_t *iter = node->obj;
1850 STACK_PUSHX(stack, int, pos_add);
1851 STACK_PUSHX(stack, voidptr, node);
1852 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1853 STACK_PUSHX(stack, voidptr, iter->arg);
1854 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1855 /* If we are going to expand this node at EXPAND_AFTER_ITER
1856 then don't increase the `pos' fields of the nodes now, it
1857 will get done when expanding. */
1858 if (iter->min > 1 || iter->max > 1)
1868 case EXPAND_AFTER_ITER:
1870 tre_iteration_t *iter = node->obj;
1872 pos_add = tre_stack_pop_int(stack);
1873 pos_add_last = pos_add;
1874 if (iter->min > 1 || iter->max > 1)
1876 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1878 int pos_add_save = pos_add;
1880 /* Create a catenated sequence of copies of the node. */
1881 for (j = 0; j < iter->min; j++)
1883 tre_ast_node_t *copy;
1884 /* Remove tags from all but the last copy. */
1885 int flags = ((j + 1 < iter->min)
1887 : COPY_MAXIMIZE_FIRST_TAG);
1888 pos_add_save = pos_add;
1889 status = tre_copy_ast(mem, stack, iter->arg, flags,
1890 &pos_add, tag_directions, ©,
1892 if (status != REG_OK)
1895 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1902 if (iter->max == -1)
1904 /* No upper limit. */
1905 pos_add_save = pos_add;
1906 status = tre_copy_ast(mem, stack, iter->arg, 0,
1907 &pos_add, NULL, &seq2, &max_pos);
1908 if (status != REG_OK)
1910 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1916 for (j = iter->min; j < iter->max; j++)
1918 tre_ast_node_t *tmp, *copy;
1919 pos_add_save = pos_add;
1920 status = tre_copy_ast(mem, stack, iter->arg, 0,
1921 &pos_add, NULL, ©, &max_pos);
1922 if (status != REG_OK)
1925 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1930 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1933 seq2 = tre_ast_new_union(mem, tmp, seq2);
1939 pos_add = pos_add_save;
1942 else if (seq2 != NULL)
1943 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1946 node->obj = seq1->obj;
1947 node->type = seq1->type;
1951 pos_add_total += pos_add - pos_add_last;
1952 if (iter_depth == 0)
1953 pos_add = pos_add_total;
1963 *position += pos_add_total;
1965 /* `max_pos' should never be larger than `*position' if the above
1966 code works, but just an extra safeguard let's make sure
1967 `*position' is set large enough so enough memory will be
1968 allocated for the transition table. */
1969 if (max_pos > *position)
1970 *position = max_pos;
1975 static tre_pos_and_tags_t *
1976 tre_set_empty(tre_mem_t mem)
1978 tre_pos_and_tags_t *new_set;
1980 new_set = tre_mem_calloc(mem, sizeof(*new_set));
1981 if (new_set == NULL)
1984 new_set[0].position = -1;
1985 new_set[0].code_min = -1;
1986 new_set[0].code_max = -1;
1991 static tre_pos_and_tags_t *
1992 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
1993 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
1995 tre_pos_and_tags_t *new_set;
1997 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
1998 if (new_set == NULL)
2001 new_set[0].position = position;
2002 new_set[0].code_min = code_min;
2003 new_set[0].code_max = code_max;
2004 new_set[0].class = class;
2005 new_set[0].neg_classes = neg_classes;
2006 new_set[0].backref = backref;
2007 new_set[1].position = -1;
2008 new_set[1].code_min = -1;
2009 new_set[1].code_max = -1;
2014 static tre_pos_and_tags_t *
2015 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2016 int *tags, int assertions)
2019 tre_pos_and_tags_t *new_set;
2023 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2024 for (s1 = 0; set1[s1].position >= 0; s1++);
2025 for (s2 = 0; set2[s2].position >= 0; s2++);
2026 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2030 for (s1 = 0; set1[s1].position >= 0; s1++)
2032 new_set[s1].position = set1[s1].position;
2033 new_set[s1].code_min = set1[s1].code_min;
2034 new_set[s1].code_max = set1[s1].code_max;
2035 new_set[s1].assertions = set1[s1].assertions | assertions;
2036 new_set[s1].class = set1[s1].class;
2037 new_set[s1].neg_classes = set1[s1].neg_classes;
2038 new_set[s1].backref = set1[s1].backref;
2039 if (set1[s1].tags == NULL && tags == NULL)
2040 new_set[s1].tags = NULL;
2043 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2044 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2045 * (i + num_tags + 1)));
2046 if (new_tags == NULL)
2048 for (j = 0; j < i; j++)
2049 new_tags[j] = set1[s1].tags[j];
2050 for (i = 0; i < num_tags; i++)
2051 new_tags[j + i] = tags[i];
2052 new_tags[j + i] = -1;
2053 new_set[s1].tags = new_tags;
2057 for (s2 = 0; set2[s2].position >= 0; s2++)
2059 new_set[s1 + s2].position = set2[s2].position;
2060 new_set[s1 + s2].code_min = set2[s2].code_min;
2061 new_set[s1 + s2].code_max = set2[s2].code_max;
2062 /* XXX - why not | assertions here as well? */
2063 new_set[s1 + s2].assertions = set2[s2].assertions;
2064 new_set[s1 + s2].class = set2[s2].class;
2065 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2066 new_set[s1 + s2].backref = set2[s2].backref;
2067 if (set2[s2].tags == NULL)
2068 new_set[s1 + s2].tags = NULL;
2071 for (i = 0; set2[s2].tags[i] >= 0; i++);
2072 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2073 if (new_tags == NULL)
2075 for (j = 0; j < i; j++)
2076 new_tags[j] = set2[s2].tags[j];
2078 new_set[s1 + s2].tags = new_tags;
2081 new_set[s1 + s2].position = -1;
2085 /* Finds the empty path through `node' which is the one that should be
2086 taken according to POSIX.2 rules, and adds the tags on that path to
2087 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2088 set to the number of tags seen on the path. */
2089 static reg_errcode_t
2090 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2091 int *assertions, int *num_tags_seen)
2095 tre_catenation_t *cat;
2096 tre_iteration_t *iter;
2098 int bottom = tre_stack_num_objects(stack);
2099 reg_errcode_t status = REG_OK;
2103 status = tre_stack_push_voidptr(stack, node);
2105 /* Walk through the tree recursively. */
2106 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2108 node = tre_stack_pop_voidptr(stack);
2113 lit = (tre_literal_t *)node->obj;
2114 switch (lit->code_min)
2117 if (lit->code_max >= 0)
2121 /* Add the tag to `tags'. */
2122 for (i = 0; tags[i] >= 0; i++)
2123 if (tags[i] == lit->code_max)
2127 tags[i] = lit->code_max;
2136 assert(lit->code_max >= 1
2137 || lit->code_max <= ASSERT_LAST);
2138 if (assertions != NULL)
2139 *assertions |= lit->code_max;
2150 /* Subexpressions starting earlier take priority over ones
2151 starting later, so we prefer the left subexpression over the
2152 right subexpression. */
2153 uni = (tre_union_t *)node->obj;
2154 if (uni->left->nullable)
2155 STACK_PUSHX(stack, voidptr, uni->left)
2156 else if (uni->right->nullable)
2157 STACK_PUSHX(stack, voidptr, uni->right)
2163 /* The path must go through both children. */
2164 cat = (tre_catenation_t *)node->obj;
2165 assert(cat->left->nullable);
2166 assert(cat->right->nullable);
2167 STACK_PUSHX(stack, voidptr, cat->left);
2168 STACK_PUSHX(stack, voidptr, cat->right);
2172 /* A match with an empty string is preferred over no match at
2173 all, so we go through the argument if possible. */
2174 iter = (tre_iteration_t *)node->obj;
2175 if (iter->arg->nullable)
2176 STACK_PUSHX(stack, voidptr, iter->arg);
2192 NFL_POST_CATENATION,
2194 } tre_nfl_stack_symbol_t;
2197 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2198 the nodes of the AST `tree'. */
2199 static reg_errcode_t
2200 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2202 int bottom = tre_stack_num_objects(stack);
2204 STACK_PUSHR(stack, voidptr, tree);
2205 STACK_PUSHR(stack, int, NFL_RECURSE);
2207 while (tre_stack_num_objects(stack) > bottom)
2209 tre_nfl_stack_symbol_t symbol;
2210 tre_ast_node_t *node;
2212 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2213 node = tre_stack_pop_voidptr(stack);
2221 tre_literal_t *lit = (tre_literal_t *)node->obj;
2222 if (IS_BACKREF(lit))
2224 /* Back references: nullable = false, firstpos = {i},
2227 node->firstpos = tre_set_one(mem, lit->position, 0,
2228 TRE_CHAR_MAX, 0, NULL, -1);
2229 if (!node->firstpos)
2231 node->lastpos = tre_set_one(mem, lit->position, 0,
2232 TRE_CHAR_MAX, 0, NULL,
2233 (int)lit->code_max);
2237 else if (lit->code_min < 0)
2239 /* Tags, empty strings, params, and zero width assertions:
2240 nullable = true, firstpos = {}, and lastpos = {}. */
2242 node->firstpos = tre_set_empty(mem);
2243 if (!node->firstpos)
2245 node->lastpos = tre_set_empty(mem);
2251 /* Literal at position i: nullable = false, firstpos = {i},
2255 tre_set_one(mem, lit->position, (int)lit->code_min,
2256 (int)lit->code_max, 0, NULL, -1);
2257 if (!node->firstpos)
2259 node->lastpos = tre_set_one(mem, lit->position,
2262 lit->class, lit->neg_classes,
2271 /* Compute the attributes for the two subtrees, and after that
2273 STACK_PUSHR(stack, voidptr, node);
2274 STACK_PUSHR(stack, int, NFL_POST_UNION);
2275 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2276 STACK_PUSHR(stack, int, NFL_RECURSE);
2277 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2278 STACK_PUSHR(stack, int, NFL_RECURSE);
2282 /* Compute the attributes for the two subtrees, and after that
2284 STACK_PUSHR(stack, voidptr, node);
2285 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2286 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2287 STACK_PUSHR(stack, int, NFL_RECURSE);
2288 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2289 STACK_PUSHR(stack, int, NFL_RECURSE);
2293 /* Compute the attributes for the subtree, and after that for
2295 STACK_PUSHR(stack, voidptr, node);
2296 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2297 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2298 STACK_PUSHR(stack, int, NFL_RECURSE);
2301 break; /* end case: NFL_RECURSE */
2303 case NFL_POST_UNION:
2305 tre_union_t *uni = (tre_union_t *)node->obj;
2306 node->nullable = uni->left->nullable || uni->right->nullable;
2307 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2308 uni->right->firstpos, NULL, 0);
2309 if (!node->firstpos)
2311 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2312 uni->right->lastpos, NULL, 0);
2318 case NFL_POST_ITERATION:
2320 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2322 if (iter->min == 0 || iter->arg->nullable)
2326 node->firstpos = iter->arg->firstpos;
2327 node->lastpos = iter->arg->lastpos;
2331 case NFL_POST_CATENATION:
2333 int num_tags, *tags, assertions;
2334 reg_errcode_t status;
2335 tre_catenation_t *cat = node->obj;
2336 node->nullable = cat->left->nullable && cat->right->nullable;
2338 /* Compute firstpos. */
2339 if (cat->left->nullable)
2341 /* The left side matches the empty string. Make a first pass
2342 with tre_match_empty() to get the number of tags and
2344 status = tre_match_empty(stack, cat->left,
2345 NULL, NULL, &num_tags);
2346 if (status != REG_OK)
2348 /* Allocate arrays for the tags and parameters. */
2349 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2354 /* Second pass with tre_mach_empty() to get the list of
2355 tags and parameters. */
2356 status = tre_match_empty(stack, cat->left, tags,
2358 if (status != REG_OK)
2364 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2367 if (!node->firstpos)
2372 node->firstpos = cat->left->firstpos;
2375 /* Compute lastpos. */
2376 if (cat->right->nullable)
2378 /* The right side matches the empty string. Make a first pass
2379 with tre_match_empty() to get the number of tags and
2381 status = tre_match_empty(stack, cat->right,
2382 NULL, NULL, &num_tags);
2383 if (status != REG_OK)
2385 /* Allocate arrays for the tags and parameters. */
2386 tags = xmalloc(sizeof(int) * (num_tags + 1));
2391 /* Second pass with tre_mach_empty() to get the list of
2392 tags and parameters. */
2393 status = tre_match_empty(stack, cat->right, tags,
2395 if (status != REG_OK)
2401 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2409 node->lastpos = cat->right->lastpos;
2424 /* Adds a transition from each position in `p1' to each position in `p2'. */
2425 static reg_errcode_t
2426 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2427 tre_tnfa_transition_t *transitions,
2428 int *counts, int *offs)
2430 tre_pos_and_tags_t *orig_p2 = p2;
2431 tre_tnfa_transition_t *trans;
2432 int i, j, k, l, dup, prev_p2_pos;
2434 if (transitions != NULL)
2435 while (p1->position >= 0)
2439 while (p2->position >= 0)
2441 /* Optimization: if this position was already handled, skip it. */
2442 if (p2->position == prev_p2_pos)
2447 prev_p2_pos = p2->position;
2448 /* Set `trans' to point to the next unused transition from
2449 position `p1->position'. */
2450 trans = transitions + offs[p1->position];
2451 while (trans->state != NULL)
2454 /* If we find a previous transition from `p1->position' to
2455 `p2->position', it is overwritten. This can happen only
2456 if there are nested loops in the regexp, like in "((a)*)*".
2457 In POSIX.2 repetition using the outer loop is always
2458 preferred over using the inner loop. Therefore the
2459 transition for the inner loop is useless and can be thrown
2461 /* XXX - The same position is used for all nodes in a bracket
2462 expression, so this optimization cannot be used (it will
2463 break bracket expressions) unless I figure out a way to
2465 if (trans->state_id == p2->position)
2473 if (trans->state == NULL)
2474 (trans + 1)->state = NULL;
2475 /* Use the character ranges, assertions, etc. from `p1' for
2476 the transition from `p1' to `p2'. */
2477 trans->code_min = p1->code_min;
2478 trans->code_max = p1->code_max;
2479 trans->state = transitions + offs[p2->position];
2480 trans->state_id = p2->position;
2481 trans->assertions = p1->assertions | p2->assertions
2482 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2483 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2484 if (p1->backref >= 0)
2486 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2487 assert(p2->backref < 0);
2488 trans->u.backref = p1->backref;
2489 trans->assertions |= ASSERT_BACKREF;
2492 trans->u.class = p1->class;
2493 if (p1->neg_classes != NULL)
2495 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2496 trans->neg_classes =
2497 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2498 if (trans->neg_classes == NULL)
2500 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2501 trans->neg_classes[i] = p1->neg_classes[i];
2502 trans->neg_classes[i] = (tre_ctype_t)0;
2505 trans->neg_classes = NULL;
2507 /* Find out how many tags this transition has. */
2509 if (p1->tags != NULL)
2510 while(p1->tags[i] >= 0)
2513 if (p2->tags != NULL)
2514 while(p2->tags[j] >= 0)
2517 /* If we are overwriting a transition, free the old tag array. */
2518 if (trans->tags != NULL)
2522 /* If there were any tags, allocate an array and fill it. */
2525 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2529 if (p1->tags != NULL)
2530 while(p1->tags[i] >= 0)
2532 trans->tags[i] = p1->tags[i];
2537 if (p2->tags != NULL)
2538 while (p2->tags[j] >= 0)
2540 /* Don't add duplicates. */
2542 for (k = 0; k < i; k++)
2543 if (trans->tags[k] == p2->tags[j])
2549 trans->tags[l++] = p2->tags[j];
2552 trans->tags[l] = -1;
2560 /* Compute a maximum limit for the number of transitions leaving
2562 while (p1->position >= 0)
2565 while (p2->position >= 0)
2567 counts[p1->position]++;
2575 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2576 labelled with one character range (there are no transitions on empty
2577 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2579 static reg_errcode_t
2580 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2581 int *counts, int *offs)
2584 tre_catenation_t *cat;
2585 tre_iteration_t *iter;
2586 reg_errcode_t errcode = REG_OK;
2588 /* XXX - recurse using a stack!. */
2594 uni = (tre_union_t *)node->obj;
2595 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2596 if (errcode != REG_OK)
2598 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2602 cat = (tre_catenation_t *)node->obj;
2603 /* Add a transition from each position in cat->left->lastpos
2604 to each position in cat->right->firstpos. */
2605 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2606 transitions, counts, offs);
2607 if (errcode != REG_OK)
2609 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2610 if (errcode != REG_OK)
2612 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2616 iter = (tre_iteration_t *)node->obj;
2617 assert(iter->max == -1 || iter->max == 1);
2619 if (iter->max == -1)
2621 assert(iter->min == 0 || iter->min == 1);
2622 /* Add a transition from each last position in the iterated
2623 expression to each first position. */
2624 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2625 transitions, counts, offs);
2626 if (errcode != REG_OK)
2629 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2636 #define ERROR_EXIT(err) \
2640 if (/*CONSTCOND*/1) \
2643 while (/*CONSTCOND*/0)
2647 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2650 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2651 tre_pos_and_tags_t *p;
2652 int *counts = NULL, *offs = NULL;
2654 tre_tnfa_transition_t *transitions, *initial;
2655 tre_tnfa_t *tnfa = NULL;
2656 tre_submatch_data_t *submatch_data;
2657 tre_tag_direction_t *tag_directions = NULL;
2658 reg_errcode_t errcode;
2661 /* Parse context. */
2662 tre_parse_ctx_t parse_ctx;
2664 /* Allocate a stack used throughout the compilation process for various
2666 stack = tre_stack_new(512, 10240, 128);
2669 /* Allocate a fast memory allocator. */
2670 mem = tre_mem_new();
2673 tre_stack_destroy(stack);
2677 /* Parse the regexp. */
2678 memset(&parse_ctx, 0, sizeof(parse_ctx));
2679 parse_ctx.mem = mem;
2680 parse_ctx.stack = stack;
2681 parse_ctx.re = regex;
2682 parse_ctx.cflags = cflags;
2683 parse_ctx.max_backref = -1;
2684 errcode = tre_parse(&parse_ctx);
2685 if (errcode != REG_OK)
2686 ERROR_EXIT(errcode);
2687 preg->re_nsub = parse_ctx.submatch_id - 1;
2691 tre_ast_print(tree);
2692 #endif /* TRE_DEBUG */
2694 /* Referring to nonexistent subexpressions is illegal. */
2695 if (parse_ctx.max_backref > (int)preg->re_nsub)
2696 ERROR_EXIT(REG_ESUBREG);
2698 /* Allocate the TNFA struct. */
2699 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2701 ERROR_EXIT(REG_ESPACE);
2702 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2703 tnfa->have_approx = 0;
2704 tnfa->num_submatches = parse_ctx.submatch_id;
2706 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2707 regexp does not have back references, this can be skipped. */
2708 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2711 /* Figure out how many tags we will need. */
2712 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2713 if (errcode != REG_OK)
2714 ERROR_EXIT(errcode);
2716 if (tnfa->num_tags > 0)
2718 tag_directions = xmalloc(sizeof(*tag_directions)
2719 * (tnfa->num_tags + 1));
2720 if (tag_directions == NULL)
2721 ERROR_EXIT(REG_ESPACE);
2722 tnfa->tag_directions = tag_directions;
2723 memset(tag_directions, -1,
2724 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2726 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2727 sizeof(*tnfa->minimal_tags));
2728 if (tnfa->minimal_tags == NULL)
2729 ERROR_EXIT(REG_ESPACE);
2731 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2732 sizeof(*submatch_data));
2733 if (submatch_data == NULL)
2734 ERROR_EXIT(REG_ESPACE);
2735 tnfa->submatch_data = submatch_data;
2737 errcode = tre_add_tags(mem, stack, tree, tnfa);
2738 if (errcode != REG_OK)
2739 ERROR_EXIT(errcode);
2743 /* Expand iteration nodes. */
2744 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2746 if (errcode != REG_OK)
2747 ERROR_EXIT(errcode);
2749 /* Add a dummy node for the final state.
2750 XXX - For certain patterns this dummy node can be optimized away,
2751 for example "a*" or "ab*". Figure out a simple way to detect
2752 this possibility. */
2754 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2755 if (tmp_ast_r == NULL)
2756 ERROR_EXIT(REG_ESPACE);
2758 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2760 ERROR_EXIT(REG_ESPACE);
2762 errcode = tre_compute_nfl(mem, stack, tree);
2763 if (errcode != REG_OK)
2764 ERROR_EXIT(errcode);
2766 counts = xmalloc(sizeof(int) * parse_ctx.position);
2768 ERROR_EXIT(REG_ESPACE);
2770 offs = xmalloc(sizeof(int) * parse_ctx.position);
2772 ERROR_EXIT(REG_ESPACE);
2774 for (i = 0; i < parse_ctx.position; i++)
2776 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2779 for (i = 0; i < parse_ctx.position; i++)
2782 add += counts[i] + 1;
2785 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2786 if (transitions == NULL)
2787 ERROR_EXIT(REG_ESPACE);
2788 tnfa->transitions = transitions;
2789 tnfa->num_transitions = add;
2791 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2792 if (errcode != REG_OK)
2793 ERROR_EXIT(errcode);
2795 tnfa->firstpos_chars = NULL;
2799 while (p->position >= 0)
2805 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2806 if (initial == NULL)
2807 ERROR_EXIT(REG_ESPACE);
2808 tnfa->initial = initial;
2811 for (p = tree->firstpos; p->position >= 0; p++)
2813 initial[i].state = transitions + offs[p->position];
2814 initial[i].state_id = p->position;
2815 initial[i].tags = NULL;
2816 /* Copy the arrays p->tags, and p->params, they are allocated
2817 from a tre_mem object. */
2821 for (j = 0; p->tags[j] >= 0; j++);
2822 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2823 if (!initial[i].tags)
2824 ERROR_EXIT(REG_ESPACE);
2825 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2827 initial[i].assertions = p->assertions;
2830 initial[i].state = NULL;
2832 tnfa->num_transitions = add;
2833 tnfa->final = transitions + offs[tree->lastpos[0].position];
2834 tnfa->num_states = parse_ctx.position;
2835 tnfa->cflags = cflags;
2837 tre_mem_destroy(mem);
2838 tre_stack_destroy(stack);
2842 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2846 /* Free everything that was allocated and return the error code. */
2847 tre_mem_destroy(mem);
2849 tre_stack_destroy(stack);
2854 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2863 regfree(regex_t *preg)
2867 tre_tnfa_transition_t *trans;
2869 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2873 for (i = 0; i < tnfa->num_transitions; i++)
2874 if (tnfa->transitions[i].state)
2876 if (tnfa->transitions[i].tags)
2877 xfree(tnfa->transitions[i].tags);
2878 if (tnfa->transitions[i].neg_classes)
2879 xfree(tnfa->transitions[i].neg_classes);
2881 if (tnfa->transitions)
2882 xfree(tnfa->transitions);
2886 for (trans = tnfa->initial; trans->state; trans++)
2891 xfree(tnfa->initial);
2894 if (tnfa->submatch_data)
2896 for (i = 0; i < tnfa->num_submatches; i++)
2897 if (tnfa->submatch_data[i].parents)
2898 xfree(tnfa->submatch_data[i].parents);
2899 xfree(tnfa->submatch_data);
2902 if (tnfa->tag_directions)
2903 xfree(tnfa->tag_directions);
2904 if (tnfa->firstpos_chars)
2905 xfree(tnfa->firstpos_chars);
2906 if (tnfa->minimal_tags)
2907 xfree(tnfa->minimal_tags);