2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static const char *parse_dup(const char *s, int ere, int *pmin, int *pmax)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!ere && *s++ != '\\') ||
735 static int hexval(unsigned c)
737 if (c-'0'<10) return c-'0';
739 if (c-'a'<6) return c-'a'+10;
743 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
745 if (node->submatch_id >= 0) {
746 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
749 n = tre_ast_new_catenation(ctx->mem, n, node);
752 n->num_submatches = node->num_submatches;
755 node->submatch_id = subid;
756 node->num_submatches++;
763 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
764 Branch = Atom | Branch Atom
765 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
766 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
768 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
771 Regex = Branch | Regex '|' Branch
772 Branch = Atom | Branch Atom
773 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
774 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
776 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
779 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
781 int len, ere = ctx->cflags & REG_EXTENDED;
783 tre_ast_node_t *node;
787 return parse_bracket(ctx, s+1);
789 p = tre_expand_macro(s+1);
791 /* assume \X expansion is a single atom */
792 reg_errcode_t err = parse_atom(ctx, p);
796 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
801 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
804 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
820 for (i=0; i<len && v<0x110000; i++) {
831 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
837 /* extension: treat \+, \? as repetitions in BRE */
838 /* reject repetitions after empty expression in BRE */
842 /* extension: treat \| as alternation in BRE */
844 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
850 if (!ere && (unsigned)*s-'1' < 9) {
853 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
854 ctx->max_backref = MAX(val, ctx->max_backref);
856 /* extension: accept unknown escaped char
864 if (ctx->cflags & REG_NEWLINE) {
865 tre_ast_node_t *tmp1, *tmp2;
866 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
867 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
869 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
873 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
878 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
879 if (!ere && s != ctx->re)
881 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
885 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
888 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
895 /* reject repetitions after empty expression in ERE */
902 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
906 len = mbtowc(&wc, s, -1);
909 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
910 tre_ast_node_t *tmp1, *tmp2;
911 /* multiple opposite case characters are not supported */
912 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
913 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
915 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
919 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
933 #define PUSHPTR(err, s, v) do { \
934 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
938 #define PUSHINT(err, s, v) do { \
939 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
943 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
945 tre_ast_node_t *nbranch=0, *nunion=0;
946 int ere = ctx->cflags & REG_EXTENDED;
947 const char *s = ctx->re;
951 tre_stack_t *stack = ctx->stack;
953 PUSHINT(err, stack, subid++);
955 if ((!ere && *s == '\\' && s[1] == '(') ||
956 (ere && *s == '(')) {
957 PUSHPTR(err, stack, nunion);
958 PUSHPTR(err, stack, nbranch);
959 PUSHINT(err, stack, subid++);
964 nbranch = nunion = 0;
967 if ((!ere && *s == '\\' && s[1] == ')') ||
968 (ere && *s == ')' && depth)) {
969 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
973 err = parse_atom(ctx, s);
983 if (*s!='\\' && *s!='*') {
986 if (*s!='+' && *s!='?' && *s!='{')
991 /* extension: treat \+, \? as repetitions in BRE */
992 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
997 /* handle ^* at the start of a complete BRE. */
998 if (!ere && s==ctx->re+1 && s[-1]=='^')
1001 /* extension: multiple consecutive *+?{,} is unspecified,
1002 but (a+)+ has to be supported so accepting a++ makes
1003 sense, note however that the RE_DUP_MAX limit can be
1004 circumvented: (a{255}){255} uses a lot of memory.. */
1006 s = parse_dup(s+1, ere, &min, &max);
1019 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
1021 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1026 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1027 if ((ere && *s == '|') ||
1028 (ere && *s == ')' && depth) ||
1029 (!ere && *s == '\\' && s[1] == ')') ||
1030 /* extension: treat \| as alternation in BRE */
1031 (!ere && *s == '\\' && s[1] == '|') ||
1033 /* extension: empty branch is unspecified (), (|a), (a|)
1034 here they are not rejected but match on empty string */
1036 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1039 if (c == '\\' && s[1] == '|') {
1041 } else if (c == '|') {
1045 if (!depth) return REG_EPAREN;
1047 } else if (c == ')')
1050 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1053 if (!c && depth<0) {
1054 ctx->submatch_id = subid;
1059 nbranch = tre_stack_pop_voidptr(stack);
1060 nunion = tre_stack_pop_voidptr(stack);
1068 /***********************************************************************
1070 ***********************************************************************/
1075 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1080 Algorithms to setup tags so that submatch addressing can be done.
1084 /* Inserts a catenation node to the root of the tree given in `node'.
1085 As the left child a new tag with number `tag_id' to `node' is added,
1086 and the right child is the old root. */
1087 static reg_errcode_t
1088 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1090 tre_catenation_t *c;
1092 c = tre_mem_alloc(mem, sizeof(*c));
1095 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1096 if (c->left == NULL)
1098 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1099 if (c->right == NULL)
1102 c->right->obj = node->obj;
1103 c->right->type = node->type;
1104 c->right->nullable = -1;
1105 c->right->submatch_id = -1;
1106 c->right->firstpos = NULL;
1107 c->right->lastpos = NULL;
1108 c->right->num_tags = 0;
1109 c->right->num_submatches = 0;
1111 node->type = CATENATION;
1115 /* Inserts a catenation node to the root of the tree given in `node'.
1116 As the right child a new tag with number `tag_id' to `node' is added,
1117 and the left child is the old root. */
1118 static reg_errcode_t
1119 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1121 tre_catenation_t *c;
1123 c = tre_mem_alloc(mem, sizeof(*c));
1126 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1127 if (c->right == NULL)
1129 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1130 if (c->left == NULL)
1133 c->left->obj = node->obj;
1134 c->left->type = node->type;
1135 c->left->nullable = -1;
1136 c->left->submatch_id = -1;
1137 c->left->firstpos = NULL;
1138 c->left->lastpos = NULL;
1139 c->left->num_tags = 0;
1140 c->left->num_submatches = 0;
1142 node->type = CATENATION;
1148 ADDTAGS_AFTER_ITERATION,
1149 ADDTAGS_AFTER_UNION_LEFT,
1150 ADDTAGS_AFTER_UNION_RIGHT,
1151 ADDTAGS_AFTER_CAT_LEFT,
1152 ADDTAGS_AFTER_CAT_RIGHT,
1153 ADDTAGS_SET_SUBMATCH_END
1154 } tre_addtags_symbol_t;
1163 /* Go through `regset' and set submatch data for submatches that are
1166 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1170 for (i = 0; regset[i] >= 0; i++)
1172 int id = regset[i] / 2;
1173 int start = !(regset[i] % 2);
1175 tnfa->submatch_data[id].so_tag = tag;
1177 tnfa->submatch_data[id].eo_tag = tag;
1183 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1184 subexpressions marked for submatch addressing can be traced. */
1185 static reg_errcode_t
1186 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1189 reg_errcode_t status = REG_OK;
1190 tre_addtags_symbol_t symbol;
1191 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1192 int bottom = tre_stack_num_objects(stack);
1193 /* True for first pass (counting number of needed tags) */
1194 int first_pass = (mem == NULL || tnfa == NULL);
1195 int *regset, *orig_regset;
1196 int num_tags = 0; /* Total number of tags. */
1197 int num_minimals = 0; /* Number of special minimal tags. */
1198 int tag = 0; /* The tag that is to be added next. */
1199 int next_tag = 1; /* Next tag to use after this one. */
1200 int *parents; /* Stack of submatches the current submatch is
1202 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1203 tre_tag_states_t *saved_states;
1205 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1209 tnfa->minimal_tags[0] = -1;
1212 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1216 orig_regset = regset;
1218 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1219 if (parents == NULL)
1226 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1227 if (saved_states == NULL)
1236 for (i = 0; i <= tnfa->num_submatches; i++)
1237 saved_states[i].tag = -1;
1240 STACK_PUSH(stack, voidptr, node);
1241 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1243 while (tre_stack_num_objects(stack) > bottom)
1245 if (status != REG_OK)
1248 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1252 case ADDTAGS_SET_SUBMATCH_END:
1254 int id = tre_stack_pop_int(stack);
1257 /* Add end of this submatch to regset. */
1258 for (i = 0; regset[i] >= 0; i++);
1259 regset[i] = id * 2 + 1;
1262 /* Pop this submatch from the parents stack. */
1263 for (i = 0; parents[i] >= 0; i++);
1264 parents[i - 1] = -1;
1268 case ADDTAGS_RECURSE:
1269 node = tre_stack_pop_voidptr(stack);
1271 if (node->submatch_id >= 0)
1273 int id = node->submatch_id;
1277 /* Add start of this submatch to regset. */
1278 for (i = 0; regset[i] >= 0; i++);
1284 for (i = 0; parents[i] >= 0; i++);
1285 tnfa->submatch_data[id].parents = NULL;
1288 int *p = xmalloc(sizeof(*p) * (i + 1));
1291 status = REG_ESPACE;
1294 assert(tnfa->submatch_data[id].parents == NULL);
1295 tnfa->submatch_data[id].parents = p;
1296 for (i = 0; parents[i] >= 0; i++)
1302 /* Add end of this submatch to regset after processing this
1304 STACK_PUSHX(stack, int, node->submatch_id);
1305 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1312 tre_literal_t *lit = node->obj;
1314 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1319 /* Regset is not empty, so add a tag before the
1320 literal or backref. */
1323 status = tre_add_tag_left(mem, node, tag);
1324 tnfa->tag_directions[tag] = direction;
1325 if (minimal_tag >= 0)
1327 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1328 tnfa->minimal_tags[i] = tag;
1329 tnfa->minimal_tags[i + 1] = minimal_tag;
1330 tnfa->minimal_tags[i + 2] = -1;
1334 tre_purge_regset(regset, tnfa, tag);
1349 assert(!IS_TAG(lit));
1355 tre_catenation_t *cat = node->obj;
1356 tre_ast_node_t *left = cat->left;
1357 tre_ast_node_t *right = cat->right;
1358 int reserved_tag = -1;
1361 /* After processing right child. */
1362 STACK_PUSHX(stack, voidptr, node);
1363 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1365 /* Process right child. */
1366 STACK_PUSHX(stack, voidptr, right);
1367 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1369 /* After processing left child. */
1370 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1371 if (left->num_tags > 0 && right->num_tags > 0)
1373 /* Reserve the next tag to the right child. */
1374 reserved_tag = next_tag;
1377 STACK_PUSHX(stack, int, reserved_tag);
1378 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1380 /* Process left child. */
1381 STACK_PUSHX(stack, voidptr, left);
1382 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1388 tre_iteration_t *iter = node->obj;
1392 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1396 STACK_PUSHX(stack, int, tag);
1397 STACK_PUSHX(stack, int, iter->minimal);
1399 STACK_PUSHX(stack, voidptr, node);
1400 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1402 STACK_PUSHX(stack, voidptr, iter->arg);
1403 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1405 /* Regset is not empty, so add a tag here. */
1406 if (regset[0] >= 0 || iter->minimal)
1411 status = tre_add_tag_left(mem, node, tag);
1413 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1415 tnfa->tag_directions[tag] = direction;
1416 if (minimal_tag >= 0)
1418 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1419 tnfa->minimal_tags[i] = tag;
1420 tnfa->minimal_tags[i + 1] = minimal_tag;
1421 tnfa->minimal_tags[i + 2] = -1;
1425 tre_purge_regset(regset, tnfa, tag);
1433 direction = TRE_TAG_MINIMIZE;
1438 tre_union_t *uni = node->obj;
1439 tre_ast_node_t *left = uni->left;
1440 tre_ast_node_t *right = uni->right;
1446 left_tag = next_tag;
1447 right_tag = next_tag + 1;
1452 right_tag = next_tag;
1455 /* After processing right child. */
1456 STACK_PUSHX(stack, int, right_tag);
1457 STACK_PUSHX(stack, int, left_tag);
1458 STACK_PUSHX(stack, voidptr, regset);
1459 STACK_PUSHX(stack, int, regset[0] >= 0);
1460 STACK_PUSHX(stack, voidptr, node);
1461 STACK_PUSHX(stack, voidptr, right);
1462 STACK_PUSHX(stack, voidptr, left);
1463 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1465 /* Process right child. */
1466 STACK_PUSHX(stack, voidptr, right);
1467 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1469 /* After processing left child. */
1470 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1472 /* Process left child. */
1473 STACK_PUSHX(stack, voidptr, left);
1474 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1476 /* Regset is not empty, so add a tag here. */
1482 status = tre_add_tag_left(mem, node, tag);
1483 tnfa->tag_directions[tag] = direction;
1484 if (minimal_tag >= 0)
1486 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1487 tnfa->minimal_tags[i] = tag;
1488 tnfa->minimal_tags[i + 1] = minimal_tag;
1489 tnfa->minimal_tags[i + 2] = -1;
1493 tre_purge_regset(regset, tnfa, tag);
1502 if (node->num_submatches > 0)
1504 /* The next two tags are reserved for markers. */
1514 if (node->submatch_id >= 0)
1517 /* Push this submatch on the parents stack. */
1518 for (i = 0; parents[i] >= 0; i++);
1519 parents[i] = node->submatch_id;
1520 parents[i + 1] = -1;
1523 break; /* end case: ADDTAGS_RECURSE */
1525 case ADDTAGS_AFTER_ITERATION:
1529 node = tre_stack_pop_voidptr(stack);
1532 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1533 + tre_stack_pop_int(stack);
1538 minimal = tre_stack_pop_int(stack);
1539 enter_tag = tre_stack_pop_int(stack);
1541 minimal_tag = enter_tag;
1547 direction = TRE_TAG_MINIMIZE;
1549 direction = TRE_TAG_MAXIMIZE;
1554 case ADDTAGS_AFTER_CAT_LEFT:
1556 int new_tag = tre_stack_pop_int(stack);
1557 next_tag = tre_stack_pop_int(stack);
1565 case ADDTAGS_AFTER_CAT_RIGHT:
1566 node = tre_stack_pop_voidptr(stack);
1568 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1569 + ((tre_catenation_t *)node->obj)->right->num_tags;
1572 case ADDTAGS_AFTER_UNION_LEFT:
1573 /* Lift the bottom of the `regset' array so that when processing
1574 the right operand the items currently in the array are
1575 invisible. The original bottom was saved at ADDTAGS_UNION and
1576 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1577 while (*regset >= 0)
1581 case ADDTAGS_AFTER_UNION_RIGHT:
1583 int added_tags, tag_left, tag_right;
1584 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1585 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1586 node = tre_stack_pop_voidptr(stack);
1587 added_tags = tre_stack_pop_int(stack);
1590 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1591 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1592 + ((node->num_submatches > 0) ? 2 : 0);
1594 regset = tre_stack_pop_voidptr(stack);
1595 tag_left = tre_stack_pop_int(stack);
1596 tag_right = tre_stack_pop_int(stack);
1598 /* Add tags after both children, the left child gets a smaller
1599 tag than the right child. This guarantees that we prefer
1600 the left child over the right child. */
1601 /* XXX - This is not always necessary (if the children have
1602 tags which must be seen for every match of that child). */
1603 /* XXX - Check if this is the only place where tre_add_tag_right
1604 is used. If so, use tre_add_tag_left (putting the tag before
1605 the child as opposed after the child) and throw away
1606 tre_add_tag_right. */
1607 if (node->num_submatches > 0)
1611 status = tre_add_tag_right(mem, left, tag_left);
1612 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1613 if (status == REG_OK)
1614 status = tre_add_tag_right(mem, right, tag_right);
1615 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1619 direction = TRE_TAG_MAXIMIZE;
1627 } /* end switch(symbol) */
1628 } /* end while(tre_stack_num_objects(stack) > bottom) */
1631 tre_purge_regset(regset, tnfa, tag);
1633 if (!first_pass && minimal_tag >= 0)
1636 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1637 tnfa->minimal_tags[i] = tag;
1638 tnfa->minimal_tags[i + 1] = minimal_tag;
1639 tnfa->minimal_tags[i + 2] = -1;
1644 assert(tree->num_tags == num_tags);
1645 tnfa->end_tag = num_tags;
1646 tnfa->num_tags = num_tags;
1647 tnfa->num_minimals = num_minimals;
1650 xfree(saved_states);
1657 AST to TNFA compilation routines.
1663 } tre_copyast_symbol_t;
1665 /* Flags for tre_copy_ast(). */
1666 #define COPY_REMOVE_TAGS 1
1667 #define COPY_MAXIMIZE_FIRST_TAG 2
1669 static reg_errcode_t
1670 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1671 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1672 tre_ast_node_t **copy, int *max_pos)
1674 reg_errcode_t status = REG_OK;
1675 int bottom = tre_stack_num_objects(stack);
1678 tre_ast_node_t **result = copy;
1679 tre_copyast_symbol_t symbol;
1681 STACK_PUSH(stack, voidptr, ast);
1682 STACK_PUSH(stack, int, COPY_RECURSE);
1684 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1686 tre_ast_node_t *node;
1687 if (status != REG_OK)
1690 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1693 case COPY_SET_RESULT_PTR:
1694 result = tre_stack_pop_voidptr(stack);
1697 node = tre_stack_pop_voidptr(stack);
1702 tre_literal_t *lit = node->obj;
1703 int pos = lit->position;
1704 int min = lit->code_min;
1705 int max = lit->code_max;
1706 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1708 /* XXX - e.g. [ab] has only one position but two
1709 nodes, so we are creating holes in the state space
1710 here. Not fatal, just wastes memory. */
1714 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1716 /* Change this tag to empty. */
1720 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1723 /* Maximize the first tag. */
1724 tag_directions[max] = TRE_TAG_MAXIMIZE;
1727 *result = tre_ast_new_literal(mem, min, max, pos);
1728 if (*result == NULL)
1729 status = REG_ESPACE;
1731 tre_literal_t *p = (*result)->obj;
1732 p->class = lit->class;
1733 p->neg_classes = lit->neg_classes;
1742 tre_union_t *uni = node->obj;
1744 *result = tre_ast_new_union(mem, uni->left, uni->right);
1745 if (*result == NULL)
1747 status = REG_ESPACE;
1750 tmp = (*result)->obj;
1751 result = &tmp->left;
1752 STACK_PUSHX(stack, voidptr, uni->right);
1753 STACK_PUSHX(stack, int, COPY_RECURSE);
1754 STACK_PUSHX(stack, voidptr, &tmp->right);
1755 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1756 STACK_PUSHX(stack, voidptr, uni->left);
1757 STACK_PUSHX(stack, int, COPY_RECURSE);
1762 tre_catenation_t *cat = node->obj;
1763 tre_catenation_t *tmp;
1764 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1765 if (*result == NULL)
1767 status = REG_ESPACE;
1770 tmp = (*result)->obj;
1773 result = &tmp->left;
1775 STACK_PUSHX(stack, voidptr, cat->right);
1776 STACK_PUSHX(stack, int, COPY_RECURSE);
1777 STACK_PUSHX(stack, voidptr, &tmp->right);
1778 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1779 STACK_PUSHX(stack, voidptr, cat->left);
1780 STACK_PUSHX(stack, int, COPY_RECURSE);
1785 tre_iteration_t *iter = node->obj;
1786 STACK_PUSHX(stack, voidptr, iter->arg);
1787 STACK_PUSHX(stack, int, COPY_RECURSE);
1788 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1789 iter->max, iter->minimal);
1790 if (*result == NULL)
1792 status = REG_ESPACE;
1795 iter = (*result)->obj;
1796 result = &iter->arg;
1806 *pos_add += num_copied;
1813 } tre_expand_ast_symbol_t;
1815 /* Expands each iteration node that has a finite nonzero minimum or maximum
1816 iteration count to a catenated sequence of copies of the node. */
1817 static reg_errcode_t
1818 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1819 int *position, tre_tag_direction_t *tag_directions)
1821 reg_errcode_t status = REG_OK;
1822 int bottom = tre_stack_num_objects(stack);
1824 int pos_add_total = 0;
1828 STACK_PUSHR(stack, voidptr, ast);
1829 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1830 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1832 tre_ast_node_t *node;
1833 tre_expand_ast_symbol_t symbol;
1835 if (status != REG_OK)
1838 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1839 node = tre_stack_pop_voidptr(stack);
1842 case EXPAND_RECURSE:
1847 tre_literal_t *lit= node->obj;
1848 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1850 lit->position += pos_add;
1851 if (lit->position > max_pos)
1852 max_pos = lit->position;
1858 tre_union_t *uni = node->obj;
1859 STACK_PUSHX(stack, voidptr, uni->right);
1860 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1861 STACK_PUSHX(stack, voidptr, uni->left);
1862 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1867 tre_catenation_t *cat = node->obj;
1868 STACK_PUSHX(stack, voidptr, cat->right);
1869 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1870 STACK_PUSHX(stack, voidptr, cat->left);
1871 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1876 tre_iteration_t *iter = node->obj;
1877 STACK_PUSHX(stack, int, pos_add);
1878 STACK_PUSHX(stack, voidptr, node);
1879 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1880 STACK_PUSHX(stack, voidptr, iter->arg);
1881 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1882 /* If we are going to expand this node at EXPAND_AFTER_ITER
1883 then don't increase the `pos' fields of the nodes now, it
1884 will get done when expanding. */
1885 if (iter->min > 1 || iter->max > 1)
1895 case EXPAND_AFTER_ITER:
1897 tre_iteration_t *iter = node->obj;
1899 pos_add = tre_stack_pop_int(stack);
1900 pos_add_last = pos_add;
1901 if (iter->min > 1 || iter->max > 1)
1903 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1905 int pos_add_save = pos_add;
1907 /* Create a catenated sequence of copies of the node. */
1908 for (j = 0; j < iter->min; j++)
1910 tre_ast_node_t *copy;
1911 /* Remove tags from all but the last copy. */
1912 int flags = ((j + 1 < iter->min)
1914 : COPY_MAXIMIZE_FIRST_TAG);
1915 pos_add_save = pos_add;
1916 status = tre_copy_ast(mem, stack, iter->arg, flags,
1917 &pos_add, tag_directions, ©,
1919 if (status != REG_OK)
1922 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1929 if (iter->max == -1)
1931 /* No upper limit. */
1932 pos_add_save = pos_add;
1933 status = tre_copy_ast(mem, stack, iter->arg, 0,
1934 &pos_add, NULL, &seq2, &max_pos);
1935 if (status != REG_OK)
1937 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1943 for (j = iter->min; j < iter->max; j++)
1945 tre_ast_node_t *tmp, *copy;
1946 pos_add_save = pos_add;
1947 status = tre_copy_ast(mem, stack, iter->arg, 0,
1948 &pos_add, NULL, ©, &max_pos);
1949 if (status != REG_OK)
1952 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1957 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1960 seq2 = tre_ast_new_union(mem, tmp, seq2);
1966 pos_add = pos_add_save;
1969 else if (seq2 != NULL)
1970 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1973 node->obj = seq1->obj;
1974 node->type = seq1->type;
1978 pos_add_total += pos_add - pos_add_last;
1979 if (iter_depth == 0)
1980 pos_add = pos_add_total;
1990 *position += pos_add_total;
1992 /* `max_pos' should never be larger than `*position' if the above
1993 code works, but just an extra safeguard let's make sure
1994 `*position' is set large enough so enough memory will be
1995 allocated for the transition table. */
1996 if (max_pos > *position)
1997 *position = max_pos;
2002 static tre_pos_and_tags_t *
2003 tre_set_empty(tre_mem_t mem)
2005 tre_pos_and_tags_t *new_set;
2007 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2008 if (new_set == NULL)
2011 new_set[0].position = -1;
2012 new_set[0].code_min = -1;
2013 new_set[0].code_max = -1;
2018 static tre_pos_and_tags_t *
2019 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2020 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2022 tre_pos_and_tags_t *new_set;
2024 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2025 if (new_set == NULL)
2028 new_set[0].position = position;
2029 new_set[0].code_min = code_min;
2030 new_set[0].code_max = code_max;
2031 new_set[0].class = class;
2032 new_set[0].neg_classes = neg_classes;
2033 new_set[0].backref = backref;
2034 new_set[1].position = -1;
2035 new_set[1].code_min = -1;
2036 new_set[1].code_max = -1;
2041 static tre_pos_and_tags_t *
2042 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2043 int *tags, int assertions)
2046 tre_pos_and_tags_t *new_set;
2050 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2051 for (s1 = 0; set1[s1].position >= 0; s1++);
2052 for (s2 = 0; set2[s2].position >= 0; s2++);
2053 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2057 for (s1 = 0; set1[s1].position >= 0; s1++)
2059 new_set[s1].position = set1[s1].position;
2060 new_set[s1].code_min = set1[s1].code_min;
2061 new_set[s1].code_max = set1[s1].code_max;
2062 new_set[s1].assertions = set1[s1].assertions | assertions;
2063 new_set[s1].class = set1[s1].class;
2064 new_set[s1].neg_classes = set1[s1].neg_classes;
2065 new_set[s1].backref = set1[s1].backref;
2066 if (set1[s1].tags == NULL && tags == NULL)
2067 new_set[s1].tags = NULL;
2070 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2071 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2072 * (i + num_tags + 1)));
2073 if (new_tags == NULL)
2075 for (j = 0; j < i; j++)
2076 new_tags[j] = set1[s1].tags[j];
2077 for (i = 0; i < num_tags; i++)
2078 new_tags[j + i] = tags[i];
2079 new_tags[j + i] = -1;
2080 new_set[s1].tags = new_tags;
2084 for (s2 = 0; set2[s2].position >= 0; s2++)
2086 new_set[s1 + s2].position = set2[s2].position;
2087 new_set[s1 + s2].code_min = set2[s2].code_min;
2088 new_set[s1 + s2].code_max = set2[s2].code_max;
2089 /* XXX - why not | assertions here as well? */
2090 new_set[s1 + s2].assertions = set2[s2].assertions;
2091 new_set[s1 + s2].class = set2[s2].class;
2092 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2093 new_set[s1 + s2].backref = set2[s2].backref;
2094 if (set2[s2].tags == NULL)
2095 new_set[s1 + s2].tags = NULL;
2098 for (i = 0; set2[s2].tags[i] >= 0; i++);
2099 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2100 if (new_tags == NULL)
2102 for (j = 0; j < i; j++)
2103 new_tags[j] = set2[s2].tags[j];
2105 new_set[s1 + s2].tags = new_tags;
2108 new_set[s1 + s2].position = -1;
2112 /* Finds the empty path through `node' which is the one that should be
2113 taken according to POSIX.2 rules, and adds the tags on that path to
2114 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2115 set to the number of tags seen on the path. */
2116 static reg_errcode_t
2117 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2118 int *assertions, int *num_tags_seen)
2122 tre_catenation_t *cat;
2123 tre_iteration_t *iter;
2125 int bottom = tre_stack_num_objects(stack);
2126 reg_errcode_t status = REG_OK;
2130 status = tre_stack_push_voidptr(stack, node);
2132 /* Walk through the tree recursively. */
2133 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2135 node = tre_stack_pop_voidptr(stack);
2140 lit = (tre_literal_t *)node->obj;
2141 switch (lit->code_min)
2144 if (lit->code_max >= 0)
2148 /* Add the tag to `tags'. */
2149 for (i = 0; tags[i] >= 0; i++)
2150 if (tags[i] == lit->code_max)
2154 tags[i] = lit->code_max;
2163 assert(lit->code_max >= 1
2164 || lit->code_max <= ASSERT_LAST);
2165 if (assertions != NULL)
2166 *assertions |= lit->code_max;
2177 /* Subexpressions starting earlier take priority over ones
2178 starting later, so we prefer the left subexpression over the
2179 right subexpression. */
2180 uni = (tre_union_t *)node->obj;
2181 if (uni->left->nullable)
2182 STACK_PUSHX(stack, voidptr, uni->left)
2183 else if (uni->right->nullable)
2184 STACK_PUSHX(stack, voidptr, uni->right)
2190 /* The path must go through both children. */
2191 cat = (tre_catenation_t *)node->obj;
2192 assert(cat->left->nullable);
2193 assert(cat->right->nullable);
2194 STACK_PUSHX(stack, voidptr, cat->left);
2195 STACK_PUSHX(stack, voidptr, cat->right);
2199 /* A match with an empty string is preferred over no match at
2200 all, so we go through the argument if possible. */
2201 iter = (tre_iteration_t *)node->obj;
2202 if (iter->arg->nullable)
2203 STACK_PUSHX(stack, voidptr, iter->arg);
2219 NFL_POST_CATENATION,
2221 } tre_nfl_stack_symbol_t;
2224 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2225 the nodes of the AST `tree'. */
2226 static reg_errcode_t
2227 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2229 int bottom = tre_stack_num_objects(stack);
2231 STACK_PUSHR(stack, voidptr, tree);
2232 STACK_PUSHR(stack, int, NFL_RECURSE);
2234 while (tre_stack_num_objects(stack) > bottom)
2236 tre_nfl_stack_symbol_t symbol;
2237 tre_ast_node_t *node;
2239 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2240 node = tre_stack_pop_voidptr(stack);
2248 tre_literal_t *lit = (tre_literal_t *)node->obj;
2249 if (IS_BACKREF(lit))
2251 /* Back references: nullable = false, firstpos = {i},
2254 node->firstpos = tre_set_one(mem, lit->position, 0,
2255 TRE_CHAR_MAX, 0, NULL, -1);
2256 if (!node->firstpos)
2258 node->lastpos = tre_set_one(mem, lit->position, 0,
2259 TRE_CHAR_MAX, 0, NULL,
2260 (int)lit->code_max);
2264 else if (lit->code_min < 0)
2266 /* Tags, empty strings, params, and zero width assertions:
2267 nullable = true, firstpos = {}, and lastpos = {}. */
2269 node->firstpos = tre_set_empty(mem);
2270 if (!node->firstpos)
2272 node->lastpos = tre_set_empty(mem);
2278 /* Literal at position i: nullable = false, firstpos = {i},
2282 tre_set_one(mem, lit->position, (int)lit->code_min,
2283 (int)lit->code_max, 0, NULL, -1);
2284 if (!node->firstpos)
2286 node->lastpos = tre_set_one(mem, lit->position,
2289 lit->class, lit->neg_classes,
2298 /* Compute the attributes for the two subtrees, and after that
2300 STACK_PUSHR(stack, voidptr, node);
2301 STACK_PUSHR(stack, int, NFL_POST_UNION);
2302 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2303 STACK_PUSHR(stack, int, NFL_RECURSE);
2304 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2305 STACK_PUSHR(stack, int, NFL_RECURSE);
2309 /* Compute the attributes for the two subtrees, and after that
2311 STACK_PUSHR(stack, voidptr, node);
2312 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2313 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2314 STACK_PUSHR(stack, int, NFL_RECURSE);
2315 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2316 STACK_PUSHR(stack, int, NFL_RECURSE);
2320 /* Compute the attributes for the subtree, and after that for
2322 STACK_PUSHR(stack, voidptr, node);
2323 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2324 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2325 STACK_PUSHR(stack, int, NFL_RECURSE);
2328 break; /* end case: NFL_RECURSE */
2330 case NFL_POST_UNION:
2332 tre_union_t *uni = (tre_union_t *)node->obj;
2333 node->nullable = uni->left->nullable || uni->right->nullable;
2334 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2335 uni->right->firstpos, NULL, 0);
2336 if (!node->firstpos)
2338 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2339 uni->right->lastpos, NULL, 0);
2345 case NFL_POST_ITERATION:
2347 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2349 if (iter->min == 0 || iter->arg->nullable)
2353 node->firstpos = iter->arg->firstpos;
2354 node->lastpos = iter->arg->lastpos;
2358 case NFL_POST_CATENATION:
2360 int num_tags, *tags, assertions;
2361 reg_errcode_t status;
2362 tre_catenation_t *cat = node->obj;
2363 node->nullable = cat->left->nullable && cat->right->nullable;
2365 /* Compute firstpos. */
2366 if (cat->left->nullable)
2368 /* The left side matches the empty string. Make a first pass
2369 with tre_match_empty() to get the number of tags and
2371 status = tre_match_empty(stack, cat->left,
2372 NULL, NULL, &num_tags);
2373 if (status != REG_OK)
2375 /* Allocate arrays for the tags and parameters. */
2376 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2381 /* Second pass with tre_mach_empty() to get the list of
2382 tags and parameters. */
2383 status = tre_match_empty(stack, cat->left, tags,
2385 if (status != REG_OK)
2391 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2394 if (!node->firstpos)
2399 node->firstpos = cat->left->firstpos;
2402 /* Compute lastpos. */
2403 if (cat->right->nullable)
2405 /* The right side matches the empty string. Make a first pass
2406 with tre_match_empty() to get the number of tags and
2408 status = tre_match_empty(stack, cat->right,
2409 NULL, NULL, &num_tags);
2410 if (status != REG_OK)
2412 /* Allocate arrays for the tags and parameters. */
2413 tags = xmalloc(sizeof(int) * (num_tags + 1));
2418 /* Second pass with tre_mach_empty() to get the list of
2419 tags and parameters. */
2420 status = tre_match_empty(stack, cat->right, tags,
2422 if (status != REG_OK)
2428 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2436 node->lastpos = cat->right->lastpos;
2451 /* Adds a transition from each position in `p1' to each position in `p2'. */
2452 static reg_errcode_t
2453 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2454 tre_tnfa_transition_t *transitions,
2455 int *counts, int *offs)
2457 tre_pos_and_tags_t *orig_p2 = p2;
2458 tre_tnfa_transition_t *trans;
2459 int i, j, k, l, dup, prev_p2_pos;
2461 if (transitions != NULL)
2462 while (p1->position >= 0)
2466 while (p2->position >= 0)
2468 /* Optimization: if this position was already handled, skip it. */
2469 if (p2->position == prev_p2_pos)
2474 prev_p2_pos = p2->position;
2475 /* Set `trans' to point to the next unused transition from
2476 position `p1->position'. */
2477 trans = transitions + offs[p1->position];
2478 while (trans->state != NULL)
2481 /* If we find a previous transition from `p1->position' to
2482 `p2->position', it is overwritten. This can happen only
2483 if there are nested loops in the regexp, like in "((a)*)*".
2484 In POSIX.2 repetition using the outer loop is always
2485 preferred over using the inner loop. Therefore the
2486 transition for the inner loop is useless and can be thrown
2488 /* XXX - The same position is used for all nodes in a bracket
2489 expression, so this optimization cannot be used (it will
2490 break bracket expressions) unless I figure out a way to
2492 if (trans->state_id == p2->position)
2500 if (trans->state == NULL)
2501 (trans + 1)->state = NULL;
2502 /* Use the character ranges, assertions, etc. from `p1' for
2503 the transition from `p1' to `p2'. */
2504 trans->code_min = p1->code_min;
2505 trans->code_max = p1->code_max;
2506 trans->state = transitions + offs[p2->position];
2507 trans->state_id = p2->position;
2508 trans->assertions = p1->assertions | p2->assertions
2509 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2510 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2511 if (p1->backref >= 0)
2513 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2514 assert(p2->backref < 0);
2515 trans->u.backref = p1->backref;
2516 trans->assertions |= ASSERT_BACKREF;
2519 trans->u.class = p1->class;
2520 if (p1->neg_classes != NULL)
2522 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2523 trans->neg_classes =
2524 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2525 if (trans->neg_classes == NULL)
2527 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2528 trans->neg_classes[i] = p1->neg_classes[i];
2529 trans->neg_classes[i] = (tre_ctype_t)0;
2532 trans->neg_classes = NULL;
2534 /* Find out how many tags this transition has. */
2536 if (p1->tags != NULL)
2537 while(p1->tags[i] >= 0)
2540 if (p2->tags != NULL)
2541 while(p2->tags[j] >= 0)
2544 /* If we are overwriting a transition, free the old tag array. */
2545 if (trans->tags != NULL)
2549 /* If there were any tags, allocate an array and fill it. */
2552 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2556 if (p1->tags != NULL)
2557 while(p1->tags[i] >= 0)
2559 trans->tags[i] = p1->tags[i];
2564 if (p2->tags != NULL)
2565 while (p2->tags[j] >= 0)
2567 /* Don't add duplicates. */
2569 for (k = 0; k < i; k++)
2570 if (trans->tags[k] == p2->tags[j])
2576 trans->tags[l++] = p2->tags[j];
2579 trans->tags[l] = -1;
2587 /* Compute a maximum limit for the number of transitions leaving
2589 while (p1->position >= 0)
2592 while (p2->position >= 0)
2594 counts[p1->position]++;
2602 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2603 labelled with one character range (there are no transitions on empty
2604 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2606 static reg_errcode_t
2607 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2608 int *counts, int *offs)
2611 tre_catenation_t *cat;
2612 tre_iteration_t *iter;
2613 reg_errcode_t errcode = REG_OK;
2615 /* XXX - recurse using a stack!. */
2621 uni = (tre_union_t *)node->obj;
2622 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2623 if (errcode != REG_OK)
2625 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2629 cat = (tre_catenation_t *)node->obj;
2630 /* Add a transition from each position in cat->left->lastpos
2631 to each position in cat->right->firstpos. */
2632 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2633 transitions, counts, offs);
2634 if (errcode != REG_OK)
2636 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2637 if (errcode != REG_OK)
2639 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2643 iter = (tre_iteration_t *)node->obj;
2644 assert(iter->max == -1 || iter->max == 1);
2646 if (iter->max == -1)
2648 assert(iter->min == 0 || iter->min == 1);
2649 /* Add a transition from each last position in the iterated
2650 expression to each first position. */
2651 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2652 transitions, counts, offs);
2653 if (errcode != REG_OK)
2656 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2663 #define ERROR_EXIT(err) \
2667 if (/*CONSTCOND*/1) \
2670 while (/*CONSTCOND*/0)
2674 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2677 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2678 tre_pos_and_tags_t *p;
2679 int *counts = NULL, *offs = NULL;
2681 tre_tnfa_transition_t *transitions, *initial;
2682 tre_tnfa_t *tnfa = NULL;
2683 tre_submatch_data_t *submatch_data;
2684 tre_tag_direction_t *tag_directions = NULL;
2685 reg_errcode_t errcode;
2688 /* Parse context. */
2689 tre_parse_ctx_t parse_ctx;
2691 /* Allocate a stack used throughout the compilation process for various
2693 stack = tre_stack_new(512, 1024000, 128);
2696 /* Allocate a fast memory allocator. */
2697 mem = tre_mem_new();
2700 tre_stack_destroy(stack);
2704 /* Parse the regexp. */
2705 memset(&parse_ctx, 0, sizeof(parse_ctx));
2706 parse_ctx.mem = mem;
2707 parse_ctx.stack = stack;
2708 parse_ctx.re = regex;
2709 parse_ctx.cflags = cflags;
2710 parse_ctx.max_backref = -1;
2711 errcode = tre_parse(&parse_ctx);
2712 if (errcode != REG_OK)
2713 ERROR_EXIT(errcode);
2714 preg->re_nsub = parse_ctx.submatch_id - 1;
2718 tre_ast_print(tree);
2719 #endif /* TRE_DEBUG */
2721 /* Referring to nonexistent subexpressions is illegal. */
2722 if (parse_ctx.max_backref > (int)preg->re_nsub)
2723 ERROR_EXIT(REG_ESUBREG);
2725 /* Allocate the TNFA struct. */
2726 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2728 ERROR_EXIT(REG_ESPACE);
2729 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2730 tnfa->have_approx = 0;
2731 tnfa->num_submatches = parse_ctx.submatch_id;
2733 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2734 regexp does not have back references, this can be skipped. */
2735 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2738 /* Figure out how many tags we will need. */
2739 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2740 if (errcode != REG_OK)
2741 ERROR_EXIT(errcode);
2743 if (tnfa->num_tags > 0)
2745 tag_directions = xmalloc(sizeof(*tag_directions)
2746 * (tnfa->num_tags + 1));
2747 if (tag_directions == NULL)
2748 ERROR_EXIT(REG_ESPACE);
2749 tnfa->tag_directions = tag_directions;
2750 memset(tag_directions, -1,
2751 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2753 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2754 sizeof(*tnfa->minimal_tags));
2755 if (tnfa->minimal_tags == NULL)
2756 ERROR_EXIT(REG_ESPACE);
2758 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2759 sizeof(*submatch_data));
2760 if (submatch_data == NULL)
2761 ERROR_EXIT(REG_ESPACE);
2762 tnfa->submatch_data = submatch_data;
2764 errcode = tre_add_tags(mem, stack, tree, tnfa);
2765 if (errcode != REG_OK)
2766 ERROR_EXIT(errcode);
2770 /* Expand iteration nodes. */
2771 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2773 if (errcode != REG_OK)
2774 ERROR_EXIT(errcode);
2776 /* Add a dummy node for the final state.
2777 XXX - For certain patterns this dummy node can be optimized away,
2778 for example "a*" or "ab*". Figure out a simple way to detect
2779 this possibility. */
2781 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2782 if (tmp_ast_r == NULL)
2783 ERROR_EXIT(REG_ESPACE);
2785 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2787 ERROR_EXIT(REG_ESPACE);
2789 errcode = tre_compute_nfl(mem, stack, tree);
2790 if (errcode != REG_OK)
2791 ERROR_EXIT(errcode);
2793 counts = xmalloc(sizeof(int) * parse_ctx.position);
2795 ERROR_EXIT(REG_ESPACE);
2797 offs = xmalloc(sizeof(int) * parse_ctx.position);
2799 ERROR_EXIT(REG_ESPACE);
2801 for (i = 0; i < parse_ctx.position; i++)
2803 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2806 for (i = 0; i < parse_ctx.position; i++)
2809 add += counts[i] + 1;
2812 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2813 if (transitions == NULL)
2814 ERROR_EXIT(REG_ESPACE);
2815 tnfa->transitions = transitions;
2816 tnfa->num_transitions = add;
2818 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2819 if (errcode != REG_OK)
2820 ERROR_EXIT(errcode);
2822 tnfa->firstpos_chars = NULL;
2826 while (p->position >= 0)
2832 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2833 if (initial == NULL)
2834 ERROR_EXIT(REG_ESPACE);
2835 tnfa->initial = initial;
2838 for (p = tree->firstpos; p->position >= 0; p++)
2840 initial[i].state = transitions + offs[p->position];
2841 initial[i].state_id = p->position;
2842 initial[i].tags = NULL;
2843 /* Copy the arrays p->tags, and p->params, they are allocated
2844 from a tre_mem object. */
2848 for (j = 0; p->tags[j] >= 0; j++);
2849 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2850 if (!initial[i].tags)
2851 ERROR_EXIT(REG_ESPACE);
2852 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2854 initial[i].assertions = p->assertions;
2857 initial[i].state = NULL;
2859 tnfa->num_transitions = add;
2860 tnfa->final = transitions + offs[tree->lastpos[0].position];
2861 tnfa->num_states = parse_ctx.position;
2862 tnfa->cflags = cflags;
2864 tre_mem_destroy(mem);
2865 tre_stack_destroy(stack);
2869 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2873 /* Free everything that was allocated and return the error code. */
2874 tre_mem_destroy(mem);
2876 tre_stack_destroy(stack);
2881 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2890 regfree(regex_t *preg)
2894 tre_tnfa_transition_t *trans;
2896 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2900 for (i = 0; i < tnfa->num_transitions; i++)
2901 if (tnfa->transitions[i].state)
2903 if (tnfa->transitions[i].tags)
2904 xfree(tnfa->transitions[i].tags);
2905 if (tnfa->transitions[i].neg_classes)
2906 xfree(tnfa->transitions[i].neg_classes);
2908 if (tnfa->transitions)
2909 xfree(tnfa->transitions);
2913 for (trans = tnfa->initial; trans->state; trans++)
2918 xfree(tnfa->initial);
2921 if (tnfa->submatch_data)
2923 for (i = 0; i < tnfa->num_submatches; i++)
2924 if (tnfa->submatch_data[i].parents)
2925 xfree(tnfa->submatch_data[i].parents);
2926 xfree(tnfa->submatch_data);
2929 if (tnfa->tag_directions)
2930 xfree(tnfa->tag_directions);
2931 if (tnfa->firstpos_chars)
2932 xfree(tnfa->firstpos_chars);
2933 if (tnfa->minimal_tags)
2934 xfree(tnfa->minimal_tags);