2 regcomp.c - TRE POSIX compatible regex compilation functions.
4 Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions
11 1. Redistributions of source code must retain the above copyright
12 notice, this list of conditions and the following disclaimer.
14 2. Redistributions in binary form must reproduce the above copyright
15 notice, this list of conditions and the following disclaimer in the
16 documentation and/or other materials provided with the distribution.
18 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
43 /***********************************************************************
45 ***********************************************************************/
54 tre_ctype_t *neg_classes;
59 /***********************************************************************
60 from tre-ast.c and tre-ast.h
61 ***********************************************************************/
63 /* The different AST node types. */
71 /* Special subtypes of TRE_LITERAL. */
72 #define EMPTY -1 /* Empty leaf (denotes empty string). */
73 #define ASSERTION -2 /* Assertion leaf. */
74 #define TAG -3 /* Tag leaf. */
75 #define BACKREF -4 /* Back reference leaf. */
77 #define IS_SPECIAL(x) ((x)->code_min < 0)
78 #define IS_EMPTY(x) ((x)->code_min == EMPTY)
79 #define IS_ASSERTION(x) ((x)->code_min == ASSERTION)
80 #define IS_TAG(x) ((x)->code_min == TAG)
81 #define IS_BACKREF(x) ((x)->code_min == BACKREF)
84 /* A generic AST node. All AST nodes consist of this node on the top
85 level with `obj' pointing to the actual content. */
87 tre_ast_type_t type; /* Type of the node. */
88 void *obj; /* Pointer to actual node. */
93 tre_pos_and_tags_t *firstpos;
94 tre_pos_and_tags_t *lastpos;
98 /* A "literal" node. These are created for assertions, back references,
99 tags, matching parameter settings, and all expressions that match one
106 tre_ctype_t *neg_classes;
109 /* A "catenation" node. These are created when two regexps are concatenated.
110 If there are more than one subexpressions in sequence, the `left' part
111 holds all but the last, and `right' part holds the last subexpression
112 (catenation is left associative). */
114 tre_ast_node_t *left;
115 tre_ast_node_t *right;
118 /* An "iteration" node. These are created for the "*", "+", "?", and "{m,n}"
121 /* Subexpression to match. */
123 /* Minimum number of consecutive matches. */
125 /* Maximum number of consecutive matches. */
127 /* If 0, match as many characters as possible, if 1 match as few as
128 possible. Note that this does not always mean the same thing as
129 matching as many/few repetitions as possible. */
130 unsigned int minimal:1;
133 /* An "union" node. These are created for the "|" operator. */
135 tre_ast_node_t *left;
136 tre_ast_node_t *right;
140 static tre_ast_node_t *
141 tre_ast_new_node(tre_mem_t mem, int type, void *obj)
143 tre_ast_node_t *node = tre_mem_calloc(mem, sizeof *node);
149 node->submatch_id = -1;
153 static tre_ast_node_t *
154 tre_ast_new_literal(tre_mem_t mem, int code_min, int code_max, int position)
156 tre_ast_node_t *node;
159 lit = tre_mem_calloc(mem, sizeof *lit);
160 node = tre_ast_new_node(mem, LITERAL, lit);
163 lit->code_min = code_min;
164 lit->code_max = code_max;
165 lit->position = position;
169 static tre_ast_node_t *
170 tre_ast_new_iter(tre_mem_t mem, tre_ast_node_t *arg, int min, int max, int minimal)
172 tre_ast_node_t *node;
173 tre_iteration_t *iter;
175 iter = tre_mem_calloc(mem, sizeof *iter);
176 node = tre_ast_new_node(mem, ITERATION, iter);
182 iter->minimal = minimal;
183 node->num_submatches = arg->num_submatches;
187 static tre_ast_node_t *
188 tre_ast_new_union(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
190 tre_ast_node_t *node;
195 un = tre_mem_calloc(mem, sizeof *un);
196 node = tre_ast_new_node(mem, UNION, un);
201 node->num_submatches = left->num_submatches + right->num_submatches;
205 static tre_ast_node_t *
206 tre_ast_new_catenation(tre_mem_t mem, tre_ast_node_t *left, tre_ast_node_t *right)
208 tre_ast_node_t *node;
209 tre_catenation_t *cat;
213 cat = tre_mem_calloc(mem, sizeof *cat);
214 node = tre_ast_new_node(mem, CATENATION, cat);
219 node->num_submatches = left->num_submatches + right->num_submatches;
224 /***********************************************************************
225 from tre-stack.c and tre-stack.h
226 ***********************************************************************/
228 typedef struct tre_stack_rec tre_stack_t;
230 /* Creates a new stack object. `size' is initial size in bytes, `max_size'
231 is maximum size, and `increment' specifies how much more space will be
232 allocated with realloc() if all space gets used up. Returns the stack
233 object or NULL if out of memory. */
235 tre_stack_new(int size, int max_size, int increment);
237 /* Frees the stack object. */
239 tre_stack_destroy(tre_stack_t *s);
241 /* Returns the current number of objects in the stack. */
243 tre_stack_num_objects(tre_stack_t *s);
245 /* Each tre_stack_push_*(tre_stack_t *s, <type> value) function pushes
246 `value' on top of stack `s'. Returns REG_ESPACE if out of memory.
247 This tries to realloc() more space before failing if maximum size
248 has not yet been reached. Returns REG_OK if successful. */
249 #define declare_pushf(typetag, type) \
250 static reg_errcode_t tre_stack_push_ ## typetag(tre_stack_t *s, type value)
252 declare_pushf(voidptr, void *);
253 declare_pushf(int, int);
255 /* Each tre_stack_pop_*(tre_stack_t *s) function pops the topmost
256 element off of stack `s' and returns it. The stack must not be
258 #define declare_popf(typetag, type) \
259 static type tre_stack_pop_ ## typetag(tre_stack_t *s)
261 declare_popf(voidptr, void *);
262 declare_popf(int, int);
264 /* Just to save some typing. */
265 #define STACK_PUSH(s, typetag, value) \
268 status = tre_stack_push_ ## typetag(s, value); \
270 while (/*CONSTCOND*/0)
272 #define STACK_PUSHX(s, typetag, value) \
274 status = tre_stack_push_ ## typetag(s, value); \
275 if (status != REG_OK) \
279 #define STACK_PUSHR(s, typetag, value) \
281 reg_errcode_t _status; \
282 _status = tre_stack_push_ ## typetag(s, value); \
283 if (_status != REG_OK) \
287 union tre_stack_item {
292 struct tre_stack_rec {
297 union tre_stack_item *stack;
302 tre_stack_new(int size, int max_size, int increment)
306 s = xmalloc(sizeof(*s));
309 s->stack = xmalloc(sizeof(*s->stack) * size);
310 if (s->stack == NULL)
316 s->max_size = max_size;
317 s->increment = increment;
324 tre_stack_destroy(tre_stack_t *s)
331 tre_stack_num_objects(tre_stack_t *s)
337 tre_stack_push(tre_stack_t *s, union tre_stack_item value)
339 if (s->ptr < s->size)
341 s->stack[s->ptr] = value;
346 if (s->size >= s->max_size)
352 union tre_stack_item *new_buffer;
354 new_size = s->size + s->increment;
355 if (new_size > s->max_size)
356 new_size = s->max_size;
357 new_buffer = xrealloc(s->stack, sizeof(*new_buffer) * new_size);
358 if (new_buffer == NULL)
362 assert(new_size > s->size);
364 s->stack = new_buffer;
365 tre_stack_push(s, value);
371 #define define_pushf(typetag, type) \
372 declare_pushf(typetag, type) { \
373 union tre_stack_item item; \
374 item.typetag ## _value = value; \
375 return tre_stack_push(s, item); \
378 define_pushf(int, int)
379 define_pushf(voidptr, void *)
381 #define define_popf(typetag, type) \
382 declare_popf(typetag, type) { \
383 return s->stack[--s->ptr].typetag ## _value; \
386 define_popf(int, int)
387 define_popf(voidptr, void *)
390 /***********************************************************************
391 from tre-parse.c and tre-parse.h
392 ***********************************************************************/
396 /* Memory allocator. The AST is allocated using this. */
398 /* Stack used for keeping track of regexp syntax. */
400 /* The parsed node after a parse function returns. */
402 /* Position in the regexp pattern after a parse function returns. */
404 /* The first character of the regexp. */
406 /* Current submatch ID. */
408 /* Current position (number of literal). */
410 /* The highest back reference or -1 if none seen so far. */
412 /* Compilation flags. */
416 /* Some macros for expanding \w, \s, etc. */
417 static const struct {
419 const char *expansion;
421 {'t', "\t"}, {'n', "\n"}, {'r', "\r"},
422 {'f', "\f"}, {'a', "\a"}, {'e', "\033"},
423 {'w', "[[:alnum:]_]"}, {'W', "[^[:alnum:]_]"}, {'s', "[[:space:]]"},
424 {'S', "[^[:space:]]"}, {'d', "[[:digit:]]"}, {'D', "[^[:digit:]]"},
428 /* Expands a macro delimited by `regex' and `regex_end' to `buf', which
429 must have at least `len' items. Sets buf[0] to zero if the there
430 is no match in `tre_macros'. */
431 static const char *tre_expand_macro(const char *s)
434 for (i = 0; tre_macros[i].c && tre_macros[i].c != *s; i++);
435 return tre_macros[i].expansion;
439 tre_compare_lit(const void *a, const void *b)
441 const tre_literal_t *const *la = a;
442 const tre_literal_t *const *lb = b;
443 /* assumes the range of valid code_min is < INT_MAX */
444 return la[0]->code_min - lb[0]->code_min;
454 static tre_literal_t *tre_new_lit(struct literals *p)
457 if (p->len >= p->cap) {
461 a = xrealloc(p->a, p->cap * sizeof *p->a);
467 *a = tre_mem_calloc(p->mem, sizeof **a);
471 static int add_icase_literals(struct literals *ls, int min, int max)
475 for (c=min; c<=max; ) {
476 /* assumes islower(c) and isupper(c) are exclusive
477 and toupper(c)!=c if islower(c).
478 multiple opposite case characters are not supported */
479 if (tre_islower(c)) {
480 b = e = tre_toupper(c);
481 for (c++, e++; c<=max; c++, e++)
482 if (tre_toupper(c) != e) break;
483 } else if (tre_isupper(c)) {
484 b = e = tre_tolower(c);
485 for (c++, e++; c<=max; c++, e++)
486 if (tre_tolower(c) != e) break;
491 lit = tre_new_lit(ls);
502 /* Maximum number of character classes in a negated bracket expression. */
503 #define MAX_NEG_CLASSES 64
508 tre_ctype_t a[MAX_NEG_CLASSES];
511 // TODO: parse bracket into a set of non-overlapping [lo,hi] ranges
515 Bracket = '[' List ']' | '[^' List ']'
516 List = Term | List Term
517 Term = Char | Range | Chclass | Eqclass
518 Range = Char '-' Char | Char '-' '-'
519 Char = Coll | coll_single
521 Coll = '[.' coll_single '.]' | '[.' coll_multi '.]' | '[.' Meta '.]'
522 Eqclass = '[=' coll_single '=]' | '[=' coll_multi '=]'
523 Chclass = '[:' class ':]'
525 coll_single is a single char collating element but it can be
526 '-' only at the beginning or end of a List and
527 ']' only at the beginning of a List and
528 '^' anywhere except after the openning '['
531 static reg_errcode_t parse_bracket_terms(tre_parse_ctx_t *ctx, const char *s, struct literals *ls, struct neg *neg)
533 const char *start = s;
541 len = mbtowc(&wc, s, -1);
543 return *s ? REG_BADPAT : REG_EBRACK;
544 if (*s == ']' && s != start) {
548 if (*s == '-' && s != start && s[1] != ']' &&
549 /* extension: [a-z--@] is accepted as [a-z]|[--@] */
550 (s[1] != '-' || s[2] == ']'))
552 if (*s == '[' && (s[1] == '.' || s[1] == '='))
553 /* collating symbols and equivalence classes are not supported */
555 if (*s == '[' && s[1] == ':') {
556 char tmp[CHARCLASS_NAME_MAX+1];
558 for (len=0; len < CHARCLASS_NAME_MAX && s[len]; len++) {
562 class = tre_ctype(tmp);
566 if (!class || s[len+1] != ']')
574 if (*s == '-' && s[1] != ']') {
576 len = mbtowc(&wc, s, -1);
578 /* XXX - Should use collation order instead of
579 encoding values in character ranges. */
580 if (len <= 0 || min > max)
586 if (class && neg->negate) {
587 if (neg->len >= MAX_NEG_CLASSES)
589 neg->a[neg->len++] = class;
591 tre_literal_t *lit = tre_new_lit(ls);
599 /* Add opposite-case codepoints if REG_ICASE is present.
600 It seems that POSIX requires that bracket negation
601 should happen before case-folding, but most practical
602 implementations do it the other way around. Changing
603 the order would need efficient representation of
604 case-fold ranges and bracket range sets even with
605 simple patterns so this is ok for now. */
606 if (ctx->cflags & REG_ICASE && !class)
607 if (add_icase_literals(ls, min, max))
613 static reg_errcode_t parse_bracket(tre_parse_ctx_t *ctx, const char *s)
615 int i, max, min, negmax, negmin;
616 tre_ast_node_t *node = 0, *n;
626 ls.a = xmalloc(ls.cap * sizeof *ls.a);
630 neg.negate = *s == '^';
634 err = parse_bracket_terms(ctx, s, &ls, &neg);
636 goto parse_bracket_done;
639 /* Sort the array if we need to negate it. */
640 qsort(ls.a, ls.len, sizeof *ls.a, tre_compare_lit);
641 /* extra lit for the last negated range */
642 lit = tre_new_lit(&ls);
645 goto parse_bracket_done;
647 lit->code_min = TRE_CHAR_MAX+1;
648 lit->code_max = TRE_CHAR_MAX+1;
650 /* negated classes */
652 nc = tre_mem_alloc(ctx->mem, (neg.len+1)*sizeof *neg.a);
655 goto parse_bracket_done;
657 memcpy(nc, neg.a, neg.len*sizeof *neg.a);
662 /* Build a union of the items in the array, negated if necessary. */
664 for (i = 0; i < ls.len; i++) {
671 negmin = MAX(max + 1, negmin);
675 lit->code_min = negmin;
676 lit->code_max = negmax;
679 lit->position = ctx->position;
680 lit->neg_classes = nc;
681 n = tre_ast_new_node(ctx->mem, LITERAL, lit);
682 node = tre_ast_new_union(ctx->mem, node, n);
696 static const char *parse_dup_count(const char *s, int *n)
703 *n = 10 * *n + (*s - '0');
705 if (!isdigit(*s) || *n > RE_DUP_MAX)
711 static reg_errcode_t parse_dup(tre_parse_ctx_t *ctx, const char *s)
715 s = parse_dup_count(s, &min);
717 s = parse_dup_count(s+1, &max);
722 (max < min && max >= 0) ||
726 (!(ctx->cflags & REG_EXTENDED) && *s++ != '\\') ||
731 if (min == 0 && max == 0)
732 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
734 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
741 static int hexval(unsigned c)
743 if (c-'0'<10) return c-'0';
745 if (c-'a'<6) return c-'a'+10;
749 static reg_errcode_t marksub(tre_parse_ctx_t *ctx, tre_ast_node_t *node, int subid)
751 if (node->submatch_id >= 0) {
752 tre_ast_node_t *n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
755 n = tre_ast_new_catenation(ctx->mem, n, node);
758 n->num_submatches = node->num_submatches;
761 node->submatch_id = subid;
762 node->num_submatches++;
769 Regex = Branch | '^' | '$' | '^$' | '^' Branch | Branch '$' | '^' Branch '$'
770 Branch = Atom | Branch Atom
771 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '\(' Branch '\)' | back_ref
772 Dup = '*' | '\{' Count '\}' | '\{' Count ',\}' | '\{' Count ',' Count '\}'
774 (leading ^ and trailing $ in a sub expr may be an anchor or literal as well)
777 Regex = Branch | Regex '|' Branch
778 Branch = Atom | Branch Atom
779 Atom = char | quoted_char | '.' | Bracket | Atom Dup | '(' Regex ')' | '^' | '$'
780 Dup = '*' | '+' | '?' | '{' Count '}' | '{' Count ',}' | '{' Count ',' Count '}'
782 (a*+?, ^*, $+, \X, {, (|a) are unspecified)
785 static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
787 int len, ere = ctx->cflags & REG_EXTENDED;
789 tre_ast_node_t *node;
793 return parse_bracket(ctx, s+1);
795 p = tre_expand_macro(s+1);
797 /* assume \X expansion is a single atom */
798 reg_errcode_t err = parse_atom(ctx, p);
802 /* extensions: \b, \B, \<, \>, \xHH \x{HHHH} */
807 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB, -1);
810 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_WB_NEG, -1);
813 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOW, -1);
816 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOW, -1);
826 for (i=0; i<len && v<0x110000; i++) {
837 node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
843 /* extension: treat \+, \? as repetitions in BRE */
844 /* reject repetitions after empty expression in BRE */
848 /* extension: treat \| as alternation in BRE */
850 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
856 if (!ere && (unsigned)*s-'1' < 9) {
859 node = tre_ast_new_literal(ctx->mem, BACKREF, val, ctx->position++);
860 ctx->max_backref = MAX(val, ctx->max_backref);
862 /* extension: accept unknown escaped char
870 if (ctx->cflags & REG_NEWLINE) {
871 tre_ast_node_t *tmp1, *tmp2;
872 tmp1 = tre_ast_new_literal(ctx->mem, 0, '\n'-1, ctx->position++);
873 tmp2 = tre_ast_new_literal(ctx->mem, '\n'+1, TRE_CHAR_MAX, ctx->position++);
875 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
879 node = tre_ast_new_literal(ctx->mem, 0, TRE_CHAR_MAX, ctx->position++);
884 /* '^' has a special meaning everywhere in EREs, and at beginning of BRE. */
885 if (!ere && s != ctx->re)
887 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_BOL, -1);
891 /* '$' is special everywhere in EREs, and in the end of the string in BREs. */
894 node = tre_ast_new_literal(ctx->mem, ASSERTION, ASSERT_AT_EOL, -1);
902 /* reject repetitions after empty expression in ERE */
909 node = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
913 len = mbtowc(&wc, s, -1);
916 if (ctx->cflags & REG_ICASE && (tre_isupper(wc) || tre_islower(wc))) {
917 tre_ast_node_t *tmp1, *tmp2;
918 /* multiple opposite case characters are not supported */
919 tmp1 = tre_ast_new_literal(ctx->mem, tre_toupper(wc), tre_toupper(wc), ctx->position);
920 tmp2 = tre_ast_new_literal(ctx->mem, tre_tolower(wc), tre_tolower(wc), ctx->position);
922 node = tre_ast_new_union(ctx->mem, tmp1, tmp2);
926 node = tre_ast_new_literal(ctx->mem, wc, wc, ctx->position);
940 #define PUSHPTR(err, s, v) do { \
941 if ((err = tre_stack_push_voidptr(s, v)) != REG_OK) \
945 #define PUSHINT(err, s, v) do { \
946 if ((err = tre_stack_push_int(s, v)) != REG_OK) \
950 static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
952 tre_ast_node_t *nbranch=0, *nunion=0;
953 int ere = ctx->cflags & REG_EXTENDED;
954 const char *s = ctx->re;
958 tre_stack_t *stack = ctx->stack;
960 PUSHINT(err, stack, subid++);
962 if ((!ere && *s == '\\' && s[1] == '(') ||
963 (ere && *s == '(')) {
964 PUSHPTR(err, stack, nunion);
965 PUSHPTR(err, stack, nbranch);
966 PUSHINT(err, stack, subid++);
971 nbranch = nunion = 0;
974 if ((!ere && *s == '\\' && s[1] == ')') ||
975 (ere && *s == ')' && depth)) {
976 ctx->n = tre_ast_new_literal(ctx->mem, EMPTY, -1, -1);
980 err = parse_atom(ctx, s);
987 /* extension: repetitions are rejected after an empty node
988 eg. (+), |*, {2}, but assertions are not treated as empty
989 so ^* or $? are accepted currently. */
991 if (*s!='\\' && *s!='*') {
994 if (*s!='+' && *s!='?' && *s!='{')
999 /* extension: treat \+, \? as repetitions in BRE */
1000 if (*s=='\\' && s[1]!='+' && s[1]!='?' && s[1]!='{')
1005 /* extension: multiple consecutive *+?{,} is unspecified,
1006 but (a+)+ has to be supported so accepting a++ makes
1007 sense, note however that the RE_DUP_MAX limit can be
1008 circumvented: (a{255}){255} uses a lot of memory.. */
1010 err = parse_dup(ctx, s+1);
1021 ctx->n = tre_ast_new_iter(ctx->mem, ctx->n, min, max, 0);
1027 nbranch = tre_ast_new_catenation(ctx->mem, nbranch, ctx->n);
1028 if ((ere && *s == '|') ||
1029 (ere && *s == ')' && depth) ||
1030 (!ere && *s == '\\' && s[1] == ')') ||
1031 /* extension: treat \| as alternation in BRE */
1032 (!ere && *s == '\\' && s[1] == '|') ||
1034 /* extension: empty branch is unspecified (), (|a), (a|)
1035 here they are not rejected but match on empty string */
1037 nunion = tre_ast_new_union(ctx->mem, nunion, nbranch);
1040 if (c == '\\' && s[1] == '|') {
1042 } else if (c == '|') {
1046 if (!depth) return REG_EPAREN;
1048 } else if (c == ')')
1051 err = marksub(ctx, nunion, tre_stack_pop_int(stack));
1054 if (!c && depth<0) {
1055 ctx->submatch_id = subid;
1060 nbranch = tre_stack_pop_voidptr(stack);
1061 nunion = tre_stack_pop_voidptr(stack);
1069 /***********************************************************************
1071 ***********************************************************************/
1076 - Fix tre_ast_to_tnfa() to recurse using a stack instead of recursive
1081 Algorithms to setup tags so that submatch addressing can be done.
1085 /* Inserts a catenation node to the root of the tree given in `node'.
1086 As the left child a new tag with number `tag_id' to `node' is added,
1087 and the right child is the old root. */
1088 static reg_errcode_t
1089 tre_add_tag_left(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1091 tre_catenation_t *c;
1093 c = tre_mem_alloc(mem, sizeof(*c));
1096 c->left = tre_ast_new_literal(mem, TAG, tag_id, -1);
1097 if (c->left == NULL)
1099 c->right = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1100 if (c->right == NULL)
1103 c->right->obj = node->obj;
1104 c->right->type = node->type;
1105 c->right->nullable = -1;
1106 c->right->submatch_id = -1;
1107 c->right->firstpos = NULL;
1108 c->right->lastpos = NULL;
1109 c->right->num_tags = 0;
1111 node->type = CATENATION;
1115 /* Inserts a catenation node to the root of the tree given in `node'.
1116 As the right child a new tag with number `tag_id' to `node' is added,
1117 and the left child is the old root. */
1118 static reg_errcode_t
1119 tre_add_tag_right(tre_mem_t mem, tre_ast_node_t *node, int tag_id)
1121 tre_catenation_t *c;
1123 c = tre_mem_alloc(mem, sizeof(*c));
1126 c->right = tre_ast_new_literal(mem, TAG, tag_id, -1);
1127 if (c->right == NULL)
1129 c->left = tre_mem_alloc(mem, sizeof(tre_ast_node_t));
1130 if (c->left == NULL)
1133 c->left->obj = node->obj;
1134 c->left->type = node->type;
1135 c->left->nullable = -1;
1136 c->left->submatch_id = -1;
1137 c->left->firstpos = NULL;
1138 c->left->lastpos = NULL;
1139 c->left->num_tags = 0;
1141 node->type = CATENATION;
1147 ADDTAGS_AFTER_ITERATION,
1148 ADDTAGS_AFTER_UNION_LEFT,
1149 ADDTAGS_AFTER_UNION_RIGHT,
1150 ADDTAGS_AFTER_CAT_LEFT,
1151 ADDTAGS_AFTER_CAT_RIGHT,
1152 ADDTAGS_SET_SUBMATCH_END
1153 } tre_addtags_symbol_t;
1162 /* Go through `regset' and set submatch data for submatches that are
1165 tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
1169 for (i = 0; regset[i] >= 0; i++)
1171 int id = regset[i] / 2;
1172 int start = !(regset[i] % 2);
1174 tnfa->submatch_data[id].so_tag = tag;
1176 tnfa->submatch_data[id].eo_tag = tag;
1182 /* Adds tags to appropriate locations in the parse tree in `tree', so that
1183 subexpressions marked for submatch addressing can be traced. */
1184 static reg_errcode_t
1185 tre_add_tags(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree,
1188 reg_errcode_t status = REG_OK;
1189 tre_addtags_symbol_t symbol;
1190 tre_ast_node_t *node = tree; /* Tree node we are currently looking at. */
1191 int bottom = tre_stack_num_objects(stack);
1192 /* True for first pass (counting number of needed tags) */
1193 int first_pass = (mem == NULL || tnfa == NULL);
1194 int *regset, *orig_regset;
1195 int num_tags = 0; /* Total number of tags. */
1196 int num_minimals = 0; /* Number of special minimal tags. */
1197 int tag = 0; /* The tag that is to be added next. */
1198 int next_tag = 1; /* Next tag to use after this one. */
1199 int *parents; /* Stack of submatches the current submatch is
1201 int minimal_tag = -1; /* Tag that marks the beginning of a minimal match. */
1202 tre_tag_states_t *saved_states;
1204 tre_tag_direction_t direction = TRE_TAG_MINIMIZE;
1208 tnfa->minimal_tags[0] = -1;
1211 regset = xmalloc(sizeof(*regset) * ((tnfa->num_submatches + 1) * 2));
1215 orig_regset = regset;
1217 parents = xmalloc(sizeof(*parents) * (tnfa->num_submatches + 1));
1218 if (parents == NULL)
1225 saved_states = xmalloc(sizeof(*saved_states) * (tnfa->num_submatches + 1));
1226 if (saved_states == NULL)
1235 for (i = 0; i <= tnfa->num_submatches; i++)
1236 saved_states[i].tag = -1;
1239 STACK_PUSH(stack, voidptr, node);
1240 STACK_PUSH(stack, int, ADDTAGS_RECURSE);
1242 while (tre_stack_num_objects(stack) > bottom)
1244 if (status != REG_OK)
1247 symbol = (tre_addtags_symbol_t)tre_stack_pop_int(stack);
1251 case ADDTAGS_SET_SUBMATCH_END:
1253 int id = tre_stack_pop_int(stack);
1256 /* Add end of this submatch to regset. */
1257 for (i = 0; regset[i] >= 0; i++);
1258 regset[i] = id * 2 + 1;
1261 /* Pop this submatch from the parents stack. */
1262 for (i = 0; parents[i] >= 0; i++);
1263 parents[i - 1] = -1;
1267 case ADDTAGS_RECURSE:
1268 node = tre_stack_pop_voidptr(stack);
1270 if (node->submatch_id >= 0)
1272 int id = node->submatch_id;
1276 /* Add start of this submatch to regset. */
1277 for (i = 0; regset[i] >= 0; i++);
1283 for (i = 0; parents[i] >= 0; i++);
1284 tnfa->submatch_data[id].parents = NULL;
1287 int *p = xmalloc(sizeof(*p) * (i + 1));
1290 status = REG_ESPACE;
1293 assert(tnfa->submatch_data[id].parents == NULL);
1294 tnfa->submatch_data[id].parents = p;
1295 for (i = 0; parents[i] >= 0; i++)
1301 /* Add end of this submatch to regset after processing this
1303 STACK_PUSHX(stack, int, node->submatch_id);
1304 STACK_PUSHX(stack, int, ADDTAGS_SET_SUBMATCH_END);
1311 tre_literal_t *lit = node->obj;
1313 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1318 /* Regset is not empty, so add a tag before the
1319 literal or backref. */
1322 status = tre_add_tag_left(mem, node, tag);
1323 tnfa->tag_directions[tag] = direction;
1324 if (minimal_tag >= 0)
1326 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1327 tnfa->minimal_tags[i] = tag;
1328 tnfa->minimal_tags[i + 1] = minimal_tag;
1329 tnfa->minimal_tags[i + 2] = -1;
1333 tre_purge_regset(regset, tnfa, tag);
1348 assert(!IS_TAG(lit));
1354 tre_catenation_t *cat = node->obj;
1355 tre_ast_node_t *left = cat->left;
1356 tre_ast_node_t *right = cat->right;
1357 int reserved_tag = -1;
1360 /* After processing right child. */
1361 STACK_PUSHX(stack, voidptr, node);
1362 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_RIGHT);
1364 /* Process right child. */
1365 STACK_PUSHX(stack, voidptr, right);
1366 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1368 /* After processing left child. */
1369 STACK_PUSHX(stack, int, next_tag + left->num_tags);
1370 if (left->num_tags > 0 && right->num_tags > 0)
1372 /* Reserve the next tag to the right child. */
1373 reserved_tag = next_tag;
1376 STACK_PUSHX(stack, int, reserved_tag);
1377 STACK_PUSHX(stack, int, ADDTAGS_AFTER_CAT_LEFT);
1379 /* Process left child. */
1380 STACK_PUSHX(stack, voidptr, left);
1381 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1387 tre_iteration_t *iter = node->obj;
1391 STACK_PUSHX(stack, int, regset[0] >= 0 || iter->minimal);
1395 STACK_PUSHX(stack, int, tag);
1396 STACK_PUSHX(stack, int, iter->minimal);
1398 STACK_PUSHX(stack, voidptr, node);
1399 STACK_PUSHX(stack, int, ADDTAGS_AFTER_ITERATION);
1401 STACK_PUSHX(stack, voidptr, iter->arg);
1402 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1404 /* Regset is not empty, so add a tag here. */
1405 if (regset[0] >= 0 || iter->minimal)
1410 status = tre_add_tag_left(mem, node, tag);
1412 tnfa->tag_directions[tag] = TRE_TAG_MAXIMIZE;
1414 tnfa->tag_directions[tag] = direction;
1415 if (minimal_tag >= 0)
1417 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1418 tnfa->minimal_tags[i] = tag;
1419 tnfa->minimal_tags[i + 1] = minimal_tag;
1420 tnfa->minimal_tags[i + 2] = -1;
1424 tre_purge_regset(regset, tnfa, tag);
1432 direction = TRE_TAG_MINIMIZE;
1437 tre_union_t *uni = node->obj;
1438 tre_ast_node_t *left = uni->left;
1439 tre_ast_node_t *right = uni->right;
1445 left_tag = next_tag;
1446 right_tag = next_tag + 1;
1451 right_tag = next_tag;
1454 /* After processing right child. */
1455 STACK_PUSHX(stack, int, right_tag);
1456 STACK_PUSHX(stack, int, left_tag);
1457 STACK_PUSHX(stack, voidptr, regset);
1458 STACK_PUSHX(stack, int, regset[0] >= 0);
1459 STACK_PUSHX(stack, voidptr, node);
1460 STACK_PUSHX(stack, voidptr, right);
1461 STACK_PUSHX(stack, voidptr, left);
1462 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_RIGHT);
1464 /* Process right child. */
1465 STACK_PUSHX(stack, voidptr, right);
1466 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1468 /* After processing left child. */
1469 STACK_PUSHX(stack, int, ADDTAGS_AFTER_UNION_LEFT);
1471 /* Process left child. */
1472 STACK_PUSHX(stack, voidptr, left);
1473 STACK_PUSHX(stack, int, ADDTAGS_RECURSE);
1475 /* Regset is not empty, so add a tag here. */
1481 status = tre_add_tag_left(mem, node, tag);
1482 tnfa->tag_directions[tag] = direction;
1483 if (minimal_tag >= 0)
1485 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1486 tnfa->minimal_tags[i] = tag;
1487 tnfa->minimal_tags[i + 1] = minimal_tag;
1488 tnfa->minimal_tags[i + 2] = -1;
1492 tre_purge_regset(regset, tnfa, tag);
1501 if (node->num_submatches > 0)
1503 /* The next two tags are reserved for markers. */
1513 if (node->submatch_id >= 0)
1516 /* Push this submatch on the parents stack. */
1517 for (i = 0; parents[i] >= 0; i++);
1518 parents[i] = node->submatch_id;
1519 parents[i + 1] = -1;
1522 break; /* end case: ADDTAGS_RECURSE */
1524 case ADDTAGS_AFTER_ITERATION:
1528 node = tre_stack_pop_voidptr(stack);
1531 node->num_tags = ((tre_iteration_t *)node->obj)->arg->num_tags
1532 + tre_stack_pop_int(stack);
1537 minimal = tre_stack_pop_int(stack);
1538 enter_tag = tre_stack_pop_int(stack);
1540 minimal_tag = enter_tag;
1546 direction = TRE_TAG_MINIMIZE;
1548 direction = TRE_TAG_MAXIMIZE;
1553 case ADDTAGS_AFTER_CAT_LEFT:
1555 int new_tag = tre_stack_pop_int(stack);
1556 next_tag = tre_stack_pop_int(stack);
1564 case ADDTAGS_AFTER_CAT_RIGHT:
1565 node = tre_stack_pop_voidptr(stack);
1567 node->num_tags = ((tre_catenation_t *)node->obj)->left->num_tags
1568 + ((tre_catenation_t *)node->obj)->right->num_tags;
1571 case ADDTAGS_AFTER_UNION_LEFT:
1572 /* Lift the bottom of the `regset' array so that when processing
1573 the right operand the items currently in the array are
1574 invisible. The original bottom was saved at ADDTAGS_UNION and
1575 will be restored at ADDTAGS_AFTER_UNION_RIGHT below. */
1576 while (*regset >= 0)
1580 case ADDTAGS_AFTER_UNION_RIGHT:
1582 int added_tags, tag_left, tag_right;
1583 tre_ast_node_t *left = tre_stack_pop_voidptr(stack);
1584 tre_ast_node_t *right = tre_stack_pop_voidptr(stack);
1585 node = tre_stack_pop_voidptr(stack);
1586 added_tags = tre_stack_pop_int(stack);
1589 node->num_tags = ((tre_union_t *)node->obj)->left->num_tags
1590 + ((tre_union_t *)node->obj)->right->num_tags + added_tags
1591 + ((node->num_submatches > 0) ? 2 : 0);
1593 regset = tre_stack_pop_voidptr(stack);
1594 tag_left = tre_stack_pop_int(stack);
1595 tag_right = tre_stack_pop_int(stack);
1597 /* Add tags after both children, the left child gets a smaller
1598 tag than the right child. This guarantees that we prefer
1599 the left child over the right child. */
1600 /* XXX - This is not always necessary (if the children have
1601 tags which must be seen for every match of that child). */
1602 /* XXX - Check if this is the only place where tre_add_tag_right
1603 is used. If so, use tre_add_tag_left (putting the tag before
1604 the child as opposed after the child) and throw away
1605 tre_add_tag_right. */
1606 if (node->num_submatches > 0)
1610 status = tre_add_tag_right(mem, left, tag_left);
1611 tnfa->tag_directions[tag_left] = TRE_TAG_MAXIMIZE;
1612 if (status == REG_OK)
1613 status = tre_add_tag_right(mem, right, tag_right);
1614 tnfa->tag_directions[tag_right] = TRE_TAG_MAXIMIZE;
1618 direction = TRE_TAG_MAXIMIZE;
1626 } /* end switch(symbol) */
1627 } /* end while(tre_stack_num_objects(stack) > bottom) */
1630 tre_purge_regset(regset, tnfa, tag);
1632 if (!first_pass && minimal_tag >= 0)
1635 for (i = 0; tnfa->minimal_tags[i] >= 0; i++);
1636 tnfa->minimal_tags[i] = tag;
1637 tnfa->minimal_tags[i + 1] = minimal_tag;
1638 tnfa->minimal_tags[i + 2] = -1;
1643 assert(tree->num_tags == num_tags);
1644 tnfa->end_tag = num_tags;
1645 tnfa->num_tags = num_tags;
1646 tnfa->num_minimals = num_minimals;
1649 xfree(saved_states);
1656 AST to TNFA compilation routines.
1662 } tre_copyast_symbol_t;
1664 /* Flags for tre_copy_ast(). */
1665 #define COPY_REMOVE_TAGS 1
1666 #define COPY_MAXIMIZE_FIRST_TAG 2
1668 static reg_errcode_t
1669 tre_copy_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1670 int flags, int *pos_add, tre_tag_direction_t *tag_directions,
1671 tre_ast_node_t **copy, int *max_pos)
1673 reg_errcode_t status = REG_OK;
1674 int bottom = tre_stack_num_objects(stack);
1677 tre_ast_node_t **result = copy;
1678 tre_copyast_symbol_t symbol;
1680 STACK_PUSH(stack, voidptr, ast);
1681 STACK_PUSH(stack, int, COPY_RECURSE);
1683 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1685 tre_ast_node_t *node;
1686 if (status != REG_OK)
1689 symbol = (tre_copyast_symbol_t)tre_stack_pop_int(stack);
1692 case COPY_SET_RESULT_PTR:
1693 result = tre_stack_pop_voidptr(stack);
1696 node = tre_stack_pop_voidptr(stack);
1701 tre_literal_t *lit = node->obj;
1702 int pos = lit->position;
1703 int min = lit->code_min;
1704 int max = lit->code_max;
1705 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1707 /* XXX - e.g. [ab] has only one position but two
1708 nodes, so we are creating holes in the state space
1709 here. Not fatal, just wastes memory. */
1713 else if (IS_TAG(lit) && (flags & COPY_REMOVE_TAGS))
1715 /* Change this tag to empty. */
1719 else if (IS_TAG(lit) && (flags & COPY_MAXIMIZE_FIRST_TAG)
1722 /* Maximize the first tag. */
1723 tag_directions[max] = TRE_TAG_MAXIMIZE;
1726 *result = tre_ast_new_literal(mem, min, max, pos);
1727 if (*result == NULL)
1728 status = REG_ESPACE;
1730 tre_literal_t *p = (*result)->obj;
1731 p->class = lit->class;
1732 p->neg_classes = lit->neg_classes;
1741 tre_union_t *uni = node->obj;
1743 *result = tre_ast_new_union(mem, uni->left, uni->right);
1744 if (*result == NULL)
1746 status = REG_ESPACE;
1749 tmp = (*result)->obj;
1750 result = &tmp->left;
1751 STACK_PUSHX(stack, voidptr, uni->right);
1752 STACK_PUSHX(stack, int, COPY_RECURSE);
1753 STACK_PUSHX(stack, voidptr, &tmp->right);
1754 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1755 STACK_PUSHX(stack, voidptr, uni->left);
1756 STACK_PUSHX(stack, int, COPY_RECURSE);
1761 tre_catenation_t *cat = node->obj;
1762 tre_catenation_t *tmp;
1763 *result = tre_ast_new_catenation(mem, cat->left, cat->right);
1764 if (*result == NULL)
1766 status = REG_ESPACE;
1769 tmp = (*result)->obj;
1772 result = &tmp->left;
1774 STACK_PUSHX(stack, voidptr, cat->right);
1775 STACK_PUSHX(stack, int, COPY_RECURSE);
1776 STACK_PUSHX(stack, voidptr, &tmp->right);
1777 STACK_PUSHX(stack, int, COPY_SET_RESULT_PTR);
1778 STACK_PUSHX(stack, voidptr, cat->left);
1779 STACK_PUSHX(stack, int, COPY_RECURSE);
1784 tre_iteration_t *iter = node->obj;
1785 STACK_PUSHX(stack, voidptr, iter->arg);
1786 STACK_PUSHX(stack, int, COPY_RECURSE);
1787 *result = tre_ast_new_iter(mem, iter->arg, iter->min,
1788 iter->max, iter->minimal);
1789 if (*result == NULL)
1791 status = REG_ESPACE;
1794 iter = (*result)->obj;
1795 result = &iter->arg;
1805 *pos_add += num_copied;
1812 } tre_expand_ast_symbol_t;
1814 /* Expands each iteration node that has a finite nonzero minimum or maximum
1815 iteration count to a catenated sequence of copies of the node. */
1816 static reg_errcode_t
1817 tre_expand_ast(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *ast,
1818 int *position, tre_tag_direction_t *tag_directions)
1820 reg_errcode_t status = REG_OK;
1821 int bottom = tre_stack_num_objects(stack);
1823 int pos_add_total = 0;
1827 STACK_PUSHR(stack, voidptr, ast);
1828 STACK_PUSHR(stack, int, EXPAND_RECURSE);
1829 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
1831 tre_ast_node_t *node;
1832 tre_expand_ast_symbol_t symbol;
1834 if (status != REG_OK)
1837 symbol = (tre_expand_ast_symbol_t)tre_stack_pop_int(stack);
1838 node = tre_stack_pop_voidptr(stack);
1841 case EXPAND_RECURSE:
1846 tre_literal_t *lit= node->obj;
1847 if (!IS_SPECIAL(lit) || IS_BACKREF(lit))
1849 lit->position += pos_add;
1850 if (lit->position > max_pos)
1851 max_pos = lit->position;
1857 tre_union_t *uni = node->obj;
1858 STACK_PUSHX(stack, voidptr, uni->right);
1859 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1860 STACK_PUSHX(stack, voidptr, uni->left);
1861 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1866 tre_catenation_t *cat = node->obj;
1867 STACK_PUSHX(stack, voidptr, cat->right);
1868 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1869 STACK_PUSHX(stack, voidptr, cat->left);
1870 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1875 tre_iteration_t *iter = node->obj;
1876 STACK_PUSHX(stack, int, pos_add);
1877 STACK_PUSHX(stack, voidptr, node);
1878 STACK_PUSHX(stack, int, EXPAND_AFTER_ITER);
1879 STACK_PUSHX(stack, voidptr, iter->arg);
1880 STACK_PUSHX(stack, int, EXPAND_RECURSE);
1881 /* If we are going to expand this node at EXPAND_AFTER_ITER
1882 then don't increase the `pos' fields of the nodes now, it
1883 will get done when expanding. */
1884 if (iter->min > 1 || iter->max > 1)
1894 case EXPAND_AFTER_ITER:
1896 tre_iteration_t *iter = node->obj;
1898 pos_add = tre_stack_pop_int(stack);
1899 pos_add_last = pos_add;
1900 if (iter->min > 1 || iter->max > 1)
1902 tre_ast_node_t *seq1 = NULL, *seq2 = NULL;
1904 int pos_add_save = pos_add;
1906 /* Create a catenated sequence of copies of the node. */
1907 for (j = 0; j < iter->min; j++)
1909 tre_ast_node_t *copy;
1910 /* Remove tags from all but the last copy. */
1911 int flags = ((j + 1 < iter->min)
1913 : COPY_MAXIMIZE_FIRST_TAG);
1914 pos_add_save = pos_add;
1915 status = tre_copy_ast(mem, stack, iter->arg, flags,
1916 &pos_add, tag_directions, ©,
1918 if (status != REG_OK)
1921 seq1 = tre_ast_new_catenation(mem, seq1, copy);
1928 if (iter->max == -1)
1930 /* No upper limit. */
1931 pos_add_save = pos_add;
1932 status = tre_copy_ast(mem, stack, iter->arg, 0,
1933 &pos_add, NULL, &seq2, &max_pos);
1934 if (status != REG_OK)
1936 seq2 = tre_ast_new_iter(mem, seq2, 0, -1, 0);
1942 for (j = iter->min; j < iter->max; j++)
1944 tre_ast_node_t *tmp, *copy;
1945 pos_add_save = pos_add;
1946 status = tre_copy_ast(mem, stack, iter->arg, 0,
1947 &pos_add, NULL, ©, &max_pos);
1948 if (status != REG_OK)
1951 seq2 = tre_ast_new_catenation(mem, copy, seq2);
1956 tmp = tre_ast_new_literal(mem, EMPTY, -1, -1);
1959 seq2 = tre_ast_new_union(mem, tmp, seq2);
1965 pos_add = pos_add_save;
1968 else if (seq2 != NULL)
1969 seq1 = tre_ast_new_catenation(mem, seq1, seq2);
1972 node->obj = seq1->obj;
1973 node->type = seq1->type;
1977 pos_add_total += pos_add - pos_add_last;
1978 if (iter_depth == 0)
1979 pos_add = pos_add_total;
1989 *position += pos_add_total;
1991 /* `max_pos' should never be larger than `*position' if the above
1992 code works, but just an extra safeguard let's make sure
1993 `*position' is set large enough so enough memory will be
1994 allocated for the transition table. */
1995 if (max_pos > *position)
1996 *position = max_pos;
2001 static tre_pos_and_tags_t *
2002 tre_set_empty(tre_mem_t mem)
2004 tre_pos_and_tags_t *new_set;
2006 new_set = tre_mem_calloc(mem, sizeof(*new_set));
2007 if (new_set == NULL)
2010 new_set[0].position = -1;
2011 new_set[0].code_min = -1;
2012 new_set[0].code_max = -1;
2017 static tre_pos_and_tags_t *
2018 tre_set_one(tre_mem_t mem, int position, int code_min, int code_max,
2019 tre_ctype_t class, tre_ctype_t *neg_classes, int backref)
2021 tre_pos_and_tags_t *new_set;
2023 new_set = tre_mem_calloc(mem, sizeof(*new_set) * 2);
2024 if (new_set == NULL)
2027 new_set[0].position = position;
2028 new_set[0].code_min = code_min;
2029 new_set[0].code_max = code_max;
2030 new_set[0].class = class;
2031 new_set[0].neg_classes = neg_classes;
2032 new_set[0].backref = backref;
2033 new_set[1].position = -1;
2034 new_set[1].code_min = -1;
2035 new_set[1].code_max = -1;
2040 static tre_pos_and_tags_t *
2041 tre_set_union(tre_mem_t mem, tre_pos_and_tags_t *set1, tre_pos_and_tags_t *set2,
2042 int *tags, int assertions)
2045 tre_pos_and_tags_t *new_set;
2049 for (num_tags = 0; tags != NULL && tags[num_tags] >= 0; num_tags++);
2050 for (s1 = 0; set1[s1].position >= 0; s1++);
2051 for (s2 = 0; set2[s2].position >= 0; s2++);
2052 new_set = tre_mem_calloc(mem, sizeof(*new_set) * (s1 + s2 + 1));
2056 for (s1 = 0; set1[s1].position >= 0; s1++)
2058 new_set[s1].position = set1[s1].position;
2059 new_set[s1].code_min = set1[s1].code_min;
2060 new_set[s1].code_max = set1[s1].code_max;
2061 new_set[s1].assertions = set1[s1].assertions | assertions;
2062 new_set[s1].class = set1[s1].class;
2063 new_set[s1].neg_classes = set1[s1].neg_classes;
2064 new_set[s1].backref = set1[s1].backref;
2065 if (set1[s1].tags == NULL && tags == NULL)
2066 new_set[s1].tags = NULL;
2069 for (i = 0; set1[s1].tags != NULL && set1[s1].tags[i] >= 0; i++);
2070 new_tags = tre_mem_alloc(mem, (sizeof(*new_tags)
2071 * (i + num_tags + 1)));
2072 if (new_tags == NULL)
2074 for (j = 0; j < i; j++)
2075 new_tags[j] = set1[s1].tags[j];
2076 for (i = 0; i < num_tags; i++)
2077 new_tags[j + i] = tags[i];
2078 new_tags[j + i] = -1;
2079 new_set[s1].tags = new_tags;
2083 for (s2 = 0; set2[s2].position >= 0; s2++)
2085 new_set[s1 + s2].position = set2[s2].position;
2086 new_set[s1 + s2].code_min = set2[s2].code_min;
2087 new_set[s1 + s2].code_max = set2[s2].code_max;
2088 /* XXX - why not | assertions here as well? */
2089 new_set[s1 + s2].assertions = set2[s2].assertions;
2090 new_set[s1 + s2].class = set2[s2].class;
2091 new_set[s1 + s2].neg_classes = set2[s2].neg_classes;
2092 new_set[s1 + s2].backref = set2[s2].backref;
2093 if (set2[s2].tags == NULL)
2094 new_set[s1 + s2].tags = NULL;
2097 for (i = 0; set2[s2].tags[i] >= 0; i++);
2098 new_tags = tre_mem_alloc(mem, sizeof(*new_tags) * (i + 1));
2099 if (new_tags == NULL)
2101 for (j = 0; j < i; j++)
2102 new_tags[j] = set2[s2].tags[j];
2104 new_set[s1 + s2].tags = new_tags;
2107 new_set[s1 + s2].position = -1;
2111 /* Finds the empty path through `node' which is the one that should be
2112 taken according to POSIX.2 rules, and adds the tags on that path to
2113 `tags'. `tags' may be NULL. If `num_tags_seen' is not NULL, it is
2114 set to the number of tags seen on the path. */
2115 static reg_errcode_t
2116 tre_match_empty(tre_stack_t *stack, tre_ast_node_t *node, int *tags,
2117 int *assertions, int *num_tags_seen)
2121 tre_catenation_t *cat;
2122 tre_iteration_t *iter;
2124 int bottom = tre_stack_num_objects(stack);
2125 reg_errcode_t status = REG_OK;
2129 status = tre_stack_push_voidptr(stack, node);
2131 /* Walk through the tree recursively. */
2132 while (status == REG_OK && tre_stack_num_objects(stack) > bottom)
2134 node = tre_stack_pop_voidptr(stack);
2139 lit = (tre_literal_t *)node->obj;
2140 switch (lit->code_min)
2143 if (lit->code_max >= 0)
2147 /* Add the tag to `tags'. */
2148 for (i = 0; tags[i] >= 0; i++)
2149 if (tags[i] == lit->code_max)
2153 tags[i] = lit->code_max;
2162 assert(lit->code_max >= 1
2163 || lit->code_max <= ASSERT_LAST);
2164 if (assertions != NULL)
2165 *assertions |= lit->code_max;
2176 /* Subexpressions starting earlier take priority over ones
2177 starting later, so we prefer the left subexpression over the
2178 right subexpression. */
2179 uni = (tre_union_t *)node->obj;
2180 if (uni->left->nullable)
2181 STACK_PUSHX(stack, voidptr, uni->left)
2182 else if (uni->right->nullable)
2183 STACK_PUSHX(stack, voidptr, uni->right)
2189 /* The path must go through both children. */
2190 cat = (tre_catenation_t *)node->obj;
2191 assert(cat->left->nullable);
2192 assert(cat->right->nullable);
2193 STACK_PUSHX(stack, voidptr, cat->left);
2194 STACK_PUSHX(stack, voidptr, cat->right);
2198 /* A match with an empty string is preferred over no match at
2199 all, so we go through the argument if possible. */
2200 iter = (tre_iteration_t *)node->obj;
2201 if (iter->arg->nullable)
2202 STACK_PUSHX(stack, voidptr, iter->arg);
2218 NFL_POST_CATENATION,
2220 } tre_nfl_stack_symbol_t;
2223 /* Computes and fills in the fields `nullable', `firstpos', and `lastpos' for
2224 the nodes of the AST `tree'. */
2225 static reg_errcode_t
2226 tre_compute_nfl(tre_mem_t mem, tre_stack_t *stack, tre_ast_node_t *tree)
2228 int bottom = tre_stack_num_objects(stack);
2230 STACK_PUSHR(stack, voidptr, tree);
2231 STACK_PUSHR(stack, int, NFL_RECURSE);
2233 while (tre_stack_num_objects(stack) > bottom)
2235 tre_nfl_stack_symbol_t symbol;
2236 tre_ast_node_t *node;
2238 symbol = (tre_nfl_stack_symbol_t)tre_stack_pop_int(stack);
2239 node = tre_stack_pop_voidptr(stack);
2247 tre_literal_t *lit = (tre_literal_t *)node->obj;
2248 if (IS_BACKREF(lit))
2250 /* Back references: nullable = false, firstpos = {i},
2253 node->firstpos = tre_set_one(mem, lit->position, 0,
2254 TRE_CHAR_MAX, 0, NULL, -1);
2255 if (!node->firstpos)
2257 node->lastpos = tre_set_one(mem, lit->position, 0,
2258 TRE_CHAR_MAX, 0, NULL,
2259 (int)lit->code_max);
2263 else if (lit->code_min < 0)
2265 /* Tags, empty strings, params, and zero width assertions:
2266 nullable = true, firstpos = {}, and lastpos = {}. */
2268 node->firstpos = tre_set_empty(mem);
2269 if (!node->firstpos)
2271 node->lastpos = tre_set_empty(mem);
2277 /* Literal at position i: nullable = false, firstpos = {i},
2281 tre_set_one(mem, lit->position, (int)lit->code_min,
2282 (int)lit->code_max, 0, NULL, -1);
2283 if (!node->firstpos)
2285 node->lastpos = tre_set_one(mem, lit->position,
2288 lit->class, lit->neg_classes,
2297 /* Compute the attributes for the two subtrees, and after that
2299 STACK_PUSHR(stack, voidptr, node);
2300 STACK_PUSHR(stack, int, NFL_POST_UNION);
2301 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->right);
2302 STACK_PUSHR(stack, int, NFL_RECURSE);
2303 STACK_PUSHR(stack, voidptr, ((tre_union_t *)node->obj)->left);
2304 STACK_PUSHR(stack, int, NFL_RECURSE);
2308 /* Compute the attributes for the two subtrees, and after that
2310 STACK_PUSHR(stack, voidptr, node);
2311 STACK_PUSHR(stack, int, NFL_POST_CATENATION);
2312 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->right);
2313 STACK_PUSHR(stack, int, NFL_RECURSE);
2314 STACK_PUSHR(stack, voidptr, ((tre_catenation_t *)node->obj)->left);
2315 STACK_PUSHR(stack, int, NFL_RECURSE);
2319 /* Compute the attributes for the subtree, and after that for
2321 STACK_PUSHR(stack, voidptr, node);
2322 STACK_PUSHR(stack, int, NFL_POST_ITERATION);
2323 STACK_PUSHR(stack, voidptr, ((tre_iteration_t *)node->obj)->arg);
2324 STACK_PUSHR(stack, int, NFL_RECURSE);
2327 break; /* end case: NFL_RECURSE */
2329 case NFL_POST_UNION:
2331 tre_union_t *uni = (tre_union_t *)node->obj;
2332 node->nullable = uni->left->nullable || uni->right->nullable;
2333 node->firstpos = tre_set_union(mem, uni->left->firstpos,
2334 uni->right->firstpos, NULL, 0);
2335 if (!node->firstpos)
2337 node->lastpos = tre_set_union(mem, uni->left->lastpos,
2338 uni->right->lastpos, NULL, 0);
2344 case NFL_POST_ITERATION:
2346 tre_iteration_t *iter = (tre_iteration_t *)node->obj;
2348 if (iter->min == 0 || iter->arg->nullable)
2352 node->firstpos = iter->arg->firstpos;
2353 node->lastpos = iter->arg->lastpos;
2357 case NFL_POST_CATENATION:
2359 int num_tags, *tags, assertions;
2360 reg_errcode_t status;
2361 tre_catenation_t *cat = node->obj;
2362 node->nullable = cat->left->nullable && cat->right->nullable;
2364 /* Compute firstpos. */
2365 if (cat->left->nullable)
2367 /* The left side matches the empty string. Make a first pass
2368 with tre_match_empty() to get the number of tags and
2370 status = tre_match_empty(stack, cat->left,
2371 NULL, NULL, &num_tags);
2372 if (status != REG_OK)
2374 /* Allocate arrays for the tags and parameters. */
2375 tags = xmalloc(sizeof(*tags) * (num_tags + 1));
2380 /* Second pass with tre_mach_empty() to get the list of
2381 tags and parameters. */
2382 status = tre_match_empty(stack, cat->left, tags,
2384 if (status != REG_OK)
2390 tre_set_union(mem, cat->right->firstpos, cat->left->firstpos,
2393 if (!node->firstpos)
2398 node->firstpos = cat->left->firstpos;
2401 /* Compute lastpos. */
2402 if (cat->right->nullable)
2404 /* The right side matches the empty string. Make a first pass
2405 with tre_match_empty() to get the number of tags and
2407 status = tre_match_empty(stack, cat->right,
2408 NULL, NULL, &num_tags);
2409 if (status != REG_OK)
2411 /* Allocate arrays for the tags and parameters. */
2412 tags = xmalloc(sizeof(int) * (num_tags + 1));
2417 /* Second pass with tre_mach_empty() to get the list of
2418 tags and parameters. */
2419 status = tre_match_empty(stack, cat->right, tags,
2421 if (status != REG_OK)
2427 tre_set_union(mem, cat->left->lastpos, cat->right->lastpos,
2435 node->lastpos = cat->right->lastpos;
2450 /* Adds a transition from each position in `p1' to each position in `p2'. */
2451 static reg_errcode_t
2452 tre_make_trans(tre_pos_and_tags_t *p1, tre_pos_and_tags_t *p2,
2453 tre_tnfa_transition_t *transitions,
2454 int *counts, int *offs)
2456 tre_pos_and_tags_t *orig_p2 = p2;
2457 tre_tnfa_transition_t *trans;
2458 int i, j, k, l, dup, prev_p2_pos;
2460 if (transitions != NULL)
2461 while (p1->position >= 0)
2465 while (p2->position >= 0)
2467 /* Optimization: if this position was already handled, skip it. */
2468 if (p2->position == prev_p2_pos)
2473 prev_p2_pos = p2->position;
2474 /* Set `trans' to point to the next unused transition from
2475 position `p1->position'. */
2476 trans = transitions + offs[p1->position];
2477 while (trans->state != NULL)
2480 /* If we find a previous transition from `p1->position' to
2481 `p2->position', it is overwritten. This can happen only
2482 if there are nested loops in the regexp, like in "((a)*)*".
2483 In POSIX.2 repetition using the outer loop is always
2484 preferred over using the inner loop. Therefore the
2485 transition for the inner loop is useless and can be thrown
2487 /* XXX - The same position is used for all nodes in a bracket
2488 expression, so this optimization cannot be used (it will
2489 break bracket expressions) unless I figure out a way to
2491 if (trans->state_id == p2->position)
2499 if (trans->state == NULL)
2500 (trans + 1)->state = NULL;
2501 /* Use the character ranges, assertions, etc. from `p1' for
2502 the transition from `p1' to `p2'. */
2503 trans->code_min = p1->code_min;
2504 trans->code_max = p1->code_max;
2505 trans->state = transitions + offs[p2->position];
2506 trans->state_id = p2->position;
2507 trans->assertions = p1->assertions | p2->assertions
2508 | (p1->class ? ASSERT_CHAR_CLASS : 0)
2509 | (p1->neg_classes != NULL ? ASSERT_CHAR_CLASS_NEG : 0);
2510 if (p1->backref >= 0)
2512 assert((trans->assertions & ASSERT_CHAR_CLASS) == 0);
2513 assert(p2->backref < 0);
2514 trans->u.backref = p1->backref;
2515 trans->assertions |= ASSERT_BACKREF;
2518 trans->u.class = p1->class;
2519 if (p1->neg_classes != NULL)
2521 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++);
2522 trans->neg_classes =
2523 xmalloc(sizeof(*trans->neg_classes) * (i + 1));
2524 if (trans->neg_classes == NULL)
2526 for (i = 0; p1->neg_classes[i] != (tre_ctype_t)0; i++)
2527 trans->neg_classes[i] = p1->neg_classes[i];
2528 trans->neg_classes[i] = (tre_ctype_t)0;
2531 trans->neg_classes = NULL;
2533 /* Find out how many tags this transition has. */
2535 if (p1->tags != NULL)
2536 while(p1->tags[i] >= 0)
2539 if (p2->tags != NULL)
2540 while(p2->tags[j] >= 0)
2543 /* If we are overwriting a transition, free the old tag array. */
2544 if (trans->tags != NULL)
2548 /* If there were any tags, allocate an array and fill it. */
2551 trans->tags = xmalloc(sizeof(*trans->tags) * (i + j + 1));
2555 if (p1->tags != NULL)
2556 while(p1->tags[i] >= 0)
2558 trans->tags[i] = p1->tags[i];
2563 if (p2->tags != NULL)
2564 while (p2->tags[j] >= 0)
2566 /* Don't add duplicates. */
2568 for (k = 0; k < i; k++)
2569 if (trans->tags[k] == p2->tags[j])
2575 trans->tags[l++] = p2->tags[j];
2578 trans->tags[l] = -1;
2586 /* Compute a maximum limit for the number of transitions leaving
2588 while (p1->position >= 0)
2591 while (p2->position >= 0)
2593 counts[p1->position]++;
2601 /* Converts the syntax tree to a TNFA. All the transitions in the TNFA are
2602 labelled with one character range (there are no transitions on empty
2603 strings). The TNFA takes O(n^2) space in the worst case, `n' is size of
2605 static reg_errcode_t
2606 tre_ast_to_tnfa(tre_ast_node_t *node, tre_tnfa_transition_t *transitions,
2607 int *counts, int *offs)
2610 tre_catenation_t *cat;
2611 tre_iteration_t *iter;
2612 reg_errcode_t errcode = REG_OK;
2614 /* XXX - recurse using a stack!. */
2620 uni = (tre_union_t *)node->obj;
2621 errcode = tre_ast_to_tnfa(uni->left, transitions, counts, offs);
2622 if (errcode != REG_OK)
2624 errcode = tre_ast_to_tnfa(uni->right, transitions, counts, offs);
2628 cat = (tre_catenation_t *)node->obj;
2629 /* Add a transition from each position in cat->left->lastpos
2630 to each position in cat->right->firstpos. */
2631 errcode = tre_make_trans(cat->left->lastpos, cat->right->firstpos,
2632 transitions, counts, offs);
2633 if (errcode != REG_OK)
2635 errcode = tre_ast_to_tnfa(cat->left, transitions, counts, offs);
2636 if (errcode != REG_OK)
2638 errcode = tre_ast_to_tnfa(cat->right, transitions, counts, offs);
2642 iter = (tre_iteration_t *)node->obj;
2643 assert(iter->max == -1 || iter->max == 1);
2645 if (iter->max == -1)
2647 assert(iter->min == 0 || iter->min == 1);
2648 /* Add a transition from each last position in the iterated
2649 expression to each first position. */
2650 errcode = tre_make_trans(iter->arg->lastpos, iter->arg->firstpos,
2651 transitions, counts, offs);
2652 if (errcode != REG_OK)
2655 errcode = tre_ast_to_tnfa(iter->arg, transitions, counts, offs);
2662 #define ERROR_EXIT(err) \
2666 if (/*CONSTCOND*/1) \
2669 while (/*CONSTCOND*/0)
2673 regcomp(regex_t *restrict preg, const char *restrict regex, int cflags)
2676 tre_ast_node_t *tree, *tmp_ast_l, *tmp_ast_r;
2677 tre_pos_and_tags_t *p;
2678 int *counts = NULL, *offs = NULL;
2680 tre_tnfa_transition_t *transitions, *initial;
2681 tre_tnfa_t *tnfa = NULL;
2682 tre_submatch_data_t *submatch_data;
2683 tre_tag_direction_t *tag_directions = NULL;
2684 reg_errcode_t errcode;
2687 /* Parse context. */
2688 tre_parse_ctx_t parse_ctx;
2690 /* Allocate a stack used throughout the compilation process for various
2692 stack = tre_stack_new(512, 10240, 128);
2695 /* Allocate a fast memory allocator. */
2696 mem = tre_mem_new();
2699 tre_stack_destroy(stack);
2703 /* Parse the regexp. */
2704 memset(&parse_ctx, 0, sizeof(parse_ctx));
2705 parse_ctx.mem = mem;
2706 parse_ctx.stack = stack;
2707 parse_ctx.re = regex;
2708 parse_ctx.cflags = cflags;
2709 parse_ctx.max_backref = -1;
2710 errcode = tre_parse(&parse_ctx);
2711 if (errcode != REG_OK)
2712 ERROR_EXIT(errcode);
2713 preg->re_nsub = parse_ctx.submatch_id - 1;
2717 tre_ast_print(tree);
2718 #endif /* TRE_DEBUG */
2720 /* Referring to nonexistent subexpressions is illegal. */
2721 if (parse_ctx.max_backref > (int)preg->re_nsub)
2722 ERROR_EXIT(REG_ESUBREG);
2724 /* Allocate the TNFA struct. */
2725 tnfa = xcalloc(1, sizeof(tre_tnfa_t));
2727 ERROR_EXIT(REG_ESPACE);
2728 tnfa->have_backrefs = parse_ctx.max_backref >= 0;
2729 tnfa->have_approx = 0;
2730 tnfa->num_submatches = parse_ctx.submatch_id;
2732 /* Set up tags for submatch addressing. If REG_NOSUB is set and the
2733 regexp does not have back references, this can be skipped. */
2734 if (tnfa->have_backrefs || !(cflags & REG_NOSUB))
2737 /* Figure out how many tags we will need. */
2738 errcode = tre_add_tags(NULL, stack, tree, tnfa);
2739 if (errcode != REG_OK)
2740 ERROR_EXIT(errcode);
2742 if (tnfa->num_tags > 0)
2744 tag_directions = xmalloc(sizeof(*tag_directions)
2745 * (tnfa->num_tags + 1));
2746 if (tag_directions == NULL)
2747 ERROR_EXIT(REG_ESPACE);
2748 tnfa->tag_directions = tag_directions;
2749 memset(tag_directions, -1,
2750 sizeof(*tag_directions) * (tnfa->num_tags + 1));
2752 tnfa->minimal_tags = xcalloc((unsigned)tnfa->num_tags * 2 + 1,
2753 sizeof(*tnfa->minimal_tags));
2754 if (tnfa->minimal_tags == NULL)
2755 ERROR_EXIT(REG_ESPACE);
2757 submatch_data = xcalloc((unsigned)parse_ctx.submatch_id,
2758 sizeof(*submatch_data));
2759 if (submatch_data == NULL)
2760 ERROR_EXIT(REG_ESPACE);
2761 tnfa->submatch_data = submatch_data;
2763 errcode = tre_add_tags(mem, stack, tree, tnfa);
2764 if (errcode != REG_OK)
2765 ERROR_EXIT(errcode);
2769 /* Expand iteration nodes. */
2770 errcode = tre_expand_ast(mem, stack, tree, &parse_ctx.position,
2772 if (errcode != REG_OK)
2773 ERROR_EXIT(errcode);
2775 /* Add a dummy node for the final state.
2776 XXX - For certain patterns this dummy node can be optimized away,
2777 for example "a*" or "ab*". Figure out a simple way to detect
2778 this possibility. */
2780 tmp_ast_r = tre_ast_new_literal(mem, 0, 0, parse_ctx.position++);
2781 if (tmp_ast_r == NULL)
2782 ERROR_EXIT(REG_ESPACE);
2784 tree = tre_ast_new_catenation(mem, tmp_ast_l, tmp_ast_r);
2786 ERROR_EXIT(REG_ESPACE);
2788 errcode = tre_compute_nfl(mem, stack, tree);
2789 if (errcode != REG_OK)
2790 ERROR_EXIT(errcode);
2792 counts = xmalloc(sizeof(int) * parse_ctx.position);
2794 ERROR_EXIT(REG_ESPACE);
2796 offs = xmalloc(sizeof(int) * parse_ctx.position);
2798 ERROR_EXIT(REG_ESPACE);
2800 for (i = 0; i < parse_ctx.position; i++)
2802 tre_ast_to_tnfa(tree, NULL, counts, NULL);
2805 for (i = 0; i < parse_ctx.position; i++)
2808 add += counts[i] + 1;
2811 transitions = xcalloc((unsigned)add + 1, sizeof(*transitions));
2812 if (transitions == NULL)
2813 ERROR_EXIT(REG_ESPACE);
2814 tnfa->transitions = transitions;
2815 tnfa->num_transitions = add;
2817 errcode = tre_ast_to_tnfa(tree, transitions, counts, offs);
2818 if (errcode != REG_OK)
2819 ERROR_EXIT(errcode);
2821 tnfa->firstpos_chars = NULL;
2825 while (p->position >= 0)
2831 initial = xcalloc((unsigned)i + 1, sizeof(tre_tnfa_transition_t));
2832 if (initial == NULL)
2833 ERROR_EXIT(REG_ESPACE);
2834 tnfa->initial = initial;
2837 for (p = tree->firstpos; p->position >= 0; p++)
2839 initial[i].state = transitions + offs[p->position];
2840 initial[i].state_id = p->position;
2841 initial[i].tags = NULL;
2842 /* Copy the arrays p->tags, and p->params, they are allocated
2843 from a tre_mem object. */
2847 for (j = 0; p->tags[j] >= 0; j++);
2848 initial[i].tags = xmalloc(sizeof(*p->tags) * (j + 1));
2849 if (!initial[i].tags)
2850 ERROR_EXIT(REG_ESPACE);
2851 memcpy(initial[i].tags, p->tags, sizeof(*p->tags) * (j + 1));
2853 initial[i].assertions = p->assertions;
2856 initial[i].state = NULL;
2858 tnfa->num_transitions = add;
2859 tnfa->final = transitions + offs[tree->lastpos[0].position];
2860 tnfa->num_states = parse_ctx.position;
2861 tnfa->cflags = cflags;
2863 tre_mem_destroy(mem);
2864 tre_stack_destroy(stack);
2868 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2872 /* Free everything that was allocated and return the error code. */
2873 tre_mem_destroy(mem);
2875 tre_stack_destroy(stack);
2880 preg->TRE_REGEX_T_FIELD = (void *)tnfa;
2889 regfree(regex_t *preg)
2893 tre_tnfa_transition_t *trans;
2895 tnfa = (void *)preg->TRE_REGEX_T_FIELD;
2899 for (i = 0; i < tnfa->num_transitions; i++)
2900 if (tnfa->transitions[i].state)
2902 if (tnfa->transitions[i].tags)
2903 xfree(tnfa->transitions[i].tags);
2904 if (tnfa->transitions[i].neg_classes)
2905 xfree(tnfa->transitions[i].neg_classes);
2907 if (tnfa->transitions)
2908 xfree(tnfa->transitions);
2912 for (trans = tnfa->initial; trans->state; trans++)
2917 xfree(tnfa->initial);
2920 if (tnfa->submatch_data)
2922 for (i = 0; i < tnfa->num_submatches; i++)
2923 if (tnfa->submatch_data[i].parents)
2924 xfree(tnfa->submatch_data[i].parents);
2925 xfree(tnfa->submatch_data);
2928 if (tnfa->tag_directions)
2929 xfree(tnfa->tag_directions);
2930 if (tnfa->firstpos_chars)
2931 xfree(tnfa->firstpos_chars);
2932 if (tnfa->minimal_tags)
2933 xfree(tnfa->minimal_tags);