fix missing integer overflow checks in regexec buffer size computations
[musl] / src / regex / regexec.c
1 /*
2   regexec.c - TRE POSIX compatible matching functions (and more).
3
4   Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5   All rights reserved.
6
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions
9   are met:
10
11     1. Redistributions of source code must retain the above copyright
12        notice, this list of conditions and the following disclaimer.
13
14     2. Redistributions in binary form must reproduce the above copyright
15        notice, this list of conditions and the following disclaimer in the
16        documentation and/or other materials provided with the distribution.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 */
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wchar.h>
35 #include <wctype.h>
36 #include <limits.h>
37 #include <stdint.h>
38
39 #include <regex.h>
40
41 #include "tre.h"
42
43 #include <assert.h>
44
45 static void
46 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
47                 const tre_tnfa_t *tnfa, int *tags, int match_eo);
48
49 /***********************************************************************
50  from tre-match-utils.h
51 ***********************************************************************/
52
53 #define GET_NEXT_WCHAR() do {                                                 \
54     prev_c = next_c; pos += pos_add_next;                                     \
55     if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) {        \
56         if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; }         \
57         else pos_add_next++;                                                  \
58     }                                                                         \
59     str_byte += pos_add_next;                                                 \
60   } while (0)
61
62 #define IS_WORD_CHAR(c)  ((c) == L'_' || tre_isalnum(c))
63
64 #define CHECK_ASSERTIONS(assertions)                                          \
65   (((assertions & ASSERT_AT_BOL)                                              \
66     && (pos > 0 || reg_notbol)                                                \
67     && (prev_c != L'\n' || !reg_newline))                                     \
68    || ((assertions & ASSERT_AT_EOL)                                           \
69        && (next_c != L'\0' || reg_noteol)                                     \
70        && (next_c != L'\n' || !reg_newline))                                  \
71    || ((assertions & ASSERT_AT_BOW)                                           \
72        && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))                    \
73    || ((assertions & ASSERT_AT_EOW)                                           \
74        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                    \
75    || ((assertions & ASSERT_AT_WB)                                            \
76        && (pos != 0 && next_c != L'\0'                                        \
77            && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
78    || ((assertions & ASSERT_AT_WB_NEG)                                        \
79        && (pos == 0 || next_c == L'\0'                                        \
80            || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
81
82 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
83   (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
84        && !(tnfa->cflags & REG_ICASE)                                         \
85        && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
86     || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
87         && (tnfa->cflags & REG_ICASE)                                         \
88         && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
89         && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
90     || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
91         && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
92                                       tnfa->cflags & REG_ICASE)))
93
94
95
96
97 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
98 static int
99 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
100               int *t1, int *t2)
101 {
102   int i;
103   for (i = 0; i < num_tags; i++)
104     {
105       if (tag_directions[i] == TRE_TAG_MINIMIZE)
106         {
107           if (t1[i] < t2[i])
108             return 1;
109           if (t1[i] > t2[i])
110             return 0;
111         }
112       else
113         {
114           if (t1[i] > t2[i])
115             return 1;
116           if (t1[i] < t2[i])
117             return 0;
118         }
119     }
120   /*  assert(0);*/
121   return 0;
122 }
123
124 static int
125 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
126 {
127   while (*classes != (tre_ctype_t)0)
128     if ((!icase && tre_isctype(wc, *classes))
129         || (icase && (tre_isctype(tre_toupper(wc), *classes)
130                       || tre_isctype(tre_tolower(wc), *classes))))
131       return 1; /* Match. */
132     else
133       classes++;
134   return 0; /* No match. */
135 }
136
137
138 /***********************************************************************
139  from tre-match-parallel.c
140 ***********************************************************************/
141
142 /*
143   This algorithm searches for matches basically by reading characters
144   in the searched string one by one, starting at the beginning.  All
145   matching paths in the TNFA are traversed in parallel.  When two or
146   more paths reach the same state, exactly one is chosen according to
147   tag ordering rules; if returning submatches is not required it does
148   not matter which path is chosen.
149
150   The worst case time required for finding the leftmost and longest
151   match, or determining that there is no match, is always linearly
152   dependent on the length of the text being searched.
153
154   This algorithm cannot handle TNFAs with back referencing nodes.
155   See `tre-match-backtrack.c'.
156 */
157
158 typedef struct {
159   tre_tnfa_transition_t *state;
160   int *tags;
161 } tre_tnfa_reach_t;
162
163 typedef struct {
164   int pos;
165   int **tags;
166 } tre_reach_pos_t;
167
168
169 static reg_errcode_t
170 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
171                       int *match_tags, int eflags,
172                       int *match_end_ofs)
173 {
174   /* State variables required by GET_NEXT_WCHAR. */
175   tre_char_t prev_c = 0, next_c = 0;
176   const char *str_byte = string;
177   int pos = -1;
178   int pos_add_next = 1;
179 #ifdef TRE_MBSTATE
180   mbstate_t mbstate;
181 #endif /* TRE_MBSTATE */
182   int reg_notbol = eflags & REG_NOTBOL;
183   int reg_noteol = eflags & REG_NOTEOL;
184   int reg_newline = tnfa->cflags & REG_NEWLINE;
185   reg_errcode_t ret;
186
187   char *buf;
188   tre_tnfa_transition_t *trans_i;
189   tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
190   tre_reach_pos_t *reach_pos;
191   int *tag_i;
192   int num_tags, i;
193
194   int match_eo = -1;       /* end offset of match (-1 if no match found yet) */
195   int new_match = 0;
196   int *tmp_tags = NULL;
197   int *tmp_iptr;
198
199 #ifdef TRE_MBSTATE
200   memset(&mbstate, '\0', sizeof(mbstate));
201 #endif /* TRE_MBSTATE */
202
203   if (!match_tags)
204     num_tags = 0;
205   else
206     num_tags = tnfa->num_tags;
207
208   /* Allocate memory for temporary data required for matching.  This needs to
209      be done for every matching operation to be thread safe.  This allocates
210      everything in a single large block with calloc(). */
211   {
212     size_t tbytes, rbytes, pbytes, xbytes, total_bytes;
213     char *tmp_buf;
214
215     /* Ensure that tbytes and xbytes*num_states cannot overflow, and that
216      * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */
217     if (num_tags > SIZE_MAX/(8 * sizeof(int) * tnfa->num_states))
218       goto error_exit;
219
220     /* Likewise check rbytes. */
221     if (tnfa->num_states+1 > SIZE_MAX/(8 * sizeof(*reach_next)))
222       goto error_exit;
223
224     /* Likewise check pbytes. */
225     if (tnfa->num_states > SIZE_MAX/(8 * sizeof(*reach_pos)))
226       goto error_exit;
227
228     /* Compute the length of the block we need. */
229     tbytes = sizeof(*tmp_tags) * num_tags;
230     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
231     pbytes = sizeof(*reach_pos) * tnfa->num_states;
232     xbytes = sizeof(int) * num_tags;
233     total_bytes =
234       (sizeof(long) - 1) * 4 /* for alignment paddings */
235       + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
236
237     /* Allocate the memory. */
238     buf = calloc(total_bytes, 1);
239     if (buf == NULL)
240       return REG_ESPACE;
241
242     /* Get the various pointers within tmp_buf (properly aligned). */
243     tmp_tags = (void *)buf;
244     tmp_buf = buf + tbytes;
245     tmp_buf += ALIGN(tmp_buf, long);
246     reach_next = (void *)tmp_buf;
247     tmp_buf += rbytes;
248     tmp_buf += ALIGN(tmp_buf, long);
249     reach = (void *)tmp_buf;
250     tmp_buf += rbytes;
251     tmp_buf += ALIGN(tmp_buf, long);
252     reach_pos = (void *)tmp_buf;
253     tmp_buf += pbytes;
254     tmp_buf += ALIGN(tmp_buf, long);
255     for (i = 0; i < tnfa->num_states; i++)
256       {
257         reach[i].tags = (void *)tmp_buf;
258         tmp_buf += xbytes;
259         reach_next[i].tags = (void *)tmp_buf;
260         tmp_buf += xbytes;
261       }
262   }
263
264   for (i = 0; i < tnfa->num_states; i++)
265     reach_pos[i].pos = -1;
266
267   GET_NEXT_WCHAR();
268   pos = 0;
269
270   reach_next_i = reach_next;
271   while (1)
272     {
273       /* If no match found yet, add the initial states to `reach_next'. */
274       if (match_eo < 0)
275         {
276           trans_i = tnfa->initial;
277           while (trans_i->state != NULL)
278             {
279               if (reach_pos[trans_i->state_id].pos < pos)
280                 {
281                   if (trans_i->assertions
282                       && CHECK_ASSERTIONS(trans_i->assertions))
283                     {
284                       trans_i++;
285                       continue;
286                     }
287
288                   reach_next_i->state = trans_i->state;
289                   for (i = 0; i < num_tags; i++)
290                     reach_next_i->tags[i] = -1;
291                   tag_i = trans_i->tags;
292                   if (tag_i)
293                     while (*tag_i >= 0)
294                       {
295                         if (*tag_i < num_tags)
296                           reach_next_i->tags[*tag_i] = pos;
297                         tag_i++;
298                       }
299                   if (reach_next_i->state == tnfa->final)
300                     {
301                       match_eo = pos;
302                       new_match = 1;
303                       for (i = 0; i < num_tags; i++)
304                         match_tags[i] = reach_next_i->tags[i];
305                     }
306                   reach_pos[trans_i->state_id].pos = pos;
307                   reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
308                   reach_next_i++;
309                 }
310               trans_i++;
311             }
312           reach_next_i->state = NULL;
313         }
314       else
315         {
316           if (num_tags == 0 || reach_next_i == reach_next)
317             /* We have found a match. */
318             break;
319         }
320
321       /* Check for end of string. */
322       if (!next_c) break;
323
324       GET_NEXT_WCHAR();
325
326       /* Swap `reach' and `reach_next'. */
327       reach_i = reach;
328       reach = reach_next;
329       reach_next = reach_i;
330
331       /* For each state in `reach', weed out states that don't fulfill the
332          minimal matching conditions. */
333       if (tnfa->num_minimals && new_match)
334         {
335           new_match = 0;
336           reach_next_i = reach_next;
337           for (reach_i = reach; reach_i->state; reach_i++)
338             {
339               int skip = 0;
340               for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
341                 {
342                   int end = tnfa->minimal_tags[i];
343                   int start = tnfa->minimal_tags[i + 1];
344                   if (end >= num_tags)
345                     {
346                       skip = 1;
347                       break;
348                     }
349                   else if (reach_i->tags[start] == match_tags[start]
350                            && reach_i->tags[end] < match_tags[end])
351                     {
352                       skip = 1;
353                       break;
354                     }
355                 }
356               if (!skip)
357                 {
358                   reach_next_i->state = reach_i->state;
359                   tmp_iptr = reach_next_i->tags;
360                   reach_next_i->tags = reach_i->tags;
361                   reach_i->tags = tmp_iptr;
362                   reach_next_i++;
363                 }
364             }
365           reach_next_i->state = NULL;
366
367           /* Swap `reach' and `reach_next'. */
368           reach_i = reach;
369           reach = reach_next;
370           reach_next = reach_i;
371         }
372
373       /* For each state in `reach' see if there is a transition leaving with
374          the current input symbol to a state not yet in `reach_next', and
375          add the destination states to `reach_next'. */
376       reach_next_i = reach_next;
377       for (reach_i = reach; reach_i->state; reach_i++)
378         {
379           for (trans_i = reach_i->state; trans_i->state; trans_i++)
380             {
381               /* Does this transition match the input symbol? */
382               if (trans_i->code_min <= (tre_cint_t)prev_c &&
383                   trans_i->code_max >= (tre_cint_t)prev_c)
384                 {
385                   if (trans_i->assertions
386                       && (CHECK_ASSERTIONS(trans_i->assertions)
387                           || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
388                     {
389                       continue;
390                     }
391
392                   /* Compute the tags after this transition. */
393                   for (i = 0; i < num_tags; i++)
394                     tmp_tags[i] = reach_i->tags[i];
395                   tag_i = trans_i->tags;
396                   if (tag_i != NULL)
397                     while (*tag_i >= 0)
398                       {
399                         if (*tag_i < num_tags)
400                           tmp_tags[*tag_i] = pos;
401                         tag_i++;
402                       }
403
404                   if (reach_pos[trans_i->state_id].pos < pos)
405                     {
406                       /* Found an unvisited node. */
407                       reach_next_i->state = trans_i->state;
408                       tmp_iptr = reach_next_i->tags;
409                       reach_next_i->tags = tmp_tags;
410                       tmp_tags = tmp_iptr;
411                       reach_pos[trans_i->state_id].pos = pos;
412                       reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
413
414                       if (reach_next_i->state == tnfa->final
415                           && (match_eo == -1
416                               || (num_tags > 0
417                                   && reach_next_i->tags[0] <= match_tags[0])))
418                         {
419                           match_eo = pos;
420                           new_match = 1;
421                           for (i = 0; i < num_tags; i++)
422                             match_tags[i] = reach_next_i->tags[i];
423                         }
424                       reach_next_i++;
425
426                     }
427                   else
428                     {
429                       assert(reach_pos[trans_i->state_id].pos == pos);
430                       /* Another path has also reached this state.  We choose
431                          the winner by examining the tag values for both
432                          paths. */
433                       if (tre_tag_order(num_tags, tnfa->tag_directions,
434                                         tmp_tags,
435                                         *reach_pos[trans_i->state_id].tags))
436                         {
437                           /* The new path wins. */
438                           tmp_iptr = *reach_pos[trans_i->state_id].tags;
439                           *reach_pos[trans_i->state_id].tags = tmp_tags;
440                           if (trans_i->state == tnfa->final)
441                             {
442                               match_eo = pos;
443                               new_match = 1;
444                               for (i = 0; i < num_tags; i++)
445                                 match_tags[i] = tmp_tags[i];
446                             }
447                           tmp_tags = tmp_iptr;
448                         }
449                     }
450                 }
451             }
452         }
453       reach_next_i->state = NULL;
454     }
455
456   *match_end_ofs = match_eo;
457   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
458 error_exit:
459   xfree(buf);
460   return ret;
461 }
462
463
464
465 /***********************************************************************
466  from tre-match-backtrack.c
467 ***********************************************************************/
468
469 /*
470   This matcher is for regexps that use back referencing.  Regexp matching
471   with back referencing is an NP-complete problem on the number of back
472   references.  The easiest way to match them is to use a backtracking
473   routine which basically goes through all possible paths in the TNFA
474   and chooses the one which results in the best (leftmost and longest)
475   match.  This can be spectacularly expensive and may run out of stack
476   space, but there really is no better known generic algorithm.  Quoting
477   Henry Spencer from comp.compilers:
478   <URL: http://compilers.iecc.com/comparch/article/93-03-102>
479
480     POSIX.2 REs require longest match, which is really exciting to
481     implement since the obsolete ("basic") variant also includes
482     \<digit>.  I haven't found a better way of tackling this than doing
483     a preliminary match using a DFA (or simulation) on a modified RE
484     that just replicates subREs for \<digit>, and then doing a
485     backtracking match to determine whether the subRE matches were
486     right.  This can be rather slow, but I console myself with the
487     thought that people who use \<digit> deserve very slow execution.
488     (Pun unintentional but very appropriate.)
489
490 */
491
492 typedef struct {
493   int pos;
494   const char *str_byte;
495   tre_tnfa_transition_t *state;
496   int state_id;
497   int next_c;
498   int *tags;
499 #ifdef TRE_MBSTATE
500   mbstate_t mbstate;
501 #endif /* TRE_MBSTATE */
502 } tre_backtrack_item_t;
503
504 typedef struct tre_backtrack_struct {
505   tre_backtrack_item_t item;
506   struct tre_backtrack_struct *prev;
507   struct tre_backtrack_struct *next;
508 } *tre_backtrack_t;
509
510 #ifdef TRE_MBSTATE
511 #define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
512 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
513 #else /* !TRE_MBSTATE */
514 #define BT_STACK_MBSTATE_IN
515 #define BT_STACK_MBSTATE_OUT
516 #endif /* !TRE_MBSTATE */
517
518 #define tre_bt_mem_new            tre_mem_new
519 #define tre_bt_mem_alloc          tre_mem_alloc
520 #define tre_bt_mem_destroy        tre_mem_destroy
521
522
523 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
524   do                                                                          \
525     {                                                                         \
526       int i;                                                                  \
527       if (!stack->next)                                                       \
528         {                                                                     \
529           tre_backtrack_t s;                                                  \
530           s = tre_bt_mem_alloc(mem, sizeof(*s));                              \
531           if (!s)                                                             \
532             {                                                                 \
533               tre_bt_mem_destroy(mem);                                        \
534               if (tags)                                                       \
535                 xfree(tags);                                                  \
536               if (pmatch)                                                     \
537                 xfree(pmatch);                                                \
538               if (states_seen)                                                \
539                 xfree(states_seen);                                           \
540               return REG_ESPACE;                                              \
541             }                                                                 \
542           s->prev = stack;                                                    \
543           s->next = NULL;                                                     \
544           s->item.tags = tre_bt_mem_alloc(mem,                                \
545                                           sizeof(*tags) * tnfa->num_tags);    \
546           if (!s->item.tags)                                                  \
547             {                                                                 \
548               tre_bt_mem_destroy(mem);                                        \
549               if (tags)                                                       \
550                 xfree(tags);                                                  \
551               if (pmatch)                                                     \
552                 xfree(pmatch);                                                \
553               if (states_seen)                                                \
554                 xfree(states_seen);                                           \
555               return REG_ESPACE;                                              \
556             }                                                                 \
557           stack->next = s;                                                    \
558           stack = s;                                                          \
559         }                                                                     \
560       else                                                                    \
561         stack = stack->next;                                                  \
562       stack->item.pos = (_pos);                                               \
563       stack->item.str_byte = (_str_byte);                                     \
564       stack->item.state = (_state);                                           \
565       stack->item.state_id = (_state_id);                                     \
566       stack->item.next_c = (_next_c);                                         \
567       for (i = 0; i < tnfa->num_tags; i++)                                    \
568         stack->item.tags[i] = (_tags)[i];                                     \
569       BT_STACK_MBSTATE_IN;                                                    \
570     }                                                                         \
571   while (0)
572
573 #define BT_STACK_POP()                                                        \
574   do                                                                          \
575     {                                                                         \
576       int i;                                                                  \
577       assert(stack->prev);                                                    \
578       pos = stack->item.pos;                                                  \
579       str_byte = stack->item.str_byte;                                        \
580       state = stack->item.state;                                              \
581       next_c = stack->item.next_c;                                            \
582       for (i = 0; i < tnfa->num_tags; i++)                                    \
583         tags[i] = stack->item.tags[i];                                        \
584       BT_STACK_MBSTATE_OUT;                                                   \
585       stack = stack->prev;                                                    \
586     }                                                                         \
587   while (0)
588
589 #undef MIN
590 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
591
592 static reg_errcode_t
593 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
594                        int *match_tags, int eflags, int *match_end_ofs)
595 {
596   /* State variables required by GET_NEXT_WCHAR. */
597   tre_char_t prev_c = 0, next_c = 0;
598   const char *str_byte = string;
599   int pos = 0;
600   int pos_add_next = 1;
601 #ifdef TRE_MBSTATE
602   mbstate_t mbstate;
603 #endif /* TRE_MBSTATE */
604   int reg_notbol = eflags & REG_NOTBOL;
605   int reg_noteol = eflags & REG_NOTEOL;
606   int reg_newline = tnfa->cflags & REG_NEWLINE;
607
608   /* These are used to remember the necessary values of the above
609      variables to return to the position where the current search
610      started from. */
611   int next_c_start;
612   const char *str_byte_start;
613   int pos_start = -1;
614 #ifdef TRE_MBSTATE
615   mbstate_t mbstate_start;
616 #endif /* TRE_MBSTATE */
617
618   /* End offset of best match so far, or -1 if no match found yet. */
619   int match_eo = -1;
620   /* Tag arrays. */
621   int *next_tags, *tags = NULL;
622   /* Current TNFA state. */
623   tre_tnfa_transition_t *state;
624   int *states_seen = NULL;
625
626   /* Memory allocator to for allocating the backtracking stack. */
627   tre_mem_t mem = tre_bt_mem_new();
628
629   /* The backtracking stack. */
630   tre_backtrack_t stack;
631
632   tre_tnfa_transition_t *trans_i;
633   regmatch_t *pmatch = NULL;
634   int ret;
635
636 #ifdef TRE_MBSTATE
637   memset(&mbstate, '\0', sizeof(mbstate));
638 #endif /* TRE_MBSTATE */
639
640   if (!mem)
641     return REG_ESPACE;
642   stack = tre_bt_mem_alloc(mem, sizeof(*stack));
643   if (!stack)
644     {
645       ret = REG_ESPACE;
646       goto error_exit;
647     }
648   stack->prev = NULL;
649   stack->next = NULL;
650
651   if (tnfa->num_tags)
652     {
653       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
654       if (!tags)
655         {
656           ret = REG_ESPACE;
657           goto error_exit;
658         }
659     }
660   if (tnfa->num_submatches)
661     {
662       pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
663       if (!pmatch)
664         {
665           ret = REG_ESPACE;
666           goto error_exit;
667         }
668     }
669   if (tnfa->num_states)
670     {
671       states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
672       if (!states_seen)
673         {
674           ret = REG_ESPACE;
675           goto error_exit;
676         }
677     }
678
679  retry:
680   {
681     int i;
682     for (i = 0; i < tnfa->num_tags; i++)
683       {
684         tags[i] = -1;
685         if (match_tags)
686           match_tags[i] = -1;
687       }
688     for (i = 0; i < tnfa->num_states; i++)
689       states_seen[i] = 0;
690   }
691
692   state = NULL;
693   pos = pos_start;
694   GET_NEXT_WCHAR();
695   pos_start = pos;
696   next_c_start = next_c;
697   str_byte_start = str_byte;
698 #ifdef TRE_MBSTATE
699   mbstate_start = mbstate;
700 #endif /* TRE_MBSTATE */
701
702   /* Handle initial states. */
703   next_tags = NULL;
704   for (trans_i = tnfa->initial; trans_i->state; trans_i++)
705     {
706       if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
707         {
708           continue;
709         }
710       if (state == NULL)
711         {
712           /* Start from this state. */
713           state = trans_i->state;
714           next_tags = trans_i->tags;
715         }
716       else
717         {
718           /* Backtrack to this state. */
719           BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
720                         trans_i->state_id, next_c, tags, mbstate);
721           {
722             int *tmp = trans_i->tags;
723             if (tmp)
724               while (*tmp >= 0)
725                 stack->item.tags[*tmp++] = pos;
726           }
727         }
728     }
729
730   if (next_tags)
731     for (; *next_tags >= 0; next_tags++)
732       tags[*next_tags] = pos;
733
734
735   if (state == NULL)
736     goto backtrack;
737
738   while (1)
739     {
740       tre_tnfa_transition_t *next_state;
741       int empty_br_match;
742
743       if (state == tnfa->final)
744         {
745           if (match_eo < pos
746               || (match_eo == pos
747                   && match_tags
748                   && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
749                                    tags, match_tags)))
750             {
751               int i;
752               /* This match wins the previous match. */
753               match_eo = pos;
754               if (match_tags)
755                 for (i = 0; i < tnfa->num_tags; i++)
756                   match_tags[i] = tags[i];
757             }
758           /* Our TNFAs never have transitions leaving from the final state,
759              so we jump right to backtracking. */
760           goto backtrack;
761         }
762
763       /* Go to the next character in the input string. */
764       empty_br_match = 0;
765       trans_i = state;
766       if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
767         {
768           /* This is a back reference state.  All transitions leaving from
769              this state have the same back reference "assertion".  Instead
770              of reading the next character, we match the back reference. */
771           int so, eo, bt = trans_i->u.backref;
772           int bt_len;
773           int result;
774
775           /* Get the substring we need to match against.  Remember to
776              turn off REG_NOSUB temporarily. */
777           tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
778                           tnfa, tags, pos);
779           so = pmatch[bt].rm_so;
780           eo = pmatch[bt].rm_eo;
781           bt_len = eo - so;
782
783           result = strncmp((const char*)string + so, str_byte - 1,
784                                  (size_t)bt_len);
785
786           if (result == 0)
787             {
788               /* Back reference matched.  Check for infinite loop. */
789               if (bt_len == 0)
790                 empty_br_match = 1;
791               if (empty_br_match && states_seen[trans_i->state_id])
792                 {
793                   goto backtrack;
794                 }
795
796               states_seen[trans_i->state_id] = empty_br_match;
797
798               /* Advance in input string and resync `prev_c', `next_c'
799                  and pos. */
800               str_byte += bt_len - 1;
801               pos += bt_len - 1;
802               GET_NEXT_WCHAR();
803             }
804           else
805             {
806               goto backtrack;
807             }
808         }
809       else
810         {
811           /* Check for end of string. */
812           if (next_c == L'\0')
813                 goto backtrack;
814
815           /* Read the next character. */
816           GET_NEXT_WCHAR();
817         }
818
819       next_state = NULL;
820       for (trans_i = state; trans_i->state; trans_i++)
821         {
822           if (trans_i->code_min <= (tre_cint_t)prev_c
823               && trans_i->code_max >= (tre_cint_t)prev_c)
824             {
825               if (trans_i->assertions
826                   && (CHECK_ASSERTIONS(trans_i->assertions)
827                       || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
828                 {
829                   continue;
830                 }
831
832               if (next_state == NULL)
833                 {
834                   /* First matching transition. */
835                   next_state = trans_i->state;
836                   next_tags = trans_i->tags;
837                 }
838               else
839                 {
840                   /* Second matching transition.  We may need to backtrack here
841                      to take this transition instead of the first one, so we
842                      push this transition in the backtracking stack so we can
843                      jump back here if needed. */
844                   BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
845                                 trans_i->state_id, next_c, tags, mbstate);
846                   {
847                     int *tmp;
848                     for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
849                       stack->item.tags[*tmp] = pos;
850                   }
851 #if 0 /* XXX - it's important not to look at all transitions here to keep
852          the stack small! */
853                   break;
854 #endif
855                 }
856             }
857         }
858
859       if (next_state != NULL)
860         {
861           /* Matching transitions were found.  Take the first one. */
862           state = next_state;
863
864           /* Update the tag values. */
865           if (next_tags)
866             while (*next_tags >= 0)
867               tags[*next_tags++] = pos;
868         }
869       else
870         {
871         backtrack:
872           /* A matching transition was not found.  Try to backtrack. */
873           if (stack->prev)
874             {
875               if (stack->item.state->assertions & ASSERT_BACKREF)
876                 {
877                   states_seen[stack->item.state_id] = 0;
878                 }
879
880               BT_STACK_POP();
881             }
882           else if (match_eo < 0)
883             {
884               /* Try starting from a later position in the input string. */
885               /* Check for end of string. */
886               if (next_c == L'\0')
887                     {
888                       break;
889                     }
890               next_c = next_c_start;
891 #ifdef TRE_MBSTATE
892               mbstate = mbstate_start;
893 #endif /* TRE_MBSTATE */
894               str_byte = str_byte_start;
895               goto retry;
896             }
897           else
898             {
899               break;
900             }
901         }
902     }
903
904   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
905   *match_end_ofs = match_eo;
906
907  error_exit:
908   tre_bt_mem_destroy(mem);
909 #ifndef TRE_USE_ALLOCA
910   if (tags)
911     xfree(tags);
912   if (pmatch)
913     xfree(pmatch);
914   if (states_seen)
915     xfree(states_seen);
916 #endif /* !TRE_USE_ALLOCA */
917
918   return ret;
919 }
920
921 /***********************************************************************
922  from regexec.c
923 ***********************************************************************/
924
925 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
926    endpoint values. */
927 static void
928 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
929                 const tre_tnfa_t *tnfa, int *tags, int match_eo)
930 {
931   tre_submatch_data_t *submatch_data;
932   unsigned int i, j;
933   int *parents;
934
935   i = 0;
936   if (match_eo >= 0 && !(cflags & REG_NOSUB))
937     {
938       /* Construct submatch offsets from the tags. */
939       submatch_data = tnfa->submatch_data;
940       while (i < tnfa->num_submatches && i < nmatch)
941         {
942           if (submatch_data[i].so_tag == tnfa->end_tag)
943             pmatch[i].rm_so = match_eo;
944           else
945             pmatch[i].rm_so = tags[submatch_data[i].so_tag];
946
947           if (submatch_data[i].eo_tag == tnfa->end_tag)
948             pmatch[i].rm_eo = match_eo;
949           else
950             pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
951
952           /* If either of the endpoints were not used, this submatch
953              was not part of the match. */
954           if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
955             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
956
957           i++;
958         }
959       /* Reset all submatches that are not within all of their parent
960          submatches. */
961       i = 0;
962       while (i < tnfa->num_submatches && i < nmatch)
963         {
964           if (pmatch[i].rm_eo == -1)
965             assert(pmatch[i].rm_so == -1);
966           assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
967
968           parents = submatch_data[i].parents;
969           if (parents != NULL)
970             for (j = 0; parents[j] >= 0; j++)
971               {
972                 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
973                     || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
974                   pmatch[i].rm_so = pmatch[i].rm_eo = -1;
975               }
976           i++;
977         }
978     }
979
980   while (i < nmatch)
981     {
982       pmatch[i].rm_so = -1;
983       pmatch[i].rm_eo = -1;
984       i++;
985     }
986 }
987
988
989 /*
990   Wrapper functions for POSIX compatible regexp matching.
991 */
992
993 int
994 regexec(const regex_t *restrict preg, const char *restrict string,
995           size_t nmatch, regmatch_t pmatch[restrict], int eflags)
996 {
997   tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
998   reg_errcode_t status;
999   int *tags = NULL, eo;
1000   if (tnfa->cflags & REG_NOSUB) nmatch = 0;
1001   if (tnfa->num_tags > 0 && nmatch > 0)
1002     {
1003       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
1004       if (tags == NULL)
1005         return REG_ESPACE;
1006     }
1007
1008   /* Dispatch to the appropriate matcher. */
1009   if (tnfa->have_backrefs)
1010     {
1011       /* The regex has back references, use the backtracking matcher. */
1012       status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
1013     }
1014   else
1015     {
1016       /* Exact matching, no back references, use the parallel matcher. */
1017       status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1018     }
1019
1020   if (status == REG_OK)
1021     /* A match was found, so fill the submatch registers. */
1022     tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1023   if (tags)
1024     xfree(tags);
1025   return status;
1026 }