regex: remove an unused local variable from regexec
[musl] / src / regex / regexec.c
1 /*
2   regexec.c - TRE POSIX compatible matching functions (and more).
3
4   Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5   All rights reserved.
6
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions
9   are met:
10
11     1. Redistributions of source code must retain the above copyright
12        notice, this list of conditions and the following disclaimer.
13
14     2. Redistributions in binary form must reproduce the above copyright
15        notice, this list of conditions and the following disclaimer in the
16        documentation and/or other materials provided with the distribution.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 */
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wchar.h>
35 #include <wctype.h>
36 #include <limits.h>
37
38 #include <regex.h>
39
40 #include "tre.h"
41
42 #include <assert.h>
43
44 static void
45 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
46                 const tre_tnfa_t *tnfa, int *tags, int match_eo);
47
48 /***********************************************************************
49  from tre-match-utils.h
50 ***********************************************************************/
51
52 #define GET_NEXT_WCHAR() do {                                                 \
53     prev_c = next_c; pos += pos_add_next;                                     \
54     if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) {        \
55         if (pos_add_next < 0) return REG_NOMATCH;                             \
56         else pos_add_next++;                                                  \
57     }                                                                         \
58     str_byte += pos_add_next;                                                 \
59   } while (0)
60
61 #define IS_WORD_CHAR(c)  ((c) == L'_' || tre_isalnum(c))
62
63 #define CHECK_ASSERTIONS(assertions)                                          \
64   (((assertions & ASSERT_AT_BOL)                                              \
65     && (pos > 0 || reg_notbol)                                                \
66     && (prev_c != L'\n' || !reg_newline))                                     \
67    || ((assertions & ASSERT_AT_EOL)                                           \
68        && (next_c != L'\0' || reg_noteol)                                     \
69        && (next_c != L'\n' || !reg_newline))                                  \
70    || ((assertions & ASSERT_AT_BOW)                                           \
71        && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))                    \
72    || ((assertions & ASSERT_AT_EOW)                                           \
73        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                    \
74    || ((assertions & ASSERT_AT_WB)                                            \
75        && (pos != 0 && next_c != L'\0'                                        \
76            && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
77    || ((assertions & ASSERT_AT_WB_NEG)                                        \
78        && (pos == 0 || next_c == L'\0'                                        \
79            || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
80
81 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
82   (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
83        && !(tnfa->cflags & REG_ICASE)                                         \
84        && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
85     || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
86         && (tnfa->cflags & REG_ICASE)                                         \
87         && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
88         && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
89     || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
90         && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
91                                       tnfa->cflags & REG_ICASE)))
92
93
94
95
96 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
97 static int
98 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
99               int *t1, int *t2)
100 {
101   int i;
102   for (i = 0; i < num_tags; i++)
103     {
104       if (tag_directions[i] == TRE_TAG_MINIMIZE)
105         {
106           if (t1[i] < t2[i])
107             return 1;
108           if (t1[i] > t2[i])
109             return 0;
110         }
111       else
112         {
113           if (t1[i] > t2[i])
114             return 1;
115           if (t1[i] < t2[i])
116             return 0;
117         }
118     }
119   /*  assert(0);*/
120   return 0;
121 }
122
123 static int
124 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
125 {
126   while (*classes != (tre_ctype_t)0)
127     if ((!icase && tre_isctype(wc, *classes))
128         || (icase && (tre_isctype(tre_toupper(wc), *classes)
129                       || tre_isctype(tre_tolower(wc), *classes))))
130       return 1; /* Match. */
131     else
132       classes++;
133   return 0; /* No match. */
134 }
135
136
137 /***********************************************************************
138  from tre-match-parallel.c
139 ***********************************************************************/
140
141 /*
142   This algorithm searches for matches basically by reading characters
143   in the searched string one by one, starting at the beginning.  All
144   matching paths in the TNFA are traversed in parallel.  When two or
145   more paths reach the same state, exactly one is chosen according to
146   tag ordering rules; if returning submatches is not required it does
147   not matter which path is chosen.
148
149   The worst case time required for finding the leftmost and longest
150   match, or determining that there is no match, is always linearly
151   dependent on the length of the text being searched.
152
153   This algorithm cannot handle TNFAs with back referencing nodes.
154   See `tre-match-backtrack.c'.
155 */
156
157 typedef struct {
158   tre_tnfa_transition_t *state;
159   int *tags;
160 } tre_tnfa_reach_t;
161
162 typedef struct {
163   int pos;
164   int **tags;
165 } tre_reach_pos_t;
166
167
168 static reg_errcode_t
169 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
170                       int *match_tags, int eflags,
171                       int *match_end_ofs)
172 {
173   /* State variables required by GET_NEXT_WCHAR. */
174   tre_char_t prev_c = 0, next_c = 0;
175   const char *str_byte = string;
176   int pos = -1;
177   int pos_add_next = 1;
178 #ifdef TRE_MBSTATE
179   mbstate_t mbstate;
180 #endif /* TRE_MBSTATE */
181   int reg_notbol = eflags & REG_NOTBOL;
182   int reg_noteol = eflags & REG_NOTEOL;
183   int reg_newline = tnfa->cflags & REG_NEWLINE;
184
185   char *buf;
186   tre_tnfa_transition_t *trans_i;
187   tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
188   tre_reach_pos_t *reach_pos;
189   int *tag_i;
190   int num_tags, i;
191
192   int match_eo = -1;       /* end offset of match (-1 if no match found yet) */
193   int new_match = 0;
194   int *tmp_tags = NULL;
195   int *tmp_iptr;
196
197 #ifdef TRE_MBSTATE
198   memset(&mbstate, '\0', sizeof(mbstate));
199 #endif /* TRE_MBSTATE */
200
201   if (!match_tags)
202     num_tags = 0;
203   else
204     num_tags = tnfa->num_tags;
205
206   /* Allocate memory for temporary data required for matching.  This needs to
207      be done for every matching operation to be thread safe.  This allocates
208      everything in a single large block from the stack frame using alloca()
209      or with malloc() if alloca is unavailable. */
210   {
211     int tbytes, rbytes, pbytes, xbytes, total_bytes;
212     char *tmp_buf;
213     /* Compute the length of the block we need. */
214     tbytes = sizeof(*tmp_tags) * num_tags;
215     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
216     pbytes = sizeof(*reach_pos) * tnfa->num_states;
217     xbytes = sizeof(int) * num_tags;
218     total_bytes =
219       (sizeof(long) - 1) * 4 /* for alignment paddings */
220       + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
221
222     /* Allocate the memory. */
223     buf = xmalloc((unsigned)total_bytes);
224     if (buf == NULL)
225       return REG_ESPACE;
226     memset(buf, 0, (size_t)total_bytes);
227
228     /* Get the various pointers within tmp_buf (properly aligned). */
229     tmp_tags = (void *)buf;
230     tmp_buf = buf + tbytes;
231     tmp_buf += ALIGN(tmp_buf, long);
232     reach_next = (void *)tmp_buf;
233     tmp_buf += rbytes;
234     tmp_buf += ALIGN(tmp_buf, long);
235     reach = (void *)tmp_buf;
236     tmp_buf += rbytes;
237     tmp_buf += ALIGN(tmp_buf, long);
238     reach_pos = (void *)tmp_buf;
239     tmp_buf += pbytes;
240     tmp_buf += ALIGN(tmp_buf, long);
241     for (i = 0; i < tnfa->num_states; i++)
242       {
243         reach[i].tags = (void *)tmp_buf;
244         tmp_buf += xbytes;
245         reach_next[i].tags = (void *)tmp_buf;
246         tmp_buf += xbytes;
247       }
248   }
249
250   for (i = 0; i < tnfa->num_states; i++)
251     reach_pos[i].pos = -1;
252
253   GET_NEXT_WCHAR();
254   pos = 0;
255
256   reach_next_i = reach_next;
257   while (1)
258     {
259       /* If no match found yet, add the initial states to `reach_next'. */
260       if (match_eo < 0)
261         {
262           trans_i = tnfa->initial;
263           while (trans_i->state != NULL)
264             {
265               if (reach_pos[trans_i->state_id].pos < pos)
266                 {
267                   if (trans_i->assertions
268                       && CHECK_ASSERTIONS(trans_i->assertions))
269                     {
270                       trans_i++;
271                       continue;
272                     }
273
274                   reach_next_i->state = trans_i->state;
275                   for (i = 0; i < num_tags; i++)
276                     reach_next_i->tags[i] = -1;
277                   tag_i = trans_i->tags;
278                   if (tag_i)
279                     while (*tag_i >= 0)
280                       {
281                         if (*tag_i < num_tags)
282                           reach_next_i->tags[*tag_i] = pos;
283                         tag_i++;
284                       }
285                   if (reach_next_i->state == tnfa->final)
286                     {
287                       match_eo = pos;
288                       new_match = 1;
289                       for (i = 0; i < num_tags; i++)
290                         match_tags[i] = reach_next_i->tags[i];
291                     }
292                   reach_pos[trans_i->state_id].pos = pos;
293                   reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
294                   reach_next_i++;
295                 }
296               trans_i++;
297             }
298           reach_next_i->state = NULL;
299         }
300       else
301         {
302           if (num_tags == 0 || reach_next_i == reach_next)
303             /* We have found a match. */
304             break;
305         }
306
307       /* Check for end of string. */
308       if (!next_c) break;
309
310       GET_NEXT_WCHAR();
311
312       /* Swap `reach' and `reach_next'. */
313       reach_i = reach;
314       reach = reach_next;
315       reach_next = reach_i;
316
317       /* For each state in `reach', weed out states that don't fulfill the
318          minimal matching conditions. */
319       if (tnfa->num_minimals && new_match)
320         {
321           new_match = 0;
322           reach_next_i = reach_next;
323           for (reach_i = reach; reach_i->state; reach_i++)
324             {
325               int skip = 0;
326               for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
327                 {
328                   int end = tnfa->minimal_tags[i];
329                   int start = tnfa->minimal_tags[i + 1];
330                   if (end >= num_tags)
331                     {
332                       skip = 1;
333                       break;
334                     }
335                   else if (reach_i->tags[start] == match_tags[start]
336                            && reach_i->tags[end] < match_tags[end])
337                     {
338                       skip = 1;
339                       break;
340                     }
341                 }
342               if (!skip)
343                 {
344                   reach_next_i->state = reach_i->state;
345                   tmp_iptr = reach_next_i->tags;
346                   reach_next_i->tags = reach_i->tags;
347                   reach_i->tags = tmp_iptr;
348                   reach_next_i++;
349                 }
350             }
351           reach_next_i->state = NULL;
352
353           /* Swap `reach' and `reach_next'. */
354           reach_i = reach;
355           reach = reach_next;
356           reach_next = reach_i;
357         }
358
359       /* For each state in `reach' see if there is a transition leaving with
360          the current input symbol to a state not yet in `reach_next', and
361          add the destination states to `reach_next'. */
362       reach_next_i = reach_next;
363       for (reach_i = reach; reach_i->state; reach_i++)
364         {
365           for (trans_i = reach_i->state; trans_i->state; trans_i++)
366             {
367               /* Does this transition match the input symbol? */
368               if (trans_i->code_min <= (tre_cint_t)prev_c &&
369                   trans_i->code_max >= (tre_cint_t)prev_c)
370                 {
371                   if (trans_i->assertions
372                       && (CHECK_ASSERTIONS(trans_i->assertions)
373                           || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
374                     {
375                       continue;
376                     }
377
378                   /* Compute the tags after this transition. */
379                   for (i = 0; i < num_tags; i++)
380                     tmp_tags[i] = reach_i->tags[i];
381                   tag_i = trans_i->tags;
382                   if (tag_i != NULL)
383                     while (*tag_i >= 0)
384                       {
385                         if (*tag_i < num_tags)
386                           tmp_tags[*tag_i] = pos;
387                         tag_i++;
388                       }
389
390                   if (reach_pos[trans_i->state_id].pos < pos)
391                     {
392                       /* Found an unvisited node. */
393                       reach_next_i->state = trans_i->state;
394                       tmp_iptr = reach_next_i->tags;
395                       reach_next_i->tags = tmp_tags;
396                       tmp_tags = tmp_iptr;
397                       reach_pos[trans_i->state_id].pos = pos;
398                       reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
399
400                       if (reach_next_i->state == tnfa->final
401                           && (match_eo == -1
402                               || (num_tags > 0
403                                   && reach_next_i->tags[0] <= match_tags[0])))
404                         {
405                           match_eo = pos;
406                           new_match = 1;
407                           for (i = 0; i < num_tags; i++)
408                             match_tags[i] = reach_next_i->tags[i];
409                         }
410                       reach_next_i++;
411
412                     }
413                   else
414                     {
415                       assert(reach_pos[trans_i->state_id].pos == pos);
416                       /* Another path has also reached this state.  We choose
417                          the winner by examining the tag values for both
418                          paths. */
419                       if (tre_tag_order(num_tags, tnfa->tag_directions,
420                                         tmp_tags,
421                                         *reach_pos[trans_i->state_id].tags))
422                         {
423                           /* The new path wins. */
424                           tmp_iptr = *reach_pos[trans_i->state_id].tags;
425                           *reach_pos[trans_i->state_id].tags = tmp_tags;
426                           if (trans_i->state == tnfa->final)
427                             {
428                               match_eo = pos;
429                               new_match = 1;
430                               for (i = 0; i < num_tags; i++)
431                                 match_tags[i] = tmp_tags[i];
432                             }
433                           tmp_tags = tmp_iptr;
434                         }
435                     }
436                 }
437             }
438         }
439       reach_next_i->state = NULL;
440     }
441
442   if (buf)
443     xfree(buf);
444
445   *match_end_ofs = match_eo;
446   return match_eo >= 0 ? REG_OK : REG_NOMATCH;
447 }
448
449
450
451 /***********************************************************************
452  from tre-match-backtrack.c
453 ***********************************************************************/
454
455 /*
456   This matcher is for regexps that use back referencing.  Regexp matching
457   with back referencing is an NP-complete problem on the number of back
458   references.  The easiest way to match them is to use a backtracking
459   routine which basically goes through all possible paths in the TNFA
460   and chooses the one which results in the best (leftmost and longest)
461   match.  This can be spectacularly expensive and may run out of stack
462   space, but there really is no better known generic algorithm.  Quoting
463   Henry Spencer from comp.compilers:
464   <URL: http://compilers.iecc.com/comparch/article/93-03-102>
465
466     POSIX.2 REs require longest match, which is really exciting to
467     implement since the obsolete ("basic") variant also includes
468     \<digit>.  I haven't found a better way of tackling this than doing
469     a preliminary match using a DFA (or simulation) on a modified RE
470     that just replicates subREs for \<digit>, and then doing a
471     backtracking match to determine whether the subRE matches were
472     right.  This can be rather slow, but I console myself with the
473     thought that people who use \<digit> deserve very slow execution.
474     (Pun unintentional but very appropriate.)
475
476 */
477
478 typedef struct {
479   int pos;
480   const char *str_byte;
481   tre_tnfa_transition_t *state;
482   int state_id;
483   int next_c;
484   int *tags;
485 #ifdef TRE_MBSTATE
486   mbstate_t mbstate;
487 #endif /* TRE_MBSTATE */
488 } tre_backtrack_item_t;
489
490 typedef struct tre_backtrack_struct {
491   tre_backtrack_item_t item;
492   struct tre_backtrack_struct *prev;
493   struct tre_backtrack_struct *next;
494 } *tre_backtrack_t;
495
496 #ifdef TRE_MBSTATE
497 #define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
498 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
499 #else /* !TRE_MBSTATE */
500 #define BT_STACK_MBSTATE_IN
501 #define BT_STACK_MBSTATE_OUT
502 #endif /* !TRE_MBSTATE */
503
504 #define tre_bt_mem_new            tre_mem_new
505 #define tre_bt_mem_alloc          tre_mem_alloc
506 #define tre_bt_mem_destroy        tre_mem_destroy
507
508
509 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
510   do                                                                          \
511     {                                                                         \
512       int i;                                                                  \
513       if (!stack->next)                                                       \
514         {                                                                     \
515           tre_backtrack_t s;                                                  \
516           s = tre_bt_mem_alloc(mem, sizeof(*s));                              \
517           if (!s)                                                             \
518             {                                                                 \
519               tre_bt_mem_destroy(mem);                                        \
520               if (tags)                                                       \
521                 xfree(tags);                                                  \
522               if (pmatch)                                                     \
523                 xfree(pmatch);                                                \
524               if (states_seen)                                                \
525                 xfree(states_seen);                                           \
526               return REG_ESPACE;                                              \
527             }                                                                 \
528           s->prev = stack;                                                    \
529           s->next = NULL;                                                     \
530           s->item.tags = tre_bt_mem_alloc(mem,                                \
531                                           sizeof(*tags) * tnfa->num_tags);    \
532           if (!s->item.tags)                                                  \
533             {                                                                 \
534               tre_bt_mem_destroy(mem);                                        \
535               if (tags)                                                       \
536                 xfree(tags);                                                  \
537               if (pmatch)                                                     \
538                 xfree(pmatch);                                                \
539               if (states_seen)                                                \
540                 xfree(states_seen);                                           \
541               return REG_ESPACE;                                              \
542             }                                                                 \
543           stack->next = s;                                                    \
544           stack = s;                                                          \
545         }                                                                     \
546       else                                                                    \
547         stack = stack->next;                                                  \
548       stack->item.pos = (_pos);                                               \
549       stack->item.str_byte = (_str_byte);                                     \
550       stack->item.state = (_state);                                           \
551       stack->item.state_id = (_state_id);                                     \
552       stack->item.next_c = (_next_c);                                         \
553       for (i = 0; i < tnfa->num_tags; i++)                                    \
554         stack->item.tags[i] = (_tags)[i];                                     \
555       BT_STACK_MBSTATE_IN;                                                    \
556     }                                                                         \
557   while (0)
558
559 #define BT_STACK_POP()                                                        \
560   do                                                                          \
561     {                                                                         \
562       int i;                                                                  \
563       assert(stack->prev);                                                    \
564       pos = stack->item.pos;                                                  \
565       str_byte = stack->item.str_byte;                                        \
566       state = stack->item.state;                                              \
567       next_c = stack->item.next_c;                                            \
568       for (i = 0; i < tnfa->num_tags; i++)                                    \
569         tags[i] = stack->item.tags[i];                                        \
570       BT_STACK_MBSTATE_OUT;                                                   \
571       stack = stack->prev;                                                    \
572     }                                                                         \
573   while (0)
574
575 #undef MIN
576 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
577
578 static reg_errcode_t
579 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
580                        int *match_tags, int eflags, int *match_end_ofs)
581 {
582   /* State variables required by GET_NEXT_WCHAR. */
583   tre_char_t prev_c = 0, next_c = 0;
584   const char *str_byte = string;
585   int pos = 0;
586   int pos_add_next = 1;
587 #ifdef TRE_MBSTATE
588   mbstate_t mbstate;
589 #endif /* TRE_MBSTATE */
590   int reg_notbol = eflags & REG_NOTBOL;
591   int reg_noteol = eflags & REG_NOTEOL;
592   int reg_newline = tnfa->cflags & REG_NEWLINE;
593
594   /* These are used to remember the necessary values of the above
595      variables to return to the position where the current search
596      started from. */
597   int next_c_start;
598   const char *str_byte_start;
599 #ifdef TRE_MBSTATE
600   mbstate_t mbstate_start;
601 #endif /* TRE_MBSTATE */
602
603   /* End offset of best match so far, or -1 if no match found yet. */
604   int match_eo = -1;
605   /* Tag arrays. */
606   int *next_tags, *tags = NULL;
607   /* Current TNFA state. */
608   tre_tnfa_transition_t *state;
609   int *states_seen = NULL;
610
611   /* Memory allocator to for allocating the backtracking stack. */
612   tre_mem_t mem = tre_bt_mem_new();
613
614   /* The backtracking stack. */
615   tre_backtrack_t stack;
616
617   tre_tnfa_transition_t *trans_i;
618   regmatch_t *pmatch = NULL;
619   int ret;
620
621 #ifdef TRE_MBSTATE
622   memset(&mbstate, '\0', sizeof(mbstate));
623 #endif /* TRE_MBSTATE */
624
625   if (!mem)
626     return REG_ESPACE;
627   stack = tre_bt_mem_alloc(mem, sizeof(*stack));
628   if (!stack)
629     {
630       ret = REG_ESPACE;
631       goto error_exit;
632     }
633   stack->prev = NULL;
634   stack->next = NULL;
635
636   if (tnfa->num_tags)
637     {
638       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
639       if (!tags)
640         {
641           ret = REG_ESPACE;
642           goto error_exit;
643         }
644     }
645   if (tnfa->num_submatches)
646     {
647       pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
648       if (!pmatch)
649         {
650           ret = REG_ESPACE;
651           goto error_exit;
652         }
653     }
654   if (tnfa->num_states)
655     {
656       states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
657       if (!states_seen)
658         {
659           ret = REG_ESPACE;
660           goto error_exit;
661         }
662     }
663
664  retry:
665   {
666     int i;
667     for (i = 0; i < tnfa->num_tags; i++)
668       {
669         tags[i] = -1;
670         if (match_tags)
671           match_tags[i] = -1;
672       }
673     for (i = 0; i < tnfa->num_states; i++)
674       states_seen[i] = 0;
675   }
676
677   state = NULL;
678   GET_NEXT_WCHAR();
679   next_c_start = next_c;
680   str_byte_start = str_byte;
681 #ifdef TRE_MBSTATE
682   mbstate_start = mbstate;
683 #endif /* TRE_MBSTATE */
684
685   /* Handle initial states. */
686   next_tags = NULL;
687   for (trans_i = tnfa->initial; trans_i->state; trans_i++)
688     {
689       if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
690         {
691           continue;
692         }
693       if (state == NULL)
694         {
695           /* Start from this state. */
696           state = trans_i->state;
697           next_tags = trans_i->tags;
698         }
699       else
700         {
701           /* Backtrack to this state. */
702           BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
703                         trans_i->state_id, next_c, tags, mbstate);
704           {
705             int *tmp = trans_i->tags;
706             if (tmp)
707               while (*tmp >= 0)
708                 stack->item.tags[*tmp++] = pos;
709           }
710         }
711     }
712
713   if (next_tags)
714     for (; *next_tags >= 0; next_tags++)
715       tags[*next_tags] = pos;
716
717
718   if (state == NULL)
719     goto backtrack;
720
721   while (1)
722     {
723       tre_tnfa_transition_t *next_state;
724       int empty_br_match;
725
726       if (state == tnfa->final)
727         {
728           if (match_eo < pos
729               || (match_eo == pos
730                   && match_tags
731                   && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
732                                    tags, match_tags)))
733             {
734               int i;
735               /* This match wins the previous match. */
736               match_eo = pos;
737               if (match_tags)
738                 for (i = 0; i < tnfa->num_tags; i++)
739                   match_tags[i] = tags[i];
740             }
741           /* Our TNFAs never have transitions leaving from the final state,
742              so we jump right to backtracking. */
743           goto backtrack;
744         }
745
746       /* Go to the next character in the input string. */
747       empty_br_match = 0;
748       trans_i = state;
749       if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
750         {
751           /* This is a back reference state.  All transitions leaving from
752              this state have the same back reference "assertion".  Instead
753              of reading the next character, we match the back reference. */
754           int so, eo, bt = trans_i->u.backref;
755           int bt_len;
756           int result;
757
758           /* Get the substring we need to match against.  Remember to
759              turn off REG_NOSUB temporarily. */
760           tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
761                           tnfa, tags, pos);
762           so = pmatch[bt].rm_so;
763           eo = pmatch[bt].rm_eo;
764           bt_len = eo - so;
765
766           result = strncmp((const char*)string + so, str_byte - 1,
767                                  (size_t)bt_len);
768
769           if (result == 0)
770             {
771               /* Back reference matched.  Check for infinite loop. */
772               if (bt_len == 0)
773                 empty_br_match = 1;
774               if (empty_br_match && states_seen[trans_i->state_id])
775                 {
776                   goto backtrack;
777                 }
778
779               states_seen[trans_i->state_id] = empty_br_match;
780
781               /* Advance in input string and resync `prev_c', `next_c'
782                  and pos. */
783               str_byte += bt_len - 1;
784               pos += bt_len - 1;
785               GET_NEXT_WCHAR();
786             }
787           else
788             {
789               goto backtrack;
790             }
791         }
792       else
793         {
794           /* Check for end of string. */
795           if (next_c == L'\0')
796                 goto backtrack;
797
798           /* Read the next character. */
799           GET_NEXT_WCHAR();
800         }
801
802       next_state = NULL;
803       for (trans_i = state; trans_i->state; trans_i++)
804         {
805           if (trans_i->code_min <= (tre_cint_t)prev_c
806               && trans_i->code_max >= (tre_cint_t)prev_c)
807             {
808               if (trans_i->assertions
809                   && (CHECK_ASSERTIONS(trans_i->assertions)
810                       || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
811                 {
812                   continue;
813                 }
814
815               if (next_state == NULL)
816                 {
817                   /* First matching transition. */
818                   next_state = trans_i->state;
819                   next_tags = trans_i->tags;
820                 }
821               else
822                 {
823                   /* Second matching transition.  We may need to backtrack here
824                      to take this transition instead of the first one, so we
825                      push this transition in the backtracking stack so we can
826                      jump back here if needed. */
827                   BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
828                                 trans_i->state_id, next_c, tags, mbstate);
829                   {
830                     int *tmp;
831                     for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
832                       stack->item.tags[*tmp] = pos;
833                   }
834 #if 0 /* XXX - it's important not to look at all transitions here to keep
835          the stack small! */
836                   break;
837 #endif
838                 }
839             }
840         }
841
842       if (next_state != NULL)
843         {
844           /* Matching transitions were found.  Take the first one. */
845           state = next_state;
846
847           /* Update the tag values. */
848           if (next_tags)
849             while (*next_tags >= 0)
850               tags[*next_tags++] = pos;
851         }
852       else
853         {
854         backtrack:
855           /* A matching transition was not found.  Try to backtrack. */
856           if (stack->prev)
857             {
858               if (stack->item.state->assertions & ASSERT_BACKREF)
859                 {
860                   states_seen[stack->item.state_id] = 0;
861                 }
862
863               BT_STACK_POP();
864             }
865           else if (match_eo < 0)
866             {
867               /* Try starting from a later position in the input string. */
868               /* Check for end of string. */
869               if (next_c == L'\0')
870                     {
871                       break;
872                     }
873               next_c = next_c_start;
874 #ifdef TRE_MBSTATE
875               mbstate = mbstate_start;
876 #endif /* TRE_MBSTATE */
877               str_byte = str_byte_start;
878               goto retry;
879             }
880           else
881             {
882               break;
883             }
884         }
885     }
886
887   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
888   *match_end_ofs = match_eo;
889
890  error_exit:
891   tre_bt_mem_destroy(mem);
892 #ifndef TRE_USE_ALLOCA
893   if (tags)
894     xfree(tags);
895   if (pmatch)
896     xfree(pmatch);
897   if (states_seen)
898     xfree(states_seen);
899 #endif /* !TRE_USE_ALLOCA */
900
901   return ret;
902 }
903
904 /***********************************************************************
905  from regexec.c
906 ***********************************************************************/
907
908 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
909    endpoint values. */
910 static void
911 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
912                 const tre_tnfa_t *tnfa, int *tags, int match_eo)
913 {
914   tre_submatch_data_t *submatch_data;
915   unsigned int i, j;
916   int *parents;
917
918   i = 0;
919   if (match_eo >= 0 && !(cflags & REG_NOSUB))
920     {
921       /* Construct submatch offsets from the tags. */
922       submatch_data = tnfa->submatch_data;
923       while (i < tnfa->num_submatches && i < nmatch)
924         {
925           if (submatch_data[i].so_tag == tnfa->end_tag)
926             pmatch[i].rm_so = match_eo;
927           else
928             pmatch[i].rm_so = tags[submatch_data[i].so_tag];
929
930           if (submatch_data[i].eo_tag == tnfa->end_tag)
931             pmatch[i].rm_eo = match_eo;
932           else
933             pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
934
935           /* If either of the endpoints were not used, this submatch
936              was not part of the match. */
937           if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
938             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
939
940           i++;
941         }
942       /* Reset all submatches that are not within all of their parent
943          submatches. */
944       i = 0;
945       while (i < tnfa->num_submatches && i < nmatch)
946         {
947           if (pmatch[i].rm_eo == -1)
948             assert(pmatch[i].rm_so == -1);
949           assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
950
951           parents = submatch_data[i].parents;
952           if (parents != NULL)
953             for (j = 0; parents[j] >= 0; j++)
954               {
955                 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
956                     || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
957                   pmatch[i].rm_so = pmatch[i].rm_eo = -1;
958               }
959           i++;
960         }
961     }
962
963   while (i < nmatch)
964     {
965       pmatch[i].rm_so = -1;
966       pmatch[i].rm_eo = -1;
967       i++;
968     }
969 }
970
971
972 /*
973   Wrapper functions for POSIX compatible regexp matching.
974 */
975
976 int
977 regexec(const regex_t *restrict preg, const char *restrict string,
978           size_t nmatch, regmatch_t pmatch[restrict], int eflags)
979 {
980   tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
981   reg_errcode_t status;
982   int *tags = NULL, eo;
983   if (tnfa->num_tags > 0 && nmatch > 0)
984     {
985       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
986       if (tags == NULL)
987         return REG_ESPACE;
988     }
989
990   /* Dispatch to the appropriate matcher. */
991   if (tnfa->have_backrefs)
992     {
993       /* The regex has back references, use the backtracking matcher. */
994       status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
995     }
996   else
997     {
998       /* Exact matching, no back references, use the parallel matcher. */
999       status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1000     }
1001
1002   if (status == REG_OK)
1003     /* A match was found, so fill the submatch registers. */
1004     tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1005   if (tags)
1006     xfree(tags);
1007   return status;
1008 }