fix the use of uninitialized value in regcomp
[musl] / src / regex / regexec.c
1 /*
2   regexec.c - TRE POSIX compatible matching functions (and more).
3
4   Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi>
5   All rights reserved.
6
7   Redistribution and use in source and binary forms, with or without
8   modification, are permitted provided that the following conditions
9   are met:
10
11     1. Redistributions of source code must retain the above copyright
12        notice, this list of conditions and the following disclaimer.
13
14     2. Redistributions in binary form must reproduce the above copyright
15        notice, this list of conditions and the following disclaimer in the
16        documentation and/or other materials provided with the distribution.
17
18   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
19   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
22   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30 */
31
32 #include <stdlib.h>
33 #include <string.h>
34 #include <wchar.h>
35 #include <wctype.h>
36 #include <limits.h>
37
38 #include <regex.h>
39
40 #include "tre.h"
41
42 #include <assert.h>
43
44 static void
45 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
46                 const tre_tnfa_t *tnfa, int *tags, int match_eo);
47
48 /***********************************************************************
49  from tre-match-utils.h
50 ***********************************************************************/
51
52 #define GET_NEXT_WCHAR() do {                                                 \
53     prev_c = next_c; pos += pos_add_next;                                     \
54     if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) {        \
55         if (pos_add_next < 0) { ret = REG_NOMATCH; goto error_exit; }         \
56         else pos_add_next++;                                                  \
57     }                                                                         \
58     str_byte += pos_add_next;                                                 \
59   } while (0)
60
61 #define IS_WORD_CHAR(c)  ((c) == L'_' || tre_isalnum(c))
62
63 #define CHECK_ASSERTIONS(assertions)                                          \
64   (((assertions & ASSERT_AT_BOL)                                              \
65     && (pos > 0 || reg_notbol)                                                \
66     && (prev_c != L'\n' || !reg_newline))                                     \
67    || ((assertions & ASSERT_AT_EOL)                                           \
68        && (next_c != L'\0' || reg_noteol)                                     \
69        && (next_c != L'\n' || !reg_newline))                                  \
70    || ((assertions & ASSERT_AT_BOW)                                           \
71        && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))                    \
72    || ((assertions & ASSERT_AT_EOW)                                           \
73        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))                    \
74    || ((assertions & ASSERT_AT_WB)                                            \
75        && (pos != 0 && next_c != L'\0'                                        \
76            && IS_WORD_CHAR(prev_c) == IS_WORD_CHAR(next_c)))                  \
77    || ((assertions & ASSERT_AT_WB_NEG)                                        \
78        && (pos == 0 || next_c == L'\0'                                        \
79            || IS_WORD_CHAR(prev_c) != IS_WORD_CHAR(next_c))))
80
81 #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)                             \
82   (((trans_i->assertions & ASSERT_CHAR_CLASS)                                 \
83        && !(tnfa->cflags & REG_ICASE)                                         \
84        && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))                 \
85     || ((trans_i->assertions & ASSERT_CHAR_CLASS)                             \
86         && (tnfa->cflags & REG_ICASE)                                         \
87         && !tre_isctype(tre_tolower((tre_cint_t)prev_c),trans_i->u.class)     \
88         && !tre_isctype(tre_toupper((tre_cint_t)prev_c),trans_i->u.class))    \
89     || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)                         \
90         && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\
91                                       tnfa->cflags & REG_ICASE)))
92
93
94
95
96 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
97 static int
98 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
99               int *t1, int *t2)
100 {
101   int i;
102   for (i = 0; i < num_tags; i++)
103     {
104       if (tag_directions[i] == TRE_TAG_MINIMIZE)
105         {
106           if (t1[i] < t2[i])
107             return 1;
108           if (t1[i] > t2[i])
109             return 0;
110         }
111       else
112         {
113           if (t1[i] > t2[i])
114             return 1;
115           if (t1[i] < t2[i])
116             return 0;
117         }
118     }
119   /*  assert(0);*/
120   return 0;
121 }
122
123 static int
124 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
125 {
126   while (*classes != (tre_ctype_t)0)
127     if ((!icase && tre_isctype(wc, *classes))
128         || (icase && (tre_isctype(tre_toupper(wc), *classes)
129                       || tre_isctype(tre_tolower(wc), *classes))))
130       return 1; /* Match. */
131     else
132       classes++;
133   return 0; /* No match. */
134 }
135
136
137 /***********************************************************************
138  from tre-match-parallel.c
139 ***********************************************************************/
140
141 /*
142   This algorithm searches for matches basically by reading characters
143   in the searched string one by one, starting at the beginning.  All
144   matching paths in the TNFA are traversed in parallel.  When two or
145   more paths reach the same state, exactly one is chosen according to
146   tag ordering rules; if returning submatches is not required it does
147   not matter which path is chosen.
148
149   The worst case time required for finding the leftmost and longest
150   match, or determining that there is no match, is always linearly
151   dependent on the length of the text being searched.
152
153   This algorithm cannot handle TNFAs with back referencing nodes.
154   See `tre-match-backtrack.c'.
155 */
156
157 typedef struct {
158   tre_tnfa_transition_t *state;
159   int *tags;
160 } tre_tnfa_reach_t;
161
162 typedef struct {
163   int pos;
164   int **tags;
165 } tre_reach_pos_t;
166
167
168 static reg_errcode_t
169 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string,
170                       int *match_tags, int eflags,
171                       int *match_end_ofs)
172 {
173   /* State variables required by GET_NEXT_WCHAR. */
174   tre_char_t prev_c = 0, next_c = 0;
175   const char *str_byte = string;
176   int pos = -1;
177   int pos_add_next = 1;
178 #ifdef TRE_MBSTATE
179   mbstate_t mbstate;
180 #endif /* TRE_MBSTATE */
181   int reg_notbol = eflags & REG_NOTBOL;
182   int reg_noteol = eflags & REG_NOTEOL;
183   int reg_newline = tnfa->cflags & REG_NEWLINE;
184   reg_errcode_t ret;
185
186   char *buf;
187   tre_tnfa_transition_t *trans_i;
188   tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
189   tre_reach_pos_t *reach_pos;
190   int *tag_i;
191   int num_tags, i;
192
193   int match_eo = -1;       /* end offset of match (-1 if no match found yet) */
194   int new_match = 0;
195   int *tmp_tags = NULL;
196   int *tmp_iptr;
197
198 #ifdef TRE_MBSTATE
199   memset(&mbstate, '\0', sizeof(mbstate));
200 #endif /* TRE_MBSTATE */
201
202   if (!match_tags)
203     num_tags = 0;
204   else
205     num_tags = tnfa->num_tags;
206
207   /* Allocate memory for temporary data required for matching.  This needs to
208      be done for every matching operation to be thread safe.  This allocates
209      everything in a single large block from the stack frame using alloca()
210      or with malloc() if alloca is unavailable. */
211   {
212     int tbytes, rbytes, pbytes, xbytes, total_bytes;
213     char *tmp_buf;
214     /* Compute the length of the block we need. */
215     tbytes = sizeof(*tmp_tags) * num_tags;
216     rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
217     pbytes = sizeof(*reach_pos) * tnfa->num_states;
218     xbytes = sizeof(int) * num_tags;
219     total_bytes =
220       (sizeof(long) - 1) * 4 /* for alignment paddings */
221       + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
222
223     /* Allocate the memory. */
224     buf = xmalloc((unsigned)total_bytes);
225     if (buf == NULL)
226       return REG_ESPACE;
227     memset(buf, 0, (size_t)total_bytes);
228
229     /* Get the various pointers within tmp_buf (properly aligned). */
230     tmp_tags = (void *)buf;
231     tmp_buf = buf + tbytes;
232     tmp_buf += ALIGN(tmp_buf, long);
233     reach_next = (void *)tmp_buf;
234     tmp_buf += rbytes;
235     tmp_buf += ALIGN(tmp_buf, long);
236     reach = (void *)tmp_buf;
237     tmp_buf += rbytes;
238     tmp_buf += ALIGN(tmp_buf, long);
239     reach_pos = (void *)tmp_buf;
240     tmp_buf += pbytes;
241     tmp_buf += ALIGN(tmp_buf, long);
242     for (i = 0; i < tnfa->num_states; i++)
243       {
244         reach[i].tags = (void *)tmp_buf;
245         tmp_buf += xbytes;
246         reach_next[i].tags = (void *)tmp_buf;
247         tmp_buf += xbytes;
248       }
249   }
250
251   for (i = 0; i < tnfa->num_states; i++)
252     reach_pos[i].pos = -1;
253
254   GET_NEXT_WCHAR();
255   pos = 0;
256
257   reach_next_i = reach_next;
258   while (1)
259     {
260       /* If no match found yet, add the initial states to `reach_next'. */
261       if (match_eo < 0)
262         {
263           trans_i = tnfa->initial;
264           while (trans_i->state != NULL)
265             {
266               if (reach_pos[trans_i->state_id].pos < pos)
267                 {
268                   if (trans_i->assertions
269                       && CHECK_ASSERTIONS(trans_i->assertions))
270                     {
271                       trans_i++;
272                       continue;
273                     }
274
275                   reach_next_i->state = trans_i->state;
276                   for (i = 0; i < num_tags; i++)
277                     reach_next_i->tags[i] = -1;
278                   tag_i = trans_i->tags;
279                   if (tag_i)
280                     while (*tag_i >= 0)
281                       {
282                         if (*tag_i < num_tags)
283                           reach_next_i->tags[*tag_i] = pos;
284                         tag_i++;
285                       }
286                   if (reach_next_i->state == tnfa->final)
287                     {
288                       match_eo = pos;
289                       new_match = 1;
290                       for (i = 0; i < num_tags; i++)
291                         match_tags[i] = reach_next_i->tags[i];
292                     }
293                   reach_pos[trans_i->state_id].pos = pos;
294                   reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
295                   reach_next_i++;
296                 }
297               trans_i++;
298             }
299           reach_next_i->state = NULL;
300         }
301       else
302         {
303           if (num_tags == 0 || reach_next_i == reach_next)
304             /* We have found a match. */
305             break;
306         }
307
308       /* Check for end of string. */
309       if (!next_c) break;
310
311       GET_NEXT_WCHAR();
312
313       /* Swap `reach' and `reach_next'. */
314       reach_i = reach;
315       reach = reach_next;
316       reach_next = reach_i;
317
318       /* For each state in `reach', weed out states that don't fulfill the
319          minimal matching conditions. */
320       if (tnfa->num_minimals && new_match)
321         {
322           new_match = 0;
323           reach_next_i = reach_next;
324           for (reach_i = reach; reach_i->state; reach_i++)
325             {
326               int skip = 0;
327               for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2)
328                 {
329                   int end = tnfa->minimal_tags[i];
330                   int start = tnfa->minimal_tags[i + 1];
331                   if (end >= num_tags)
332                     {
333                       skip = 1;
334                       break;
335                     }
336                   else if (reach_i->tags[start] == match_tags[start]
337                            && reach_i->tags[end] < match_tags[end])
338                     {
339                       skip = 1;
340                       break;
341                     }
342                 }
343               if (!skip)
344                 {
345                   reach_next_i->state = reach_i->state;
346                   tmp_iptr = reach_next_i->tags;
347                   reach_next_i->tags = reach_i->tags;
348                   reach_i->tags = tmp_iptr;
349                   reach_next_i++;
350                 }
351             }
352           reach_next_i->state = NULL;
353
354           /* Swap `reach' and `reach_next'. */
355           reach_i = reach;
356           reach = reach_next;
357           reach_next = reach_i;
358         }
359
360       /* For each state in `reach' see if there is a transition leaving with
361          the current input symbol to a state not yet in `reach_next', and
362          add the destination states to `reach_next'. */
363       reach_next_i = reach_next;
364       for (reach_i = reach; reach_i->state; reach_i++)
365         {
366           for (trans_i = reach_i->state; trans_i->state; trans_i++)
367             {
368               /* Does this transition match the input symbol? */
369               if (trans_i->code_min <= (tre_cint_t)prev_c &&
370                   trans_i->code_max >= (tre_cint_t)prev_c)
371                 {
372                   if (trans_i->assertions
373                       && (CHECK_ASSERTIONS(trans_i->assertions)
374                           || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
375                     {
376                       continue;
377                     }
378
379                   /* Compute the tags after this transition. */
380                   for (i = 0; i < num_tags; i++)
381                     tmp_tags[i] = reach_i->tags[i];
382                   tag_i = trans_i->tags;
383                   if (tag_i != NULL)
384                     while (*tag_i >= 0)
385                       {
386                         if (*tag_i < num_tags)
387                           tmp_tags[*tag_i] = pos;
388                         tag_i++;
389                       }
390
391                   if (reach_pos[trans_i->state_id].pos < pos)
392                     {
393                       /* Found an unvisited node. */
394                       reach_next_i->state = trans_i->state;
395                       tmp_iptr = reach_next_i->tags;
396                       reach_next_i->tags = tmp_tags;
397                       tmp_tags = tmp_iptr;
398                       reach_pos[trans_i->state_id].pos = pos;
399                       reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
400
401                       if (reach_next_i->state == tnfa->final
402                           && (match_eo == -1
403                               || (num_tags > 0
404                                   && reach_next_i->tags[0] <= match_tags[0])))
405                         {
406                           match_eo = pos;
407                           new_match = 1;
408                           for (i = 0; i < num_tags; i++)
409                             match_tags[i] = reach_next_i->tags[i];
410                         }
411                       reach_next_i++;
412
413                     }
414                   else
415                     {
416                       assert(reach_pos[trans_i->state_id].pos == pos);
417                       /* Another path has also reached this state.  We choose
418                          the winner by examining the tag values for both
419                          paths. */
420                       if (tre_tag_order(num_tags, tnfa->tag_directions,
421                                         tmp_tags,
422                                         *reach_pos[trans_i->state_id].tags))
423                         {
424                           /* The new path wins. */
425                           tmp_iptr = *reach_pos[trans_i->state_id].tags;
426                           *reach_pos[trans_i->state_id].tags = tmp_tags;
427                           if (trans_i->state == tnfa->final)
428                             {
429                               match_eo = pos;
430                               new_match = 1;
431                               for (i = 0; i < num_tags; i++)
432                                 match_tags[i] = tmp_tags[i];
433                             }
434                           tmp_tags = tmp_iptr;
435                         }
436                     }
437                 }
438             }
439         }
440       reach_next_i->state = NULL;
441     }
442
443   *match_end_ofs = match_eo;
444   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
445 error_exit:
446   xfree(buf);
447   return ret;
448 }
449
450
451
452 /***********************************************************************
453  from tre-match-backtrack.c
454 ***********************************************************************/
455
456 /*
457   This matcher is for regexps that use back referencing.  Regexp matching
458   with back referencing is an NP-complete problem on the number of back
459   references.  The easiest way to match them is to use a backtracking
460   routine which basically goes through all possible paths in the TNFA
461   and chooses the one which results in the best (leftmost and longest)
462   match.  This can be spectacularly expensive and may run out of stack
463   space, but there really is no better known generic algorithm.  Quoting
464   Henry Spencer from comp.compilers:
465   <URL: http://compilers.iecc.com/comparch/article/93-03-102>
466
467     POSIX.2 REs require longest match, which is really exciting to
468     implement since the obsolete ("basic") variant also includes
469     \<digit>.  I haven't found a better way of tackling this than doing
470     a preliminary match using a DFA (or simulation) on a modified RE
471     that just replicates subREs for \<digit>, and then doing a
472     backtracking match to determine whether the subRE matches were
473     right.  This can be rather slow, but I console myself with the
474     thought that people who use \<digit> deserve very slow execution.
475     (Pun unintentional but very appropriate.)
476
477 */
478
479 typedef struct {
480   int pos;
481   const char *str_byte;
482   tre_tnfa_transition_t *state;
483   int state_id;
484   int next_c;
485   int *tags;
486 #ifdef TRE_MBSTATE
487   mbstate_t mbstate;
488 #endif /* TRE_MBSTATE */
489 } tre_backtrack_item_t;
490
491 typedef struct tre_backtrack_struct {
492   tre_backtrack_item_t item;
493   struct tre_backtrack_struct *prev;
494   struct tre_backtrack_struct *next;
495 } *tre_backtrack_t;
496
497 #ifdef TRE_MBSTATE
498 #define BT_STACK_MBSTATE_IN  stack->item.mbstate = (mbstate)
499 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
500 #else /* !TRE_MBSTATE */
501 #define BT_STACK_MBSTATE_IN
502 #define BT_STACK_MBSTATE_OUT
503 #endif /* !TRE_MBSTATE */
504
505 #define tre_bt_mem_new            tre_mem_new
506 #define tre_bt_mem_alloc          tre_mem_alloc
507 #define tre_bt_mem_destroy        tre_mem_destroy
508
509
510 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
511   do                                                                          \
512     {                                                                         \
513       int i;                                                                  \
514       if (!stack->next)                                                       \
515         {                                                                     \
516           tre_backtrack_t s;                                                  \
517           s = tre_bt_mem_alloc(mem, sizeof(*s));                              \
518           if (!s)                                                             \
519             {                                                                 \
520               tre_bt_mem_destroy(mem);                                        \
521               if (tags)                                                       \
522                 xfree(tags);                                                  \
523               if (pmatch)                                                     \
524                 xfree(pmatch);                                                \
525               if (states_seen)                                                \
526                 xfree(states_seen);                                           \
527               return REG_ESPACE;                                              \
528             }                                                                 \
529           s->prev = stack;                                                    \
530           s->next = NULL;                                                     \
531           s->item.tags = tre_bt_mem_alloc(mem,                                \
532                                           sizeof(*tags) * tnfa->num_tags);    \
533           if (!s->item.tags)                                                  \
534             {                                                                 \
535               tre_bt_mem_destroy(mem);                                        \
536               if (tags)                                                       \
537                 xfree(tags);                                                  \
538               if (pmatch)                                                     \
539                 xfree(pmatch);                                                \
540               if (states_seen)                                                \
541                 xfree(states_seen);                                           \
542               return REG_ESPACE;                                              \
543             }                                                                 \
544           stack->next = s;                                                    \
545           stack = s;                                                          \
546         }                                                                     \
547       else                                                                    \
548         stack = stack->next;                                                  \
549       stack->item.pos = (_pos);                                               \
550       stack->item.str_byte = (_str_byte);                                     \
551       stack->item.state = (_state);                                           \
552       stack->item.state_id = (_state_id);                                     \
553       stack->item.next_c = (_next_c);                                         \
554       for (i = 0; i < tnfa->num_tags; i++)                                    \
555         stack->item.tags[i] = (_tags)[i];                                     \
556       BT_STACK_MBSTATE_IN;                                                    \
557     }                                                                         \
558   while (0)
559
560 #define BT_STACK_POP()                                                        \
561   do                                                                          \
562     {                                                                         \
563       int i;                                                                  \
564       assert(stack->prev);                                                    \
565       pos = stack->item.pos;                                                  \
566       str_byte = stack->item.str_byte;                                        \
567       state = stack->item.state;                                              \
568       next_c = stack->item.next_c;                                            \
569       for (i = 0; i < tnfa->num_tags; i++)                                    \
570         tags[i] = stack->item.tags[i];                                        \
571       BT_STACK_MBSTATE_OUT;                                                   \
572       stack = stack->prev;                                                    \
573     }                                                                         \
574   while (0)
575
576 #undef MIN
577 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
578
579 static reg_errcode_t
580 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
581                        int *match_tags, int eflags, int *match_end_ofs)
582 {
583   /* State variables required by GET_NEXT_WCHAR. */
584   tre_char_t prev_c = 0, next_c = 0;
585   const char *str_byte = string;
586   int pos = 0;
587   int pos_add_next = 1;
588 #ifdef TRE_MBSTATE
589   mbstate_t mbstate;
590 #endif /* TRE_MBSTATE */
591   int reg_notbol = eflags & REG_NOTBOL;
592   int reg_noteol = eflags & REG_NOTEOL;
593   int reg_newline = tnfa->cflags & REG_NEWLINE;
594
595   /* These are used to remember the necessary values of the above
596      variables to return to the position where the current search
597      started from. */
598   int next_c_start;
599   const char *str_byte_start;
600   int pos_start = -1;
601 #ifdef TRE_MBSTATE
602   mbstate_t mbstate_start;
603 #endif /* TRE_MBSTATE */
604
605   /* End offset of best match so far, or -1 if no match found yet. */
606   int match_eo = -1;
607   /* Tag arrays. */
608   int *next_tags, *tags = NULL;
609   /* Current TNFA state. */
610   tre_tnfa_transition_t *state;
611   int *states_seen = NULL;
612
613   /* Memory allocator to for allocating the backtracking stack. */
614   tre_mem_t mem = tre_bt_mem_new();
615
616   /* The backtracking stack. */
617   tre_backtrack_t stack;
618
619   tre_tnfa_transition_t *trans_i;
620   regmatch_t *pmatch = NULL;
621   int ret;
622
623 #ifdef TRE_MBSTATE
624   memset(&mbstate, '\0', sizeof(mbstate));
625 #endif /* TRE_MBSTATE */
626
627   if (!mem)
628     return REG_ESPACE;
629   stack = tre_bt_mem_alloc(mem, sizeof(*stack));
630   if (!stack)
631     {
632       ret = REG_ESPACE;
633       goto error_exit;
634     }
635   stack->prev = NULL;
636   stack->next = NULL;
637
638   if (tnfa->num_tags)
639     {
640       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
641       if (!tags)
642         {
643           ret = REG_ESPACE;
644           goto error_exit;
645         }
646     }
647   if (tnfa->num_submatches)
648     {
649       pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
650       if (!pmatch)
651         {
652           ret = REG_ESPACE;
653           goto error_exit;
654         }
655     }
656   if (tnfa->num_states)
657     {
658       states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
659       if (!states_seen)
660         {
661           ret = REG_ESPACE;
662           goto error_exit;
663         }
664     }
665
666  retry:
667   {
668     int i;
669     for (i = 0; i < tnfa->num_tags; i++)
670       {
671         tags[i] = -1;
672         if (match_tags)
673           match_tags[i] = -1;
674       }
675     for (i = 0; i < tnfa->num_states; i++)
676       states_seen[i] = 0;
677   }
678
679   state = NULL;
680   pos = pos_start;
681   GET_NEXT_WCHAR();
682   pos_start = pos;
683   next_c_start = next_c;
684   str_byte_start = str_byte;
685 #ifdef TRE_MBSTATE
686   mbstate_start = mbstate;
687 #endif /* TRE_MBSTATE */
688
689   /* Handle initial states. */
690   next_tags = NULL;
691   for (trans_i = tnfa->initial; trans_i->state; trans_i++)
692     {
693       if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
694         {
695           continue;
696         }
697       if (state == NULL)
698         {
699           /* Start from this state. */
700           state = trans_i->state;
701           next_tags = trans_i->tags;
702         }
703       else
704         {
705           /* Backtrack to this state. */
706           BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
707                         trans_i->state_id, next_c, tags, mbstate);
708           {
709             int *tmp = trans_i->tags;
710             if (tmp)
711               while (*tmp >= 0)
712                 stack->item.tags[*tmp++] = pos;
713           }
714         }
715     }
716
717   if (next_tags)
718     for (; *next_tags >= 0; next_tags++)
719       tags[*next_tags] = pos;
720
721
722   if (state == NULL)
723     goto backtrack;
724
725   while (1)
726     {
727       tre_tnfa_transition_t *next_state;
728       int empty_br_match;
729
730       if (state == tnfa->final)
731         {
732           if (match_eo < pos
733               || (match_eo == pos
734                   && match_tags
735                   && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
736                                    tags, match_tags)))
737             {
738               int i;
739               /* This match wins the previous match. */
740               match_eo = pos;
741               if (match_tags)
742                 for (i = 0; i < tnfa->num_tags; i++)
743                   match_tags[i] = tags[i];
744             }
745           /* Our TNFAs never have transitions leaving from the final state,
746              so we jump right to backtracking. */
747           goto backtrack;
748         }
749
750       /* Go to the next character in the input string. */
751       empty_br_match = 0;
752       trans_i = state;
753       if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
754         {
755           /* This is a back reference state.  All transitions leaving from
756              this state have the same back reference "assertion".  Instead
757              of reading the next character, we match the back reference. */
758           int so, eo, bt = trans_i->u.backref;
759           int bt_len;
760           int result;
761
762           /* Get the substring we need to match against.  Remember to
763              turn off REG_NOSUB temporarily. */
764           tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB,
765                           tnfa, tags, pos);
766           so = pmatch[bt].rm_so;
767           eo = pmatch[bt].rm_eo;
768           bt_len = eo - so;
769
770           result = strncmp((const char*)string + so, str_byte - 1,
771                                  (size_t)bt_len);
772
773           if (result == 0)
774             {
775               /* Back reference matched.  Check for infinite loop. */
776               if (bt_len == 0)
777                 empty_br_match = 1;
778               if (empty_br_match && states_seen[trans_i->state_id])
779                 {
780                   goto backtrack;
781                 }
782
783               states_seen[trans_i->state_id] = empty_br_match;
784
785               /* Advance in input string and resync `prev_c', `next_c'
786                  and pos. */
787               str_byte += bt_len - 1;
788               pos += bt_len - 1;
789               GET_NEXT_WCHAR();
790             }
791           else
792             {
793               goto backtrack;
794             }
795         }
796       else
797         {
798           /* Check for end of string. */
799           if (next_c == L'\0')
800                 goto backtrack;
801
802           /* Read the next character. */
803           GET_NEXT_WCHAR();
804         }
805
806       next_state = NULL;
807       for (trans_i = state; trans_i->state; trans_i++)
808         {
809           if (trans_i->code_min <= (tre_cint_t)prev_c
810               && trans_i->code_max >= (tre_cint_t)prev_c)
811             {
812               if (trans_i->assertions
813                   && (CHECK_ASSERTIONS(trans_i->assertions)
814                       || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)))
815                 {
816                   continue;
817                 }
818
819               if (next_state == NULL)
820                 {
821                   /* First matching transition. */
822                   next_state = trans_i->state;
823                   next_tags = trans_i->tags;
824                 }
825               else
826                 {
827                   /* Second matching transition.  We may need to backtrack here
828                      to take this transition instead of the first one, so we
829                      push this transition in the backtracking stack so we can
830                      jump back here if needed. */
831                   BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,
832                                 trans_i->state_id, next_c, tags, mbstate);
833                   {
834                     int *tmp;
835                     for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
836                       stack->item.tags[*tmp] = pos;
837                   }
838 #if 0 /* XXX - it's important not to look at all transitions here to keep
839          the stack small! */
840                   break;
841 #endif
842                 }
843             }
844         }
845
846       if (next_state != NULL)
847         {
848           /* Matching transitions were found.  Take the first one. */
849           state = next_state;
850
851           /* Update the tag values. */
852           if (next_tags)
853             while (*next_tags >= 0)
854               tags[*next_tags++] = pos;
855         }
856       else
857         {
858         backtrack:
859           /* A matching transition was not found.  Try to backtrack. */
860           if (stack->prev)
861             {
862               if (stack->item.state->assertions & ASSERT_BACKREF)
863                 {
864                   states_seen[stack->item.state_id] = 0;
865                 }
866
867               BT_STACK_POP();
868             }
869           else if (match_eo < 0)
870             {
871               /* Try starting from a later position in the input string. */
872               /* Check for end of string. */
873               if (next_c == L'\0')
874                     {
875                       break;
876                     }
877               next_c = next_c_start;
878 #ifdef TRE_MBSTATE
879               mbstate = mbstate_start;
880 #endif /* TRE_MBSTATE */
881               str_byte = str_byte_start;
882               goto retry;
883             }
884           else
885             {
886               break;
887             }
888         }
889     }
890
891   ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
892   *match_end_ofs = match_eo;
893
894  error_exit:
895   tre_bt_mem_destroy(mem);
896 #ifndef TRE_USE_ALLOCA
897   if (tags)
898     xfree(tags);
899   if (pmatch)
900     xfree(pmatch);
901   if (states_seen)
902     xfree(states_seen);
903 #endif /* !TRE_USE_ALLOCA */
904
905   return ret;
906 }
907
908 /***********************************************************************
909  from regexec.c
910 ***********************************************************************/
911
912 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
913    endpoint values. */
914 static void
915 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
916                 const tre_tnfa_t *tnfa, int *tags, int match_eo)
917 {
918   tre_submatch_data_t *submatch_data;
919   unsigned int i, j;
920   int *parents;
921
922   i = 0;
923   if (match_eo >= 0 && !(cflags & REG_NOSUB))
924     {
925       /* Construct submatch offsets from the tags. */
926       submatch_data = tnfa->submatch_data;
927       while (i < tnfa->num_submatches && i < nmatch)
928         {
929           if (submatch_data[i].so_tag == tnfa->end_tag)
930             pmatch[i].rm_so = match_eo;
931           else
932             pmatch[i].rm_so = tags[submatch_data[i].so_tag];
933
934           if (submatch_data[i].eo_tag == tnfa->end_tag)
935             pmatch[i].rm_eo = match_eo;
936           else
937             pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
938
939           /* If either of the endpoints were not used, this submatch
940              was not part of the match. */
941           if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
942             pmatch[i].rm_so = pmatch[i].rm_eo = -1;
943
944           i++;
945         }
946       /* Reset all submatches that are not within all of their parent
947          submatches. */
948       i = 0;
949       while (i < tnfa->num_submatches && i < nmatch)
950         {
951           if (pmatch[i].rm_eo == -1)
952             assert(pmatch[i].rm_so == -1);
953           assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
954
955           parents = submatch_data[i].parents;
956           if (parents != NULL)
957             for (j = 0; parents[j] >= 0; j++)
958               {
959                 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
960                     || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
961                   pmatch[i].rm_so = pmatch[i].rm_eo = -1;
962               }
963           i++;
964         }
965     }
966
967   while (i < nmatch)
968     {
969       pmatch[i].rm_so = -1;
970       pmatch[i].rm_eo = -1;
971       i++;
972     }
973 }
974
975
976 /*
977   Wrapper functions for POSIX compatible regexp matching.
978 */
979
980 int
981 regexec(const regex_t *restrict preg, const char *restrict string,
982           size_t nmatch, regmatch_t pmatch[restrict], int eflags)
983 {
984   tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD;
985   reg_errcode_t status;
986   int *tags = NULL, eo;
987   if (tnfa->cflags & REG_NOSUB) nmatch = 0;
988   if (tnfa->num_tags > 0 && nmatch > 0)
989     {
990       tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
991       if (tags == NULL)
992         return REG_ESPACE;
993     }
994
995   /* Dispatch to the appropriate matcher. */
996   if (tnfa->have_backrefs)
997     {
998       /* The regex has back references, use the backtracking matcher. */
999       status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo);
1000     }
1001   else
1002     {
1003       /* Exact matching, no back references, use the parallel matcher. */
1004       status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo);
1005     }
1006
1007   if (status == REG_OK)
1008     /* A match was found, so fill the submatch registers. */
1009     tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1010   if (tags)
1011     xfree(tags);
1012   return status;
1013 }