2 regexec.c - TRE POSIX compatible matching functions (and more).
4 Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>.
6 This library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
16 You should have received a copy of the GNU Lesser General Public
17 License along with this library; if not, write to the Free Software
18 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
35 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
36 const tre_tnfa_t *tnfa, int *tags, int match_eo);
38 /***********************************************************************
39 from tre-match-utils.h
40 ***********************************************************************/
42 #define GET_NEXT_WCHAR() do { \
43 prev_c = next_c; pos += pos_add_next; \
44 if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX)) <= 0) { \
45 if (pos_add_next < 0) return REG_NOMATCH; \
46 else pos_add_next++; \
48 str_byte += pos_add_next; \
51 #define CHECK_ASSERTIONS(assertions) \
52 (((assertions & ASSERT_AT_BOL) \
53 && (pos > 0 || reg_notbol) \
54 && (prev_c != L'\n' || !reg_newline)) \
55 || ((assertions & ASSERT_AT_EOL) \
56 && (next_c != L'\0' || reg_noteol) \
57 && (next_c != L'\n' || !reg_newline)))
59 /* Returns 1 if `t1' wins `t2', 0 otherwise. */
61 tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions,
65 for (i = 0; i < num_tags; i++)
67 if (tag_directions[i] == TRE_TAG_MINIMIZE)
87 tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase)
89 DPRINT(("neg_char_classes_test: %p, %d, %d\n", classes, wc, icase));
90 while (*classes != (tre_ctype_t)0)
91 if ((!icase && tre_isctype(wc, *classes))
92 || (icase && (tre_isctype(tre_toupper(wc), *classes)
93 || tre_isctype(tre_tolower(wc), *classes))))
94 return 1; /* Match. */
97 return 0; /* No match. */
101 /***********************************************************************
102 from tre-match-parallel.c
103 ***********************************************************************/
106 This algorithm searches for matches basically by reading characters
107 in the searched string one by one, starting at the beginning. All
108 matching paths in the TNFA are traversed in parallel. When two or
109 more paths reach the same state, exactly one is chosen according to
110 tag ordering rules; if returning submatches is not required it does
111 not matter which path is chosen.
113 The worst case time required for finding the leftmost and longest
114 match, or determining that there is no match, is always linearly
115 dependent on the length of the text being searched.
117 This algorithm cannot handle TNFAs with back referencing nodes.
118 See `tre-match-backtrack.c'.
123 tre_tnfa_transition_t *state;
135 tre_print_reach(const tre_tnfa_t *tnfa, tre_tnfa_reach_t *reach, int num_tags)
139 while (reach->state != NULL)
141 DPRINT((" %p", (void *)reach->state));
145 for (i = 0; i < num_tags; i++)
147 DPRINT(("%d:%d", i, reach->tags[i]));
148 if (i < (num_tags-1))
157 #endif /* TRE_DEBUG */
160 tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, int len,
161 int *match_tags, int eflags, int *match_end_ofs)
163 /* State variables required by GET_NEXT_WCHAR. */
164 tre_char_t prev_c = 0, next_c = 0;
165 const char *str_byte = string;
167 int pos_add_next = 1;
170 #endif /* TRE_MBSTATE */
171 int reg_notbol = eflags & REG_NOTBOL;
172 int reg_noteol = eflags & REG_NOTEOL;
173 int reg_newline = tnfa->cflags & REG_NEWLINE;
176 tre_tnfa_transition_t *trans_i;
177 tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i;
178 tre_reach_pos_t *reach_pos;
182 int match_eo = -1; /* end offset of match (-1 if no match found yet) */
184 int *tmp_tags = NULL;
188 memset(&mbstate, '\0', sizeof(mbstate));
189 #endif /* TRE_MBSTATE */
194 num_tags = tnfa->num_tags;
196 /* Allocate memory for temporary data required for matching. This needs to
197 be done for every matching operation to be thread safe. This allocates
198 everything in a single large block from the stack frame using alloca()
199 or with malloc() if alloca is unavailable. */
201 int tbytes, rbytes, pbytes, xbytes, total_bytes;
203 /* Compute the length of the block we need. */
204 tbytes = sizeof(*tmp_tags) * num_tags;
205 rbytes = sizeof(*reach_next) * (tnfa->num_states + 1);
206 pbytes = sizeof(*reach_pos) * tnfa->num_states;
207 xbytes = sizeof(int) * num_tags;
209 (sizeof(long) - 1) * 4 /* for alignment paddings */
210 + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes;
212 /* Allocate the memory. */
213 #ifdef TRE_USE_ALLOCA
214 buf = alloca(total_bytes);
215 #else /* !TRE_USE_ALLOCA */
216 buf = xmalloc(total_bytes);
217 #endif /* !TRE_USE_ALLOCA */
220 memset(buf, 0, total_bytes);
222 /* Get the various pointers within tmp_buf (properly aligned). */
223 tmp_tags = (void *)buf;
224 tmp_buf = buf + tbytes;
225 tmp_buf += ALIGN(tmp_buf, long);
226 reach_next = (void *)tmp_buf;
228 tmp_buf += ALIGN(tmp_buf, long);
229 reach = (void *)tmp_buf;
231 tmp_buf += ALIGN(tmp_buf, long);
232 reach_pos = (void *)tmp_buf;
234 tmp_buf += ALIGN(tmp_buf, long);
235 for (i = 0; i < tnfa->num_states; i++)
237 reach[i].tags = (void *)tmp_buf;
239 reach_next[i].tags = (void *)tmp_buf;
244 for (i = 0; i < tnfa->num_states; i++)
245 reach_pos[i].pos = -1;
250 DPRINT(("length: %d\n", len));
251 DPRINT(("pos:chr/code | states and tags\n"));
252 DPRINT(("-------------+------------------------------------------------\n"));
254 reach_next_i = reach_next;
257 /* If no match found yet, add the initial states to `reach_next'. */
261 trans_i = tnfa->initial;
262 while (trans_i->state != NULL)
264 if (reach_pos[trans_i->state_id].pos < pos)
266 if (trans_i->assertions
267 && CHECK_ASSERTIONS(trans_i->assertions))
269 DPRINT(("assertion failed\n"));
274 DPRINT((" %p", (void *)trans_i->state));
275 reach_next_i->state = trans_i->state;
276 for (i = 0; i < num_tags; i++)
277 reach_next_i->tags[i] = -1;
278 tag_i = trans_i->tags;
282 if (*tag_i < num_tags)
283 reach_next_i->tags[*tag_i] = pos;
286 if (reach_next_i->state == tnfa->final)
288 DPRINT((" found empty match\n"));
291 for (i = 0; i < num_tags; i++)
292 match_tags[i] = reach_next_i->tags[i];
294 reach_pos[trans_i->state_id].pos = pos;
295 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
301 reach_next_i->state = NULL;
305 if (num_tags == 0 || reach_next_i == reach_next)
306 /* We have found a match. */
310 /* Check for end of string. */
316 DPRINT(("%3d:%2lc/%05d |", pos - 1, (tre_cint_t)prev_c, (int)prev_c));
317 tre_print_reach(tnfa, reach_next, num_tags);
318 DPRINT(("%3d:%2lc/%05d |", pos, (tre_cint_t)next_c, (int)next_c));
319 tre_print_reach(tnfa, reach_next, num_tags);
320 #endif /* TRE_DEBUG */
322 /* Swap `reach' and `reach_next'. */
325 reach_next = reach_i;
327 /* For each state in `reach' see if there is a transition leaving with
328 the current input symbol to a state not yet in `reach_next', and
329 add the destination states to `reach_next'. */
330 reach_next_i = reach_next;
331 for (reach_i = reach; reach_i->state; reach_i++)
333 for (trans_i = reach_i->state; trans_i->state; trans_i++)
335 /* Does this transition match the input symbol? */
336 if (trans_i->code_min <= prev_c &&
337 trans_i->code_max >= prev_c)
339 if (trans_i->assertions
340 && (CHECK_ASSERTIONS(trans_i->assertions)
341 /* Handle character class transitions. */
342 || ((trans_i->assertions & ASSERT_CHAR_CLASS)
343 && !(tnfa->cflags & REG_ICASE)
344 && !tre_isctype((tre_cint_t)prev_c,
346 || ((trans_i->assertions & ASSERT_CHAR_CLASS)
347 && (tnfa->cflags & REG_ICASE)
348 && (!tre_isctype(tre_tolower((tre_cint_t)prev_c),
350 && !tre_isctype(tre_toupper((tre_cint_t)prev_c),
352 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)
353 && tre_neg_char_classes_match(trans_i->neg_classes,
355 tnfa->cflags & REG_ICASE))))
357 DPRINT(("assertion failed\n"));
361 /* Compute the tags after this transition. */
362 for (i = 0; i < num_tags; i++)
363 tmp_tags[i] = reach_i->tags[i];
364 tag_i = trans_i->tags;
368 if (*tag_i < num_tags)
369 tmp_tags[*tag_i] = pos;
373 if (reach_pos[trans_i->state_id].pos < pos)
375 /* Found an unvisited node. */
376 reach_next_i->state = trans_i->state;
377 tmp_iptr = reach_next_i->tags;
378 reach_next_i->tags = tmp_tags;
380 reach_pos[trans_i->state_id].pos = pos;
381 reach_pos[trans_i->state_id].tags = &reach_next_i->tags;
383 if (reach_next_i->state == tnfa->final
386 && reach_next_i->tags[0] <= match_tags[0])))
388 DPRINT((" found match %p\n", trans_i->state));
391 for (i = 0; i < num_tags; i++)
392 match_tags[i] = reach_next_i->tags[i];
399 assert(reach_pos[trans_i->state_id].pos == pos);
400 /* Another path has also reached this state. We choose
401 the winner by examining the tag values for both
403 if (tre_tag_order(num_tags, tnfa->tag_directions,
405 *reach_pos[trans_i->state_id].tags))
407 /* The new path wins. */
408 tmp_iptr = *reach_pos[trans_i->state_id].tags;
409 *reach_pos[trans_i->state_id].tags = tmp_tags;
410 if (trans_i->state == tnfa->final)
412 DPRINT((" found better match\n"));
415 for (i = 0; i < num_tags; i++)
416 match_tags[i] = tmp_tags[i];
424 reach_next_i->state = NULL;
427 DPRINT(("match end offset = %d\n", match_eo));
429 #ifndef TRE_USE_ALLOCA
432 #endif /* !TRE_USE_ALLOCA */
434 *match_end_ofs = match_eo;
435 return match_eo >= 0 ? REG_OK : REG_NOMATCH;
439 /***********************************************************************
440 from tre-match-backtrack.c
441 ***********************************************************************/
444 This matcher is for regexps that use back referencing. Regexp matching
445 with back referencing is an NP-complete problem on the number of back
446 references. The easiest way to match them is to use a backtracking
447 routine which basically goes through all possible paths in the TNFA
448 and chooses the one which results in the best (leftmost and longest)
449 match. This can be spectacularly expensive and may run out of stack
450 space, but there really is no better known generic algorithm. Quoting
451 Henry Spencer from comp.compilers:
452 <URL: http://compilers.iecc.com/comparch/article/93-03-102>
454 POSIX.2 REs require longest match, which is really exciting to
455 implement since the obsolete ("basic") variant also includes
456 \<digit>. I haven't found a better way of tackling this than doing
457 a preliminary match using a DFA (or simulation) on a modified RE
458 that just replicates subREs for \<digit>, and then doing a
459 backtracking match to determine whether the subRE matches were
460 right. This can be rather slow, but I console myself with the
461 thought that people who use \<digit> deserve very slow execution.
462 (Pun unintentional but very appropriate.)
468 const char *str_byte;
469 tre_tnfa_transition_t *state;
475 #endif /* TRE_MBSTATE */
476 } tre_backtrack_item_t;
478 typedef struct tre_backtrack_struct {
479 tre_backtrack_item_t item;
480 struct tre_backtrack_struct *prev;
481 struct tre_backtrack_struct *next;
485 #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate)
486 #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate
487 #else /* !TRE_MBSTATE */
488 #define BT_STACK_MBSTATE_IN
489 #define BT_STACK_MBSTATE_OUT
490 #endif /* !TRE_MBSTATE */
493 #ifdef TRE_USE_ALLOCA
494 #define tre_bt_mem_new tre_mem_newa
495 #define tre_bt_mem_alloc tre_mem_alloca
496 #define tre_bt_mem_destroy(obj) do { } while (0)
497 #else /* !TRE_USE_ALLOCA */
498 #define tre_bt_mem_new tre_mem_new
499 #define tre_bt_mem_alloc tre_mem_alloc
500 #define tre_bt_mem_destroy tre_mem_destroy
501 #endif /* !TRE_USE_ALLOCA */
504 #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate) \
511 s = tre_bt_mem_alloc(mem, sizeof(*s)); \
514 tre_bt_mem_destroy(mem); \
520 xfree(states_seen); \
525 s->item.tags = tre_bt_mem_alloc(mem, \
526 sizeof(*tags) * tnfa->num_tags); \
529 tre_bt_mem_destroy(mem); \
535 xfree(states_seen); \
542 stack = stack->next; \
543 stack->item.pos = (_pos); \
544 stack->item.str_byte = (_str_byte); \
545 stack->item.state = (_state); \
546 stack->item.state_id = (_state_id); \
547 stack->item.next_c = (_next_c); \
548 for (i = 0; i < tnfa->num_tags; i++) \
549 stack->item.tags[i] = (_tags)[i]; \
550 BT_STACK_MBSTATE_IN; \
554 #define BT_STACK_POP() \
558 assert(stack->prev); \
559 pos = stack->item.pos; \
560 str_byte = stack->item.str_byte; \
561 state = stack->item.state; \
562 next_c = stack->item.next_c; \
563 for (i = 0; i < tnfa->num_tags; i++) \
564 tags[i] = stack->item.tags[i]; \
565 BT_STACK_MBSTATE_OUT; \
566 stack = stack->prev; \
571 #define MIN(a, b) ((a) <= (b) ? (a) : (b))
574 tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string,
575 int len, int *match_tags,
576 int eflags, int *match_end_ofs)
578 /* State variables required by GET_NEXT_WCHAR. */
579 tre_char_t prev_c = 0, next_c = 0;
580 const char *str_byte = string;
582 int pos_add_next = 1;
585 #endif /* TRE_MBSTATE */
586 int reg_notbol = eflags & REG_NOTBOL;
587 int reg_noteol = eflags & REG_NOTEOL;
588 int reg_newline = tnfa->cflags & REG_NEWLINE;
590 /* These are used to remember the necessary values of the above
591 variables to return to the position where the current search
594 const char *str_byte_start;
597 mbstate_t mbstate_start;
598 #endif /* TRE_MBSTATE */
600 /* Compilation flags for this regexp. */
601 int cflags = tnfa->cflags;
603 /* End offset of best match so far, or -1 if no match found yet. */
606 int *next_tags, *tags = NULL;
607 /* Current TNFA state. */
608 tre_tnfa_transition_t *state;
609 int *states_seen = NULL;
611 /* Memory allocator to for allocating the backtracking stack. */
612 tre_mem_t mem = tre_bt_mem_new();
614 /* The backtracking stack. */
615 tre_backtrack_t stack;
617 tre_tnfa_transition_t *trans_i;
618 regmatch_t *pmatch = NULL;
622 memset(&mbstate, '\0', sizeof(mbstate));
623 #endif /* TRE_MBSTATE */
627 stack = tre_bt_mem_alloc(mem, sizeof(*stack));
636 #ifdef TRE_USE_ALLOCA
637 tags = alloca(sizeof(*tags) * tnfa->num_tags);
638 pmatch = alloca(sizeof(*pmatch) * tnfa->num_submatches);
639 states_seen = alloca(sizeof(*states_seen) * tnfa->num_states);
640 #else /* !TRE_USE_ALLOCA */
641 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
647 pmatch = xmalloc(sizeof(*pmatch) * tnfa->num_submatches);
653 states_seen = xmalloc(sizeof(*states_seen) * tnfa->num_states);
659 #endif /* !TRE_USE_ALLOCA */
664 for (i = 0; i < tnfa->num_tags; i++)
670 for (i = 0; i < tnfa->num_states; i++)
678 next_c_start = next_c;
679 str_byte_start = str_byte;
681 mbstate_start = mbstate;
682 #endif /* TRE_MBSTATE */
684 /* Handle initial states. */
686 for (trans_i = tnfa->initial; trans_i->state; trans_i++)
688 DPRINT(("> init %p, prev_c %lc\n", trans_i->state, (tre_cint_t)prev_c));
689 if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions))
691 DPRINT(("assert failed\n"));
696 /* Start from this state. */
697 state = trans_i->state;
698 next_tags = trans_i->tags;
702 /* Backtrack to this state. */
703 DPRINT(("saving state %d for backtracking\n", trans_i->state_id));
704 BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
705 trans_i->state_id, next_c, tags, mbstate);
707 int *tmp = trans_i->tags;
710 stack->item.tags[*tmp++] = pos;
716 for (; *next_tags >= 0; next_tags++)
717 tags[*next_tags] = pos;
720 DPRINT(("entering match loop, pos %d, str_byte %p\n", pos, str_byte));
721 DPRINT(("pos:chr/code | state and tags\n"));
722 DPRINT(("-------------+------------------------------------------------\n"));
729 tre_tnfa_transition_t *trans_i, *next_state;
732 DPRINT(("start loop\n"));
733 if (state == tnfa->final)
735 DPRINT((" match found, %d %d\n", match_eo, pos));
739 && tre_tag_order(tnfa->num_tags, tnfa->tag_directions,
743 /* This match wins the previous match. */
744 DPRINT((" win previous\n"));
747 for (i = 0; i < tnfa->num_tags; i++)
748 match_tags[i] = tags[i];
750 /* Our TNFAs never have transitions leaving from the final state,
751 so we jump right to backtracking. */
756 DPRINT(("%3d:%2lc/%05d | %p ", pos, (tre_cint_t)next_c, (int)next_c,
760 for (i = 0; i < tnfa->num_tags; i++)
761 DPRINT(("%d%s", tags[i], i < tnfa->num_tags - 1 ? ", " : ""));
764 #endif /* TRE_DEBUG */
766 /* Go to the next character in the input string. */
769 if (trans_i->state && trans_i->assertions & ASSERT_BACKREF)
771 /* This is a back reference state. All transitions leaving from
772 this state have the same back reference "assertion". Instead
773 of reading the next character, we match the back reference. */
774 int so, eo, bt = trans_i->u.backref;
778 DPRINT((" should match back reference %d\n", bt));
779 /* Get the substring we need to match against. Remember to
780 turn off REG_NOSUB temporarily. */
781 tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & !REG_NOSUB,
783 so = pmatch[bt].rm_so;
784 eo = pmatch[bt].rm_eo;
789 result = strncmp((char*)string + so, str_byte - 1, bt_len);
791 else if (len - pos < bt_len)
794 result = memcmp((char*)string + so, str_byte - 1, bt_len);
796 /* We can ignore multibyte characters here because the backref
797 string is already aligned at character boundaries. */
800 /* Back reference matched. Check for infinite loop. */
803 if (empty_br_match && states_seen[trans_i->state_id])
805 DPRINT((" avoid loop\n"));
809 states_seen[trans_i->state_id] = empty_br_match;
811 /* Advance in input string and resync `prev_c', `next_c'
813 DPRINT((" back reference matched\n"));
814 str_byte += bt_len - 1;
817 DPRINT((" pos now %d\n", pos));
821 DPRINT((" back reference did not match\n"));
827 /* Check for end of string. */
839 /* Read the next character. */
844 for (trans_i = state; trans_i->state; trans_i++)
846 DPRINT((" transition %d-%d (%c-%c) %d to %d\n",
847 trans_i->code_min, trans_i->code_max,
848 trans_i->code_min, trans_i->code_max,
849 trans_i->assertions, trans_i->state_id));
850 if (trans_i->code_min <= prev_c && trans_i->code_max >= prev_c)
852 if (trans_i->assertions
853 && (CHECK_ASSERTIONS(trans_i->assertions)
854 /* Handle character class transitions. */
855 || ((trans_i->assertions & ASSERT_CHAR_CLASS)
856 && !(cflags & REG_ICASE)
857 && !tre_isctype((tre_cint_t)prev_c, trans_i->u.class))
858 || ((trans_i->assertions & ASSERT_CHAR_CLASS)
859 && (cflags & REG_ICASE)
860 && (!tre_isctype(tre_tolower((tre_cint_t)prev_c),
862 && !tre_isctype(tre_toupper((tre_cint_t)prev_c),
864 || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG)
865 && tre_neg_char_classes_match(trans_i->neg_classes,
867 cflags & REG_ICASE))))
869 DPRINT((" assertion failed\n"));
873 if (next_state == NULL)
875 /* First matching transition. */
876 DPRINT((" Next state is %d\n", trans_i->state_id));
877 next_state = trans_i->state;
878 next_tags = trans_i->tags;
882 /* Second mathing transition. We may need to backtrack here
883 to take this transition instead of the first one, so we
884 push this transition in the backtracking stack so we can
885 jump back here if needed. */
886 DPRINT((" saving state %d for backtracking\n",
888 BT_STACK_PUSH(pos, str_byte, str_wide, trans_i->state,
889 trans_i->state_id, next_c, tags, mbstate);
892 for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++)
893 stack->item.tags[*tmp] = pos;
895 #if 0 /* XXX - it's important not to look at all transitions here to keep
903 if (next_state != NULL)
905 /* Matching transitions were found. Take the first one. */
908 /* Update the tag values. */
910 while (*next_tags >= 0)
911 tags[*next_tags++] = pos;
916 /* A matching transition was not found. Try to backtrack. */
919 DPRINT((" backtracking\n"));
920 if (stack->item.state->assertions & ASSERT_BACKREF)
922 DPRINT((" states_seen[%d] = 0\n",
923 stack->item.state_id));
924 states_seen[stack->item.state_id] = 0;
929 else if (match_eo < 0)
931 /* Try starting from a later position in the input string. */
932 /* Check for end of string. */
937 DPRINT(("end of string.\n"));
945 DPRINT(("end of string.\n"));
949 DPRINT(("restarting from next start position\n"));
950 next_c = next_c_start;
952 mbstate = mbstate_start;
953 #endif /* TRE_MBSTATE */
954 str_byte = str_byte_start;
959 DPRINT(("finished\n"));
965 ret = match_eo >= 0 ? REG_OK : REG_NOMATCH;
966 *match_end_ofs = match_eo;
969 tre_bt_mem_destroy(mem);
970 #ifndef TRE_USE_ALLOCA
977 #endif /* !TRE_USE_ALLOCA */
983 /***********************************************************************
985 ***********************************************************************/
987 /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match
990 tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags,
991 const tre_tnfa_t *tnfa, int *tags, int match_eo)
993 tre_submatch_data_t *submatch_data;
998 if (match_eo >= 0 && !(cflags & REG_NOSUB))
1000 /* Construct submatch offsets from the tags. */
1001 DPRINT(("end tag = t%d = %d\n", tnfa->end_tag, match_eo));
1002 submatch_data = tnfa->submatch_data;
1003 while (i < tnfa->num_submatches && i < nmatch)
1005 if (submatch_data[i].so_tag == tnfa->end_tag)
1006 pmatch[i].rm_so = match_eo;
1008 pmatch[i].rm_so = tags[submatch_data[i].so_tag];
1010 if (submatch_data[i].eo_tag == tnfa->end_tag)
1011 pmatch[i].rm_eo = match_eo;
1013 pmatch[i].rm_eo = tags[submatch_data[i].eo_tag];
1015 /* If either of the endpoints were not used, this submatch
1016 was not part of the match. */
1017 if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1)
1018 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
1020 DPRINT(("pmatch[%d] = {t%d = %d, t%d = %d}\n", i,
1021 submatch_data[i].so_tag, pmatch[i].rm_so,
1022 submatch_data[i].eo_tag, pmatch[i].rm_eo));
1025 /* Reset all submatches that are not within all of their parent
1028 while (i < tnfa->num_submatches && i < nmatch)
1030 if (pmatch[i].rm_eo == -1)
1031 assert(pmatch[i].rm_so == -1);
1032 assert(pmatch[i].rm_so <= pmatch[i].rm_eo);
1034 parents = submatch_data[i].parents;
1035 if (parents != NULL)
1036 for (j = 0; parents[j] >= 0; j++)
1038 DPRINT(("pmatch[%d] parent %d\n", i, parents[j]));
1039 if (pmatch[i].rm_so < pmatch[parents[j]].rm_so
1040 || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo)
1041 pmatch[i].rm_so = pmatch[i].rm_eo = -1;
1049 pmatch[i].rm_so = -1;
1050 pmatch[i].rm_eo = -1;
1057 Wrapper functions for POSIX compatible regexp matching.
1061 tre_match(const tre_tnfa_t *tnfa, const void *string, size_t len,
1062 size_t nmatch, regmatch_t pmatch[], int eflags)
1064 reg_errcode_t status;
1065 int *tags = NULL, eo;
1066 if (tnfa->num_tags > 0 && nmatch > 0)
1068 #ifdef TRE_USE_ALLOCA
1069 tags = alloca(sizeof(*tags) * tnfa->num_tags);
1070 #else /* !TRE_USE_ALLOCA */
1071 tags = xmalloc(sizeof(*tags) * tnfa->num_tags);
1072 #endif /* !TRE_USE_ALLOCA */
1077 /* Dispatch to the appropriate matcher. */
1078 if (tnfa->have_backrefs)
1080 /* The regex has back references, use the backtracking matcher. */
1081 status = tre_tnfa_run_backtrack(tnfa, string, len, tags, eflags, &eo);
1085 /* Exact matching, no back references, use the parallel matcher. */
1086 status = tre_tnfa_run_parallel(tnfa, string, len, tags, eflags, &eo);
1089 if (status == REG_OK)
1090 /* A match was found, so fill the submatch registers. */
1091 tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo);
1092 #ifndef TRE_USE_ALLOCA
1095 #endif /* !TRE_USE_ALLOCA */
1100 regexec(const regex_t *preg, const char *str,
1101 size_t nmatch, regmatch_t pmatch[], int eflags)
1103 return tre_match((void *)preg->TRE_REGEX_T_FIELD, str, -1,
1104 nmatch, pmatch, eflags);