File: | src/regex/regexec.c |
Warning: | line 781, column 9 Dereference of null pointer |
1 | /* | |||
2 | regexec.c - TRE POSIX compatible matching functions (and more). | |||
3 | ||||
4 | Copyright (c) 2001-2009 Ville Laurikari <vl@iki.fi> | |||
5 | All rights reserved. | |||
6 | ||||
7 | Redistribution and use in source and binary forms, with or without | |||
8 | modification, are permitted provided that the following conditions | |||
9 | are met: | |||
10 | ||||
11 | 1. Redistributions of source code must retain the above copyright | |||
12 | notice, this list of conditions and the following disclaimer. | |||
13 | ||||
14 | 2. Redistributions in binary form must reproduce the above copyright | |||
15 | notice, this list of conditions and the following disclaimer in the | |||
16 | documentation and/or other materials provided with the distribution. | |||
17 | ||||
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS | |||
19 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |||
20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |||
21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |||
22 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |||
23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |||
24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |||
25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |||
26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |||
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |||
28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |||
29 | ||||
30 | */ | |||
31 | ||||
32 | #include <stdlib.h> | |||
33 | #include <string.h> | |||
34 | #include <wchar.h> | |||
35 | #include <wctype.h> | |||
36 | #include <limits.h> | |||
37 | #include <stdint.h> | |||
38 | ||||
39 | #include <regex.h> | |||
40 | ||||
41 | #include "tre.h" | |||
42 | ||||
43 | #include <assert.h> | |||
44 | ||||
45 | static void | |||
46 | tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | |||
47 | const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo); | |||
48 | ||||
49 | /*********************************************************************** | |||
50 | from tre-match-utils.h | |||
51 | ***********************************************************************/ | |||
52 | ||||
53 | #define GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0) do { \ | |||
54 | prev_c = next_c; pos += pos_add_next; \ | |||
55 | if ((pos_add_next = mbtowc(&next_c, str_byte, MB_LEN_MAX4)) <= 0) { \ | |||
56 | if (pos_add_next < 0) { ret = REG_NOMATCH1; goto error_exit; } \ | |||
57 | else pos_add_next++; \ | |||
58 | } \ | |||
59 | str_byte += pos_add_next; \ | |||
60 | } while (0) | |||
61 | ||||
62 | #define IS_WORD_CHAR(c)((c) == L'_' || iswalnum(c)) ((c) == L'_' || tre_isalnumiswalnum(c)) | |||
63 | ||||
64 | #define CHECK_ASSERTIONS(assertions)(((assertions & 1) && (pos > 0 || reg_notbol) && (prev_c != L'\n' || !reg_newline)) || ((assertions & 2) && (next_c != L'\0' || reg_noteol) && (next_c != L'\n' || !reg_newline)) || ((assertions & 16) && (((prev_c ) == L'_' || iswalnum(prev_c)) || !((next_c) == L'_' || iswalnum (next_c)))) || ((assertions & 32) && (!((prev_c) == L'_' || iswalnum(prev_c)) || ((next_c) == L'_' || iswalnum(next_c )))) || ((assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c )) == ((next_c) == L'_' || iswalnum(next_c)))) || ((assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) \ | |||
65 | (((assertions & ASSERT_AT_BOL1) \ | |||
66 | && (pos > 0 || reg_notbol) \ | |||
67 | && (prev_c != L'\n' || !reg_newline)) \ | |||
68 | || ((assertions & ASSERT_AT_EOL2) \ | |||
69 | && (next_c != L'\0' || reg_noteol) \ | |||
70 | && (next_c != L'\n' || !reg_newline)) \ | |||
71 | || ((assertions & ASSERT_AT_BOW16) \ | |||
72 | && (IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) || !IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
73 | || ((assertions & ASSERT_AT_EOW32) \ | |||
74 | && (!IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) || IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
75 | || ((assertions & ASSERT_AT_WB64) \ | |||
76 | && (pos != 0 && next_c != L'\0' \ | |||
77 | && IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) == IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c)))) \ | |||
78 | || ((assertions & ASSERT_AT_WB_NEG128) \ | |||
79 | && (pos == 0 || next_c == L'\0' \ | |||
80 | || IS_WORD_CHAR(prev_c)((prev_c) == L'_' || iswalnum(prev_c)) != IS_WORD_CHAR(next_c)((next_c) == L'_' || iswalnum(next_c))))) | |||
81 | ||||
82 | #define CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))) \ | |||
83 | (((trans_i->assertions & ASSERT_CHAR_CLASS4) \ | |||
84 | && !(tnfa->cflags & REG_ICASE2) \ | |||
85 | && !tre_isctypeiswctype((tre_cint_t)prev_c, trans_i->u.class)) \ | |||
86 | || ((trans_i->assertions & ASSERT_CHAR_CLASS4) \ | |||
87 | && (tnfa->cflags & REG_ICASE2) \ | |||
88 | && !tre_isctypeiswctype(tre_tolowertowlower((tre_cint_t)prev_c),trans_i->u.class) \ | |||
89 | && !tre_isctypeiswctype(tre_touppertowupper((tre_cint_t)prev_c),trans_i->u.class)) \ | |||
90 | || ((trans_i->assertions & ASSERT_CHAR_CLASS_NEG8) \ | |||
91 | && tre_neg_char_classes_match(trans_i->neg_classes,(tre_cint_t)prev_c,\ | |||
92 | tnfa->cflags & REG_ICASE2))) | |||
93 | ||||
94 | ||||
95 | ||||
96 | ||||
97 | /* Returns 1 if `t1' wins `t2', 0 otherwise. */ | |||
98 | static int | |||
99 | tre_tag_order(int num_tags, tre_tag_direction_t *tag_directions, | |||
100 | regoff_t *t1, regoff_t *t2) | |||
101 | { | |||
102 | int i; | |||
103 | for (i = 0; i < num_tags; i++) | |||
104 | { | |||
105 | if (tag_directions[i] == TRE_TAG_MINIMIZE) | |||
106 | { | |||
107 | if (t1[i] < t2[i]) | |||
108 | return 1; | |||
109 | if (t1[i] > t2[i]) | |||
110 | return 0; | |||
111 | } | |||
112 | else | |||
113 | { | |||
114 | if (t1[i] > t2[i]) | |||
115 | return 1; | |||
116 | if (t1[i] < t2[i]) | |||
117 | return 0; | |||
118 | } | |||
119 | } | |||
120 | /* assert(0);*/ | |||
121 | return 0; | |||
122 | } | |||
123 | ||||
124 | static int | |||
125 | tre_neg_char_classes_match(tre_ctype_t *classes, tre_cint_t wc, int icase) | |||
126 | { | |||
127 | while (*classes != (tre_ctype_t)0) | |||
128 | if ((!icase && tre_isctypeiswctype(wc, *classes)) | |||
129 | || (icase && (tre_isctypeiswctype(tre_touppertowupper(wc), *classes) | |||
130 | || tre_isctypeiswctype(tre_tolowertowlower(wc), *classes)))) | |||
131 | return 1; /* Match. */ | |||
132 | else | |||
133 | classes++; | |||
134 | return 0; /* No match. */ | |||
135 | } | |||
136 | ||||
137 | ||||
138 | /*********************************************************************** | |||
139 | from tre-match-parallel.c | |||
140 | ***********************************************************************/ | |||
141 | ||||
142 | /* | |||
143 | This algorithm searches for matches basically by reading characters | |||
144 | in the searched string one by one, starting at the beginning. All | |||
145 | matching paths in the TNFA are traversed in parallel. When two or | |||
146 | more paths reach the same state, exactly one is chosen according to | |||
147 | tag ordering rules; if returning submatches is not required it does | |||
148 | not matter which path is chosen. | |||
149 | ||||
150 | The worst case time required for finding the leftmost and longest | |||
151 | match, or determining that there is no match, is always linearly | |||
152 | dependent on the length of the text being searched. | |||
153 | ||||
154 | This algorithm cannot handle TNFAs with back referencing nodes. | |||
155 | See `tre-match-backtrack.c'. | |||
156 | */ | |||
157 | ||||
158 | typedef struct { | |||
159 | tre_tnfa_transition_t *state; | |||
160 | regoff_t *tags; | |||
161 | } tre_tnfa_reach_t; | |||
162 | ||||
163 | typedef struct { | |||
164 | regoff_t pos; | |||
165 | regoff_t **tags; | |||
166 | } tre_reach_pos_t; | |||
167 | ||||
168 | ||||
169 | static reg_errcode_t | |||
170 | tre_tnfa_run_parallel(const tre_tnfa_t *tnfa, const void *string, | |||
171 | regoff_t *match_tags, int eflags, | |||
172 | regoff_t *match_end_ofs) | |||
173 | { | |||
174 | /* State variables required by GET_NEXT_WCHAR. */ | |||
175 | tre_char_t prev_c = 0, next_c = 0; | |||
176 | const char *str_byte = string; | |||
177 | regoff_t pos = -1; | |||
178 | regoff_t pos_add_next = 1; | |||
179 | #ifdef TRE_MBSTATE | |||
180 | mbstate_t mbstate; | |||
181 | #endif /* TRE_MBSTATE */ | |||
182 | int reg_notbol = eflags & REG_NOTBOL1; | |||
183 | int reg_noteol = eflags & REG_NOTEOL2; | |||
184 | int reg_newline = tnfa->cflags & REG_NEWLINE4; | |||
185 | reg_errcode_t ret; | |||
186 | ||||
187 | char *buf; | |||
188 | tre_tnfa_transition_t *trans_i; | |||
189 | tre_tnfa_reach_t *reach, *reach_next, *reach_i, *reach_next_i; | |||
190 | tre_reach_pos_t *reach_pos; | |||
191 | int *tag_i; | |||
192 | int num_tags, i; | |||
193 | ||||
194 | regoff_t match_eo = -1; /* end offset of match (-1 if no match found yet) */ | |||
195 | int new_match = 0; | |||
196 | regoff_t *tmp_tags = NULL((void*)0); | |||
197 | regoff_t *tmp_iptr; | |||
198 | ||||
199 | #ifdef TRE_MBSTATE | |||
200 | memset(&mbstate, '\0', sizeof(mbstate)); | |||
201 | #endif /* TRE_MBSTATE */ | |||
202 | ||||
203 | if (!match_tags) | |||
204 | num_tags = 0; | |||
205 | else | |||
206 | num_tags = tnfa->num_tags; | |||
207 | ||||
208 | /* Allocate memory for temporary data required for matching. This needs to | |||
209 | be done for every matching operation to be thread safe. This allocates | |||
210 | everything in a single large block with calloc(). */ | |||
211 | { | |||
212 | size_t tbytes, rbytes, pbytes, xbytes, total_bytes; | |||
213 | char *tmp_buf; | |||
214 | ||||
215 | /* Ensure that tbytes and xbytes*num_states cannot overflow, and that | |||
216 | * they don't contribute more than 1/8 of SIZE_MAX to total_bytes. */ | |||
217 | if (num_tags > SIZE_MAX(0xffffffffffffffffu)/(8 * sizeof(regoff_t) * tnfa->num_states)) | |||
218 | goto error_exit; | |||
219 | ||||
220 | /* Likewise check rbytes. */ | |||
221 | if (tnfa->num_states+1 > SIZE_MAX(0xffffffffffffffffu)/(8 * sizeof(*reach_next))) | |||
222 | goto error_exit; | |||
223 | ||||
224 | /* Likewise check pbytes. */ | |||
225 | if (tnfa->num_states > SIZE_MAX(0xffffffffffffffffu)/(8 * sizeof(*reach_pos))) | |||
226 | goto error_exit; | |||
227 | ||||
228 | /* Compute the length of the block we need. */ | |||
229 | tbytes = sizeof(*tmp_tags) * num_tags; | |||
230 | rbytes = sizeof(*reach_next) * (tnfa->num_states + 1); | |||
231 | pbytes = sizeof(*reach_pos) * tnfa->num_states; | |||
232 | xbytes = sizeof(regoff_t) * num_tags; | |||
233 | total_bytes = | |||
234 | (sizeof(long) - 1) * 4 /* for alignment paddings */ | |||
235 | + (rbytes + xbytes * tnfa->num_states) * 2 + tbytes + pbytes; | |||
236 | ||||
237 | /* Allocate the memory. */ | |||
238 | buf = calloc(total_bytes, 1); | |||
239 | if (buf == NULL((void*)0)) | |||
240 | return REG_ESPACE12; | |||
241 | ||||
242 | /* Get the various pointers within tmp_buf (properly aligned). */ | |||
243 | tmp_tags = (void *)buf; | |||
244 | tmp_buf = buf + tbytes; | |||
245 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
246 | reach_next = (void *)tmp_buf; | |||
247 | tmp_buf += rbytes; | |||
248 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
249 | reach = (void *)tmp_buf; | |||
250 | tmp_buf += rbytes; | |||
251 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
252 | reach_pos = (void *)tmp_buf; | |||
253 | tmp_buf += pbytes; | |||
254 | tmp_buf += ALIGN(tmp_buf, long)((((long)tmp_buf) % sizeof(long)) ? (sizeof(long) - (((long)tmp_buf ) % sizeof(long))) : 0); | |||
255 | for (i = 0; i < tnfa->num_states; i++) | |||
256 | { | |||
257 | reach[i].tags = (void *)tmp_buf; | |||
258 | tmp_buf += xbytes; | |||
259 | reach_next[i].tags = (void *)tmp_buf; | |||
260 | tmp_buf += xbytes; | |||
261 | } | |||
262 | } | |||
263 | ||||
264 | for (i = 0; i < tnfa->num_states; i++) | |||
265 | reach_pos[i].pos = -1; | |||
266 | ||||
267 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
268 | pos = 0; | |||
269 | ||||
270 | reach_next_i = reach_next; | |||
271 | while (1) | |||
272 | { | |||
273 | /* If no match found yet, add the initial states to `reach_next'. */ | |||
274 | if (match_eo < 0) | |||
275 | { | |||
276 | trans_i = tnfa->initial; | |||
277 | while (trans_i->state != NULL((void*)0)) | |||
278 | { | |||
279 | if (reach_pos[trans_i->state_id].pos < pos) | |||
280 | { | |||
281 | if (trans_i->assertions | |||
282 | && CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c)))))) | |||
283 | { | |||
284 | trans_i++; | |||
285 | continue; | |||
286 | } | |||
287 | ||||
288 | reach_next_i->state = trans_i->state; | |||
289 | for (i = 0; i < num_tags; i++) | |||
290 | reach_next_i->tags[i] = -1; | |||
291 | tag_i = trans_i->tags; | |||
292 | if (tag_i) | |||
293 | while (*tag_i >= 0) | |||
294 | { | |||
295 | if (*tag_i < num_tags) | |||
296 | reach_next_i->tags[*tag_i] = pos; | |||
297 | tag_i++; | |||
298 | } | |||
299 | if (reach_next_i->state == tnfa->final) | |||
300 | { | |||
301 | match_eo = pos; | |||
302 | new_match = 1; | |||
303 | for (i = 0; i < num_tags; i++) | |||
304 | match_tags[i] = reach_next_i->tags[i]; | |||
305 | } | |||
306 | reach_pos[trans_i->state_id].pos = pos; | |||
307 | reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | |||
308 | reach_next_i++; | |||
309 | } | |||
310 | trans_i++; | |||
311 | } | |||
312 | reach_next_i->state = NULL((void*)0); | |||
313 | } | |||
314 | else | |||
315 | { | |||
316 | if (num_tags == 0 || reach_next_i == reach_next) | |||
317 | /* We have found a match. */ | |||
318 | break; | |||
319 | } | |||
320 | ||||
321 | /* Check for end of string. */ | |||
322 | if (!next_c) break; | |||
323 | ||||
324 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
325 | ||||
326 | /* Swap `reach' and `reach_next'. */ | |||
327 | reach_i = reach; | |||
328 | reach = reach_next; | |||
329 | reach_next = reach_i; | |||
330 | ||||
331 | /* For each state in `reach', weed out states that don't fulfill the | |||
332 | minimal matching conditions. */ | |||
333 | if (tnfa->num_minimals && new_match) | |||
334 | { | |||
335 | new_match = 0; | |||
336 | reach_next_i = reach_next; | |||
337 | for (reach_i = reach; reach_i->state; reach_i++) | |||
338 | { | |||
339 | int skip = 0; | |||
340 | for (i = 0; tnfa->minimal_tags[i] >= 0; i += 2) | |||
341 | { | |||
342 | int end = tnfa->minimal_tags[i]; | |||
343 | int start = tnfa->minimal_tags[i + 1]; | |||
344 | if (end >= num_tags) | |||
345 | { | |||
346 | skip = 1; | |||
347 | break; | |||
348 | } | |||
349 | else if (reach_i->tags[start] == match_tags[start] | |||
350 | && reach_i->tags[end] < match_tags[end]) | |||
351 | { | |||
352 | skip = 1; | |||
353 | break; | |||
354 | } | |||
355 | } | |||
356 | if (!skip) | |||
357 | { | |||
358 | reach_next_i->state = reach_i->state; | |||
359 | tmp_iptr = reach_next_i->tags; | |||
360 | reach_next_i->tags = reach_i->tags; | |||
361 | reach_i->tags = tmp_iptr; | |||
362 | reach_next_i++; | |||
363 | } | |||
364 | } | |||
365 | reach_next_i->state = NULL((void*)0); | |||
366 | ||||
367 | /* Swap `reach' and `reach_next'. */ | |||
368 | reach_i = reach; | |||
369 | reach = reach_next; | |||
370 | reach_next = reach_i; | |||
371 | } | |||
372 | ||||
373 | /* For each state in `reach' see if there is a transition leaving with | |||
374 | the current input symbol to a state not yet in `reach_next', and | |||
375 | add the destination states to `reach_next'. */ | |||
376 | reach_next_i = reach_next; | |||
377 | for (reach_i = reach; reach_i->state; reach_i++) | |||
378 | { | |||
379 | for (trans_i = reach_i->state; trans_i->state; trans_i++) | |||
380 | { | |||
381 | /* Does this transition match the input symbol? */ | |||
382 | if (trans_i->code_min <= (tre_cint_t)prev_c && | |||
383 | trans_i->code_max >= (tre_cint_t)prev_c) | |||
384 | { | |||
385 | if (trans_i->assertions | |||
386 | && (CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) | |||
387 | || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))))) | |||
388 | { | |||
389 | continue; | |||
390 | } | |||
391 | ||||
392 | /* Compute the tags after this transition. */ | |||
393 | for (i = 0; i < num_tags; i++) | |||
394 | tmp_tags[i] = reach_i->tags[i]; | |||
395 | tag_i = trans_i->tags; | |||
396 | if (tag_i != NULL((void*)0)) | |||
397 | while (*tag_i >= 0) | |||
398 | { | |||
399 | if (*tag_i < num_tags) | |||
400 | tmp_tags[*tag_i] = pos; | |||
401 | tag_i++; | |||
402 | } | |||
403 | ||||
404 | if (reach_pos[trans_i->state_id].pos < pos) | |||
405 | { | |||
406 | /* Found an unvisited node. */ | |||
407 | reach_next_i->state = trans_i->state; | |||
408 | tmp_iptr = reach_next_i->tags; | |||
409 | reach_next_i->tags = tmp_tags; | |||
410 | tmp_tags = tmp_iptr; | |||
411 | reach_pos[trans_i->state_id].pos = pos; | |||
412 | reach_pos[trans_i->state_id].tags = &reach_next_i->tags; | |||
413 | ||||
414 | if (reach_next_i->state == tnfa->final | |||
415 | && (match_eo == -1 | |||
416 | || (num_tags > 0 | |||
417 | && reach_next_i->tags[0] <= match_tags[0]))) | |||
418 | { | |||
419 | match_eo = pos; | |||
420 | new_match = 1; | |||
421 | for (i = 0; i < num_tags; i++) | |||
422 | match_tags[i] = reach_next_i->tags[i]; | |||
423 | } | |||
424 | reach_next_i++; | |||
425 | ||||
426 | } | |||
427 | else | |||
428 | { | |||
429 | assert(reach_pos[trans_i->state_id].pos == pos)(void)0; | |||
430 | /* Another path has also reached this state. We choose | |||
431 | the winner by examining the tag values for both | |||
432 | paths. */ | |||
433 | if (tre_tag_order(num_tags, tnfa->tag_directions, | |||
434 | tmp_tags, | |||
435 | *reach_pos[trans_i->state_id].tags)) | |||
436 | { | |||
437 | /* The new path wins. */ | |||
438 | tmp_iptr = *reach_pos[trans_i->state_id].tags; | |||
439 | *reach_pos[trans_i->state_id].tags = tmp_tags; | |||
440 | if (trans_i->state == tnfa->final) | |||
441 | { | |||
442 | match_eo = pos; | |||
443 | new_match = 1; | |||
444 | for (i = 0; i < num_tags; i++) | |||
445 | match_tags[i] = tmp_tags[i]; | |||
446 | } | |||
447 | tmp_tags = tmp_iptr; | |||
448 | } | |||
449 | } | |||
450 | } | |||
451 | } | |||
452 | } | |||
453 | reach_next_i->state = NULL((void*)0); | |||
454 | } | |||
455 | ||||
456 | *match_end_ofs = match_eo; | |||
457 | ret = match_eo >= 0 ? REG_OK0 : REG_NOMATCH1; | |||
458 | error_exit: | |||
459 | xfreefree(buf); | |||
460 | return ret; | |||
461 | } | |||
462 | ||||
463 | ||||
464 | ||||
465 | /*********************************************************************** | |||
466 | from tre-match-backtrack.c | |||
467 | ***********************************************************************/ | |||
468 | ||||
469 | /* | |||
470 | This matcher is for regexps that use back referencing. Regexp matching | |||
471 | with back referencing is an NP-complete problem on the number of back | |||
472 | references. The easiest way to match them is to use a backtracking | |||
473 | routine which basically goes through all possible paths in the TNFA | |||
474 | and chooses the one which results in the best (leftmost and longest) | |||
475 | match. This can be spectacularly expensive and may run out of stack | |||
476 | space, but there really is no better known generic algorithm. Quoting | |||
477 | Henry Spencer from comp.compilers: | |||
478 | <URL: http://compilers.iecc.com/comparch/article/93-03-102> | |||
479 | ||||
480 | POSIX.2 REs require longest match, which is really exciting to | |||
481 | implement since the obsolete ("basic") variant also includes | |||
482 | \<digit>. I haven't found a better way of tackling this than doing | |||
483 | a preliminary match using a DFA (or simulation) on a modified RE | |||
484 | that just replicates subREs for \<digit>, and then doing a | |||
485 | backtracking match to determine whether the subRE matches were | |||
486 | right. This can be rather slow, but I console myself with the | |||
487 | thought that people who use \<digit> deserve very slow execution. | |||
488 | (Pun unintentional but very appropriate.) | |||
489 | ||||
490 | */ | |||
491 | ||||
492 | typedef struct { | |||
493 | regoff_t pos; | |||
494 | const char *str_byte; | |||
495 | tre_tnfa_transition_t *state; | |||
496 | int state_id; | |||
497 | int next_c; | |||
498 | regoff_t *tags; | |||
499 | #ifdef TRE_MBSTATE | |||
500 | mbstate_t mbstate; | |||
501 | #endif /* TRE_MBSTATE */ | |||
502 | } tre_backtrack_item_t; | |||
503 | ||||
504 | typedef struct tre_backtrack_struct { | |||
505 | tre_backtrack_item_t item; | |||
506 | struct tre_backtrack_struct *prev; | |||
507 | struct tre_backtrack_struct *next; | |||
508 | } *tre_backtrack_t; | |||
509 | ||||
510 | #ifdef TRE_MBSTATE | |||
511 | #define BT_STACK_MBSTATE_IN stack->item.mbstate = (mbstate) | |||
512 | #define BT_STACK_MBSTATE_OUT (mbstate) = stack->item.mbstate | |||
513 | #else /* !TRE_MBSTATE */ | |||
514 | #define BT_STACK_MBSTATE_IN | |||
515 | #define BT_STACK_MBSTATE_OUT | |||
516 | #endif /* !TRE_MBSTATE */ | |||
517 | ||||
518 | #define tre_bt_mem_newtre_mem_new tre_mem_new | |||
519 | #define tre_bt_mem_alloctre_mem_alloc tre_mem_alloc | |||
520 | #define tre_bt_mem_destroy__tre_mem_destroy tre_mem_destroy__tre_mem_destroy | |||
521 | ||||
522 | ||||
523 | #define BT_STACK_PUSH(_pos, _str_byte, _str_wide, _state, _state_id, _next_c, _tags, _mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (_pos); stack->item.str_byte = ( _str_byte); stack->item.state = (_state); stack->item.state_id = (_state_id); stack->item.next_c = (_next_c); for (i = 0 ; i < tnfa->num_tags; i++) stack->item.tags[i] = (_tags )[i]; ; } while (0) \ | |||
524 | do \ | |||
525 | { \ | |||
526 | int i; \ | |||
527 | if (!stack->next) \ | |||
528 | { \ | |||
529 | tre_backtrack_t s; \ | |||
530 | s = tre_bt_mem_alloc(mem, sizeof(*s))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*s)); \ | |||
531 | if (!s) \ | |||
532 | { \ | |||
533 | tre_bt_mem_destroy__tre_mem_destroy(mem); \ | |||
534 | if (tags) \ | |||
535 | xfreefree(tags); \ | |||
536 | if (pmatch) \ | |||
537 | xfreefree(pmatch); \ | |||
538 | if (states_seen) \ | |||
539 | xfreefree(states_seen); \ | |||
540 | return REG_ESPACE12; \ | |||
541 | } \ | |||
542 | s->prev = stack; \ | |||
543 | s->next = NULL((void*)0); \ | |||
544 | s->item.tags = tre_bt_mem_alloc(mem, \__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa ->num_tags) | |||
545 | sizeof(*tags) * tnfa->num_tags)__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa ->num_tags); \ | |||
546 | if (!s->item.tags) \ | |||
547 | { \ | |||
548 | tre_bt_mem_destroy__tre_mem_destroy(mem); \ | |||
549 | if (tags) \ | |||
550 | xfreefree(tags); \ | |||
551 | if (pmatch) \ | |||
552 | xfreefree(pmatch); \ | |||
553 | if (states_seen) \ | |||
554 | xfreefree(states_seen); \ | |||
555 | return REG_ESPACE12; \ | |||
556 | } \ | |||
557 | stack->next = s; \ | |||
558 | stack = s; \ | |||
559 | } \ | |||
560 | else \ | |||
561 | stack = stack->next; \ | |||
562 | stack->item.pos = (_pos); \ | |||
563 | stack->item.str_byte = (_str_byte); \ | |||
564 | stack->item.state = (_state); \ | |||
565 | stack->item.state_id = (_state_id); \ | |||
566 | stack->item.next_c = (_next_c); \ | |||
567 | for (i = 0; i < tnfa->num_tags; i++) \ | |||
568 | stack->item.tags[i] = (_tags)[i]; \ | |||
569 | BT_STACK_MBSTATE_IN; \ | |||
570 | } \ | |||
571 | while (0) | |||
572 | ||||
573 | #define BT_STACK_POP()do { int i; (void)0; pos = stack->item.pos; str_byte = stack ->item.str_byte; state = stack->item.state; next_c = stack ->item.next_c; for (i = 0; i < tnfa->num_tags; i++) tags [i] = stack->item.tags[i]; ; stack = stack->prev; } while (0) \ | |||
574 | do \ | |||
575 | { \ | |||
576 | int i; \ | |||
577 | assert(stack->prev)(void)0; \ | |||
578 | pos = stack->item.pos; \ | |||
579 | str_byte = stack->item.str_byte; \ | |||
580 | state = stack->item.state; \ | |||
581 | next_c = stack->item.next_c; \ | |||
582 | for (i = 0; i < tnfa->num_tags; i++) \ | |||
583 | tags[i] = stack->item.tags[i]; \ | |||
584 | BT_STACK_MBSTATE_OUT; \ | |||
585 | stack = stack->prev; \ | |||
586 | } \ | |||
587 | while (0) | |||
588 | ||||
589 | #undef MIN | |||
590 | #define MIN(a, b)((a) <= (b) ? (a) : (b)) ((a) <= (b) ? (a) : (b)) | |||
591 | ||||
592 | static reg_errcode_t | |||
593 | tre_tnfa_run_backtrack(const tre_tnfa_t *tnfa, const void *string, | |||
594 | regoff_t *match_tags, int eflags, regoff_t *match_end_ofs) | |||
595 | { | |||
596 | /* State variables required by GET_NEXT_WCHAR. */ | |||
597 | tre_char_t prev_c = 0, next_c = 0; | |||
598 | const char *str_byte = string; | |||
599 | regoff_t pos = 0; | |||
600 | regoff_t pos_add_next = 1; | |||
601 | #ifdef TRE_MBSTATE | |||
602 | mbstate_t mbstate; | |||
603 | #endif /* TRE_MBSTATE */ | |||
604 | int reg_notbol = eflags & REG_NOTBOL1; | |||
605 | int reg_noteol = eflags & REG_NOTEOL2; | |||
606 | int reg_newline = tnfa->cflags & REG_NEWLINE4; | |||
607 | ||||
608 | /* These are used to remember the necessary values of the above | |||
609 | variables to return to the position where the current search | |||
610 | started from. */ | |||
611 | int next_c_start; | |||
612 | const char *str_byte_start; | |||
613 | regoff_t pos_start = -1; | |||
614 | #ifdef TRE_MBSTATE | |||
615 | mbstate_t mbstate_start; | |||
616 | #endif /* TRE_MBSTATE */ | |||
617 | ||||
618 | /* End offset of best match so far, or -1 if no match found yet. */ | |||
619 | regoff_t match_eo = -1; | |||
620 | /* Tag arrays. */ | |||
621 | int *next_tags; | |||
622 | regoff_t *tags = NULL((void*)0); | |||
623 | /* Current TNFA state. */ | |||
624 | tre_tnfa_transition_t *state; | |||
625 | int *states_seen = NULL((void*)0); | |||
626 | ||||
627 | /* Memory allocator to for allocating the backtracking stack. */ | |||
628 | tre_mem_t mem = tre_bt_mem_new()__tre_mem_new_impl(0, ((void*)0)); | |||
629 | ||||
630 | /* The backtracking stack. */ | |||
631 | tre_backtrack_t stack; | |||
632 | ||||
633 | tre_tnfa_transition_t *trans_i; | |||
634 | regmatch_t *pmatch = NULL((void*)0); | |||
| ||||
635 | int ret; | |||
636 | ||||
637 | #ifdef TRE_MBSTATE | |||
638 | memset(&mbstate, '\0', sizeof(mbstate)); | |||
639 | #endif /* TRE_MBSTATE */ | |||
640 | ||||
641 | if (!mem) | |||
642 | return REG_ESPACE12; | |||
643 | stack = tre_bt_mem_alloc(mem, sizeof(*stack))__tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*stack)); | |||
644 | if (!stack) | |||
645 | { | |||
646 | ret = REG_ESPACE12; | |||
647 | goto error_exit; | |||
648 | } | |||
649 | stack->prev = NULL((void*)0); | |||
650 | stack->next = NULL((void*)0); | |||
651 | ||||
652 | if (tnfa->num_tags) | |||
653 | { | |||
654 | tags = xmallocmalloc(sizeof(*tags) * tnfa->num_tags); | |||
655 | if (!tags) | |||
656 | { | |||
657 | ret = REG_ESPACE12; | |||
658 | goto error_exit; | |||
659 | } | |||
660 | } | |||
661 | if (tnfa->num_submatches) | |||
662 | { | |||
663 | pmatch = xmallocmalloc(sizeof(*pmatch) * tnfa->num_submatches); | |||
664 | if (!pmatch) | |||
665 | { | |||
666 | ret = REG_ESPACE12; | |||
667 | goto error_exit; | |||
668 | } | |||
669 | } | |||
670 | if (tnfa->num_states) | |||
671 | { | |||
672 | states_seen = xmallocmalloc(sizeof(*states_seen) * tnfa->num_states); | |||
673 | if (!states_seen) | |||
674 | { | |||
675 | ret = REG_ESPACE12; | |||
676 | goto error_exit; | |||
677 | } | |||
678 | } | |||
679 | ||||
680 | retry: | |||
681 | { | |||
682 | int i; | |||
683 | for (i = 0; i < tnfa->num_tags; i++) | |||
684 | { | |||
685 | tags[i] = -1; | |||
686 | if (match_tags) | |||
687 | match_tags[i] = -1; | |||
688 | } | |||
689 | for (i = 0; i < tnfa->num_states; i++) | |||
690 | states_seen[i] = 0; | |||
691 | } | |||
692 | ||||
693 | state = NULL((void*)0); | |||
694 | pos = pos_start; | |||
695 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
696 | pos_start = pos; | |||
697 | next_c_start = next_c; | |||
698 | str_byte_start = str_byte; | |||
699 | #ifdef TRE_MBSTATE | |||
700 | mbstate_start = mbstate; | |||
701 | #endif /* TRE_MBSTATE */ | |||
702 | ||||
703 | /* Handle initial states. */ | |||
704 | next_tags = NULL((void*)0); | |||
705 | for (trans_i = tnfa->initial; trans_i->state; trans_i++) | |||
706 | { | |||
707 | if (trans_i->assertions && CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c)))))) | |||
708 | { | |||
709 | continue; | |||
710 | } | |||
711 | if (state == NULL((void*)0)) | |||
712 | { | |||
713 | /* Start from this state. */ | |||
714 | state = trans_i->state; | |||
715 | next_tags = trans_i->tags; | |||
716 | } | |||
717 | else | |||
718 | { | |||
719 | /* Backtrack to this state. */ | |||
720 | BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0) | |||
721 | trans_i->state_id, next_c, tags, mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0); | |||
722 | { | |||
723 | int *tmp = trans_i->tags; | |||
724 | if (tmp) | |||
725 | while (*tmp >= 0) | |||
726 | stack->item.tags[*tmp++] = pos; | |||
727 | } | |||
728 | } | |||
729 | } | |||
730 | ||||
731 | if (next_tags) | |||
732 | for (; *next_tags >= 0; next_tags++) | |||
733 | tags[*next_tags] = pos; | |||
734 | ||||
735 | ||||
736 | if (state == NULL((void*)0)) | |||
737 | goto backtrack; | |||
738 | ||||
739 | while (1) | |||
740 | { | |||
741 | tre_tnfa_transition_t *next_state; | |||
742 | int empty_br_match; | |||
743 | ||||
744 | if (state == tnfa->final) | |||
745 | { | |||
746 | if (match_eo < pos | |||
747 | || (match_eo == pos | |||
748 | && match_tags | |||
749 | && tre_tag_order(tnfa->num_tags, tnfa->tag_directions, | |||
750 | tags, match_tags))) | |||
751 | { | |||
752 | int i; | |||
753 | /* This match wins the previous match. */ | |||
754 | match_eo = pos; | |||
755 | if (match_tags) | |||
756 | for (i = 0; i < tnfa->num_tags; i++) | |||
757 | match_tags[i] = tags[i]; | |||
758 | } | |||
759 | /* Our TNFAs never have transitions leaving from the final state, | |||
760 | so we jump right to backtracking. */ | |||
761 | goto backtrack; | |||
762 | } | |||
763 | ||||
764 | /* Go to the next character in the input string. */ | |||
765 | empty_br_match = 0; | |||
766 | trans_i = state; | |||
767 | if (trans_i->state && trans_i->assertions & ASSERT_BACKREF256) | |||
768 | { | |||
769 | /* This is a back reference state. All transitions leaving from | |||
770 | this state have the same back reference "assertion". Instead | |||
771 | of reading the next character, we match the back reference. */ | |||
772 | regoff_t so, eo; | |||
773 | int bt = trans_i->u.backref; | |||
774 | regoff_t bt_len; | |||
775 | int result; | |||
776 | ||||
777 | /* Get the substring we need to match against. Remember to | |||
778 | turn off REG_NOSUB temporarily. */ | |||
779 | tre_fill_pmatch(bt + 1, pmatch, tnfa->cflags & ~REG_NOSUB8, | |||
780 | tnfa, tags, pos); | |||
781 | so = pmatch[bt].rm_so; | |||
| ||||
782 | eo = pmatch[bt].rm_eo; | |||
783 | bt_len = eo - so; | |||
784 | ||||
785 | result = strncmp((const char*)string + so, str_byte - 1, | |||
786 | (size_t)bt_len); | |||
787 | ||||
788 | if (result == 0) | |||
789 | { | |||
790 | /* Back reference matched. Check for infinite loop. */ | |||
791 | if (bt_len == 0) | |||
792 | empty_br_match = 1; | |||
793 | if (empty_br_match && states_seen[trans_i->state_id]) | |||
794 | { | |||
795 | goto backtrack; | |||
796 | } | |||
797 | ||||
798 | states_seen[trans_i->state_id] = empty_br_match; | |||
799 | ||||
800 | /* Advance in input string and resync `prev_c', `next_c' | |||
801 | and pos. */ | |||
802 | str_byte += bt_len - 1; | |||
803 | pos += bt_len - 1; | |||
804 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
805 | } | |||
806 | else | |||
807 | { | |||
808 | goto backtrack; | |||
809 | } | |||
810 | } | |||
811 | else | |||
812 | { | |||
813 | /* Check for end of string. */ | |||
814 | if (next_c == L'\0') | |||
815 | goto backtrack; | |||
816 | ||||
817 | /* Read the next character. */ | |||
818 | GET_NEXT_WCHAR()do { prev_c = next_c; pos += pos_add_next; if ((pos_add_next = mbtowc(&next_c, str_byte, 4)) <= 0) { if (pos_add_next < 0) { ret = 1; goto error_exit; } else pos_add_next++; } str_byte += pos_add_next; } while (0); | |||
819 | } | |||
820 | ||||
821 | next_state = NULL((void*)0); | |||
822 | for (trans_i = state; trans_i->state; trans_i++) | |||
823 | { | |||
824 | if (trans_i->code_min <= (tre_cint_t)prev_c | |||
825 | && trans_i->code_max >= (tre_cint_t)prev_c) | |||
826 | { | |||
827 | if (trans_i->assertions | |||
828 | && (CHECK_ASSERTIONS(trans_i->assertions)(((trans_i->assertions & 1) && (pos > 0 || reg_notbol ) && (prev_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 2) && (next_c != L'\0' || reg_noteol ) && (next_c != L'\n' || !reg_newline)) || ((trans_i-> assertions & 16) && (((prev_c) == L'_' || iswalnum (prev_c)) || !((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 32) && (!((prev_c) == L'_' || iswalnum (prev_c)) || ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i ->assertions & 64) && (pos != 0 && next_c != L'\0' && ((prev_c) == L'_' || iswalnum(prev_c)) == ((next_c) == L'_' || iswalnum(next_c)))) || ((trans_i->assertions & 128) && (pos == 0 || next_c == L'\0' || ((prev_c ) == L'_' || iswalnum(prev_c)) != ((next_c) == L'_' || iswalnum (next_c))))) | |||
829 | || CHECK_CHAR_CLASSES(trans_i, tnfa, eflags)(((trans_i->assertions & 4) && !(tnfa->cflags & 2) && !iswctype((tre_cint_t)prev_c, trans_i-> u.class)) || ((trans_i->assertions & 4) && (tnfa ->cflags & 2) && !iswctype(towlower((tre_cint_t )prev_c),trans_i->u.class) && !iswctype(towupper(( tre_cint_t)prev_c),trans_i->u.class)) || ((trans_i->assertions & 8) && tre_neg_char_classes_match(trans_i->neg_classes ,(tre_cint_t)prev_c, tnfa->cflags & 2))))) | |||
830 | { | |||
831 | continue; | |||
832 | } | |||
833 | ||||
834 | if (next_state == NULL((void*)0)) | |||
835 | { | |||
836 | /* First matching transition. */ | |||
837 | next_state = trans_i->state; | |||
838 | next_tags = trans_i->tags; | |||
839 | } | |||
840 | else | |||
841 | { | |||
842 | /* Second matching transition. We may need to backtrack here | |||
843 | to take this transition instead of the first one, so we | |||
844 | push this transition in the backtracking stack so we can | |||
845 | jump back here if needed. */ | |||
846 | BT_STACK_PUSH(pos, str_byte, 0, trans_i->state,do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0) | |||
847 | trans_i->state_id, next_c, tags, mbstate)do { int i; if (!stack->next) { tre_backtrack_t s; s = __tre_mem_alloc_impl (mem, 0, ((void*)0), 0, sizeof(*s)); if (!s) { __tre_mem_destroy (mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen ) free(states_seen); return 12; } s->prev = stack; s->next = ((void*)0); s->item.tags = __tre_mem_alloc_impl(mem, 0, ((void*)0), 0, sizeof(*tags) * tnfa->num_tags); if (!s-> item.tags) { __tre_mem_destroy(mem); if (tags) free(tags); if (pmatch) free(pmatch); if (states_seen) free(states_seen); return 12; } stack->next = s; stack = s; } else stack = stack-> next; stack->item.pos = (pos); stack->item.str_byte = ( str_byte); stack->item.state = (trans_i->state); stack-> item.state_id = (trans_i->state_id); stack->item.next_c = (next_c); for (i = 0; i < tnfa->num_tags; i++) stack ->item.tags[i] = (tags)[i]; ; } while (0); | |||
848 | { | |||
849 | int *tmp; | |||
850 | for (tmp = trans_i->tags; tmp && *tmp >= 0; tmp++) | |||
851 | stack->item.tags[*tmp] = pos; | |||
852 | } | |||
853 | #if 0 /* XXX - it's important not to look at all transitions here to keep | |||
854 | the stack small! */ | |||
855 | break; | |||
856 | #endif | |||
857 | } | |||
858 | } | |||
859 | } | |||
860 | ||||
861 | if (next_state != NULL((void*)0)) | |||
862 | { | |||
863 | /* Matching transitions were found. Take the first one. */ | |||
864 | state = next_state; | |||
865 | ||||
866 | /* Update the tag values. */ | |||
867 | if (next_tags) | |||
868 | while (*next_tags >= 0) | |||
869 | tags[*next_tags++] = pos; | |||
870 | } | |||
871 | else | |||
872 | { | |||
873 | backtrack: | |||
874 | /* A matching transition was not found. Try to backtrack. */ | |||
875 | if (stack->prev) | |||
876 | { | |||
877 | if (stack->item.state->assertions & ASSERT_BACKREF256) | |||
878 | { | |||
879 | states_seen[stack->item.state_id] = 0; | |||
880 | } | |||
881 | ||||
882 | BT_STACK_POP()do { int i; (void)0; pos = stack->item.pos; str_byte = stack ->item.str_byte; state = stack->item.state; next_c = stack ->item.next_c; for (i = 0; i < tnfa->num_tags; i++) tags [i] = stack->item.tags[i]; ; stack = stack->prev; } while (0); | |||
883 | } | |||
884 | else if (match_eo < 0) | |||
885 | { | |||
886 | /* Try starting from a later position in the input string. */ | |||
887 | /* Check for end of string. */ | |||
888 | if (next_c == L'\0') | |||
889 | { | |||
890 | break; | |||
891 | } | |||
892 | next_c = next_c_start; | |||
893 | #ifdef TRE_MBSTATE | |||
894 | mbstate = mbstate_start; | |||
895 | #endif /* TRE_MBSTATE */ | |||
896 | str_byte = str_byte_start; | |||
897 | goto retry; | |||
898 | } | |||
899 | else | |||
900 | { | |||
901 | break; | |||
902 | } | |||
903 | } | |||
904 | } | |||
905 | ||||
906 | ret = match_eo >= 0 ? REG_OK0 : REG_NOMATCH1; | |||
907 | *match_end_ofs = match_eo; | |||
908 | ||||
909 | error_exit: | |||
910 | tre_bt_mem_destroy__tre_mem_destroy(mem); | |||
911 | #ifndef TRE_USE_ALLOCA | |||
912 | if (tags) | |||
913 | xfreefree(tags); | |||
914 | if (pmatch) | |||
915 | xfreefree(pmatch); | |||
916 | if (states_seen) | |||
917 | xfreefree(states_seen); | |||
918 | #endif /* !TRE_USE_ALLOCA */ | |||
919 | ||||
920 | return ret; | |||
921 | } | |||
922 | ||||
923 | /*********************************************************************** | |||
924 | from regexec.c | |||
925 | ***********************************************************************/ | |||
926 | ||||
927 | /* Fills the POSIX.2 regmatch_t array according to the TNFA tag and match | |||
928 | endpoint values. */ | |||
929 | static void | |||
930 | tre_fill_pmatch(size_t nmatch, regmatch_t pmatch[], int cflags, | |||
931 | const tre_tnfa_t *tnfa, regoff_t *tags, regoff_t match_eo) | |||
932 | { | |||
933 | tre_submatch_data_t *submatch_data; | |||
934 | unsigned int i, j; | |||
935 | int *parents; | |||
936 | ||||
937 | i = 0; | |||
938 | if (match_eo >= 0 && !(cflags & REG_NOSUB8)) | |||
939 | { | |||
940 | /* Construct submatch offsets from the tags. */ | |||
941 | submatch_data = tnfa->submatch_data; | |||
942 | while (i < tnfa->num_submatches && i < nmatch) | |||
943 | { | |||
944 | if (submatch_data[i].so_tag == tnfa->end_tag) | |||
945 | pmatch[i].rm_so = match_eo; | |||
946 | else | |||
947 | pmatch[i].rm_so = tags[submatch_data[i].so_tag]; | |||
948 | ||||
949 | if (submatch_data[i].eo_tag == tnfa->end_tag) | |||
950 | pmatch[i].rm_eo = match_eo; | |||
951 | else | |||
952 | pmatch[i].rm_eo = tags[submatch_data[i].eo_tag]; | |||
953 | ||||
954 | /* If either of the endpoints were not used, this submatch | |||
955 | was not part of the match. */ | |||
956 | if (pmatch[i].rm_so == -1 || pmatch[i].rm_eo == -1) | |||
957 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |||
958 | ||||
959 | i++; | |||
960 | } | |||
961 | /* Reset all submatches that are not within all of their parent | |||
962 | submatches. */ | |||
963 | i = 0; | |||
964 | while (i < tnfa->num_submatches && i < nmatch) | |||
965 | { | |||
966 | if (pmatch[i].rm_eo == -1) | |||
967 | assert(pmatch[i].rm_so == -1)(void)0; | |||
968 | assert(pmatch[i].rm_so <= pmatch[i].rm_eo)(void)0; | |||
969 | ||||
970 | parents = submatch_data[i].parents; | |||
971 | if (parents != NULL((void*)0)) | |||
972 | for (j = 0; parents[j] >= 0; j++) | |||
973 | { | |||
974 | if (pmatch[i].rm_so < pmatch[parents[j]].rm_so | |||
975 | || pmatch[i].rm_eo > pmatch[parents[j]].rm_eo) | |||
976 | pmatch[i].rm_so = pmatch[i].rm_eo = -1; | |||
977 | } | |||
978 | i++; | |||
979 | } | |||
980 | } | |||
981 | ||||
982 | while (i < nmatch) | |||
983 | { | |||
984 | pmatch[i].rm_so = -1; | |||
985 | pmatch[i].rm_eo = -1; | |||
986 | i++; | |||
987 | } | |||
988 | } | |||
989 | ||||
990 | ||||
991 | /* | |||
992 | Wrapper functions for POSIX compatible regexp matching. | |||
993 | */ | |||
994 | ||||
995 | int | |||
996 | regexec(const regex_t *restrict preg, const char *restrict string, | |||
997 | size_t nmatch, regmatch_t pmatch[restrict], int eflags) | |||
998 | { | |||
999 | tre_tnfa_t *tnfa = (void *)preg->TRE_REGEX_T_FIELD__opaque; | |||
1000 | reg_errcode_t status; | |||
1001 | regoff_t *tags = NULL((void*)0), eo; | |||
1002 | if (tnfa->cflags & REG_NOSUB8) nmatch = 0; | |||
1003 | if (tnfa->num_tags > 0 && nmatch > 0) | |||
1004 | { | |||
1005 | tags = xmallocmalloc(sizeof(*tags) * tnfa->num_tags); | |||
1006 | if (tags == NULL((void*)0)) | |||
1007 | return REG_ESPACE12; | |||
1008 | } | |||
1009 | ||||
1010 | /* Dispatch to the appropriate matcher. */ | |||
1011 | if (tnfa->have_backrefs) | |||
1012 | { | |||
1013 | /* The regex has back references, use the backtracking matcher. */ | |||
1014 | status = tre_tnfa_run_backtrack(tnfa, string, tags, eflags, &eo); | |||
1015 | } | |||
1016 | else | |||
1017 | { | |||
1018 | /* Exact matching, no back references, use the parallel matcher. */ | |||
1019 | status = tre_tnfa_run_parallel(tnfa, string, tags, eflags, &eo); | |||
1020 | } | |||
1021 | ||||
1022 | if (status == REG_OK0) | |||
1023 | /* A match was found, so fill the submatch registers. */ | |||
1024 | tre_fill_pmatch(nmatch, pmatch, tnfa->cflags, tnfa, tags, eo); | |||
1025 | if (tags) | |||
1026 | xfreefree(tags); | |||
1027 | return status; | |||
1028 | } |