Lines 21-26
Link Here
|
21 |
#ifdef HAVE_CONFIG_H |
21 |
#ifdef HAVE_CONFIG_H |
22 |
# include <config.h> |
22 |
# include <config.h> |
23 |
#endif |
23 |
#endif |
|
|
24 |
#include <assert.h> |
24 |
#include <sys/types.h> |
25 |
#include <sys/types.h> |
25 |
#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC |
26 |
#if defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_MBRTOWC |
26 |
/* We can handle multibyte string. */ |
27 |
/* We can handle multibyte string. */ |
Lines 39-44
Link Here
|
39 |
#ifdef HAVE_LIBPCRE |
40 |
#ifdef HAVE_LIBPCRE |
40 |
# include <pcre.h> |
41 |
# include <pcre.h> |
41 |
#endif |
42 |
#endif |
|
|
43 |
#ifdef HAVE_LANGINFO_CODESET |
44 |
# include <langinfo.h> |
45 |
#endif |
42 |
|
46 |
|
43 |
#define NCHAR (UCHAR_MAX + 1) |
47 |
#define NCHAR (UCHAR_MAX + 1) |
44 |
|
48 |
|
Lines 70-78
Link Here
|
70 |
call the regexp matcher at all. */ |
74 |
call the regexp matcher at all. */ |
71 |
static int kwset_exact_matches; |
75 |
static int kwset_exact_matches; |
72 |
|
76 |
|
73 |
#if defined(MBS_SUPPORT) |
77 |
/* UTF-8 encoding allows some optimizations that we can't otherwise |
74 |
static char* check_multibyte_string PARAMS ((char const *buf, size_t size)); |
78 |
assume in a multibyte encoding. */ |
75 |
#endif |
79 |
static int using_utf8; |
|
|
80 |
|
76 |
static void kwsinit PARAMS ((void)); |
81 |
static void kwsinit PARAMS ((void)); |
77 |
static void kwsmusts PARAMS ((void)); |
82 |
static void kwsmusts PARAMS ((void)); |
78 |
static void Gcompile PARAMS ((char const *, size_t)); |
83 |
static void Gcompile PARAMS ((char const *, size_t)); |
Lines 84-89
Link Here
|
84 |
static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); |
89 |
static size_t Pexecute PARAMS ((char const *, size_t, size_t *, int)); |
85 |
|
90 |
|
86 |
void |
91 |
/* Record in using_utf8 whether the current locale's codeset is UTF-8.
   The flag is set when nl_langinfo (CODESET) reports exactly "UTF-8";
   it is never cleared here.  When HAVE_LANGINFO_CODESET is not
   defined this function does nothing.  */
void
check_utf8 (void)
{
#ifdef HAVE_LANGINFO_CODESET
  char const *codeset = nl_langinfo (CODESET);

  if (!strcmp (codeset, "UTF-8"))
    using_utf8 = 1;
#endif
}
99 |
|
100 |
void |
87 |
dfaerror (char const *mesg) |
101 |
dfaerror (char const *mesg) |
88 |
{ |
102 |
{ |
89 |
error (2, 0, mesg); |
103 |
error (2, 0, mesg); |
Lines 141-187
Link Here
|
141 |
} |
155 |
} |
142 |
} |
156 |
} |
143 |
|
157 |
|
144 |
#ifdef MBS_SUPPORT |
|
|
145 |
/* This function allocate the array which correspond to "buf". |
146 |
Then this check multibyte string and mark on the positions which |
147 |
are not singlebyte character nor the first byte of a multibyte |
148 |
character. Caller must free the array. */ |
149 |
static char* |
150 |
check_multibyte_string(char const *buf, size_t size) |
151 |
{ |
152 |
char *mb_properties = xmalloc(size); |
153 |
mbstate_t cur_state; |
154 |
wchar_t wc; |
155 |
int i; |
156 |
memset(&cur_state, 0, sizeof(mbstate_t)); |
157 |
memset(mb_properties, 0, sizeof(char)*size); |
158 |
for (i = 0; i < size ;) |
159 |
{ |
160 |
size_t mbclen; |
161 |
mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); |
162 |
|
163 |
if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) |
164 |
{ |
165 |
/* An invalid sequence, or a truncated multibyte character. |
166 |
We treat it as a singlebyte character. */ |
167 |
mbclen = 1; |
168 |
} |
169 |
else if (match_icase) |
170 |
{ |
171 |
if (iswupper((wint_t)wc)) |
172 |
{ |
173 |
wc = towlower((wint_t)wc); |
174 |
wcrtomb(buf + i, wc, &cur_state); |
175 |
} |
176 |
} |
177 |
mb_properties[i] = mbclen; |
178 |
i += mbclen; |
179 |
} |
180 |
|
181 |
return mb_properties; |
182 |
} |
183 |
#endif |
184 |
|
185 |
static void |
158 |
static void |
186 |
Gcompile (char const *pattern, size_t size) |
159 |
Gcompile (char const *pattern, size_t size) |
187 |
{ |
160 |
{ |
Lines 190-195
Link Here
|
190 |
size_t total = size; |
163 |
size_t total = size; |
191 |
char const *motif = pattern; |
164 |
char const *motif = pattern; |
192 |
|
165 |
|
|
|
166 |
check_utf8 (); |
193 |
re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); |
167 |
re_set_syntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE | (match_icase ? RE_ICASE : 0)); |
194 |
dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); |
168 |
dfasyntax (RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); |
195 |
|
169 |
|
Lines 266-271
Link Here
|
266 |
size_t total = size; |
240 |
size_t total = size; |
267 |
char const *motif = pattern; |
241 |
char const *motif = pattern; |
268 |
|
242 |
|
|
|
243 |
check_utf8 (); |
269 |
if (strcmp (matcher, "awk") == 0) |
244 |
if (strcmp (matcher, "awk") == 0) |
270 |
{ |
245 |
{ |
271 |
re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); |
246 |
re_set_syntax (RE_SYNTAX_AWK | (match_icase ? RE_ICASE : 0)); |
Lines 350-367
Link Here
|
350 |
struct kwsmatch kwsm; |
325 |
struct kwsmatch kwsm; |
351 |
size_t i, ret_val; |
326 |
size_t i, ret_val; |
352 |
#ifdef MBS_SUPPORT |
327 |
#ifdef MBS_SUPPORT |
353 |
char *mb_properties = NULL; |
328 |
mbstate_t mbs; |
354 |
if (MB_CUR_MAX > 1) |
329 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
355 |
{ |
|
|
356 |
if (match_icase) |
357 |
{ |
358 |
char *case_buf = xmalloc(size); |
359 |
memcpy(case_buf, buf, size); |
360 |
buf = case_buf; |
361 |
} |
362 |
if (kwset) |
363 |
mb_properties = check_multibyte_string(buf, size); |
364 |
} |
365 |
#endif /* MBS_SUPPORT */ |
330 |
#endif /* MBS_SUPPORT */ |
366 |
|
331 |
|
367 |
buflim = buf + size; |
332 |
buflim = buf + size; |
Lines 373-393
Link Here
|
373 |
if (kwset) |
338 |
if (kwset) |
374 |
{ |
339 |
{ |
375 |
/* Find a possible match using the KWset matcher. */ |
340 |
/* Find a possible match using the KWset matcher. */ |
376 |
size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); |
341 |
#ifdef MBS_SUPPORT |
|
|
342 |
size_t bytes_left = 0; |
343 |
#endif /* MBS_SUPPORT */ |
344 |
size_t offset; |
345 |
#ifdef MBS_SUPPORT |
346 |
/* kwsexec doesn't work with match_icase and multibyte input. */ |
347 |
if (match_icase && MB_CUR_MAX > 1) |
348 |
/* Avoid kwset */ |
349 |
offset = 0; |
350 |
else |
351 |
#endif /* MBS_SUPPORT */ |
352 |
offset = kwsexec (kwset, beg, buflim - beg, &kwsm); |
377 |
if (offset == (size_t) -1) |
353 |
if (offset == (size_t) -1) |
378 |
goto failure; |
354 |
goto failure; |
|
|
355 |
#ifdef MBS_SUPPORT |
356 |
if (MB_CUR_MAX > 1 && !using_utf8) |
357 |
{ |
358 |
bytes_left = offset; |
359 |
while (bytes_left) |
360 |
{ |
361 |
size_t len = mbrlen (beg, bytes_left, &mbs); |
362 |
if (len == (size_t) -1 || len == 0) |
363 |
{ |
364 |
/* Invalid sequence or NUL byte: treat as single-byte. */ |
365 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
366 |
beg++; |
367 |
bytes_left--; |
368 |
continue; |
369 |
} |
370 |
|
371 |
if (len == (size_t) -2) |
372 |
/* Offset points inside multibyte character: |
373 |
* no good. */ |
374 |
break; |
375 |
|
376 |
beg += len; |
377 |
bytes_left -= len; |
378 |
} |
379 |
} |
380 |
else |
381 |
#endif /* MBS_SUPPORT */ |
379 |
beg += offset; |
382 |
beg += offset; |
380 |
/* Narrow down to the line containing the candidate, and |
383 |
/* Narrow down to the line containing the candidate, and |
381 |
run it through DFA. */ |
384 |
run it through DFA. */ |
382 |
end = memchr(beg, eol, buflim - beg); |
385 |
end = memchr(beg, eol, buflim - beg); |
383 |
end++; |
386 |
end++; |
384 |
#ifdef MBS_SUPPORT |
387 |
#ifdef MBS_SUPPORT |
385 |
if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) |
388 |
if (MB_CUR_MAX > 1 && bytes_left) |
386 |
continue; |
389 |
continue; |
387 |
#endif |
390 |
#endif /* MBS_SUPPORT */ |
388 |
while (beg > buf && beg[-1] != eol) |
391 |
while (beg > buf && beg[-1] != eol) |
389 |
--beg; |
392 |
--beg; |
390 |
if (kwsm.index < kwset_exact_matches) |
393 |
if ( |
|
|
394 |
#ifdef MBS_SUPPORT |
395 |
!(match_icase && MB_CUR_MAX > 1) && |
396 |
#endif /* MBS_SUPPORT */ |
397 |
(kwsm.index < kwset_exact_matches)) |
391 |
goto success_in_beg_and_end; |
398 |
goto success_in_beg_and_end; |
392 |
if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) |
399 |
if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) |
393 |
continue; |
400 |
continue; |
Lines 395-407
Link Here
|
395 |
else |
402 |
else |
396 |
{ |
403 |
{ |
397 |
/* No good fixed strings; start with DFA. */ |
404 |
/* No good fixed strings; start with DFA. */ |
|
|
405 |
#ifdef MBS_SUPPORT |
406 |
size_t bytes_left = 0; |
407 |
#endif /* MBS_SUPPORT */ |
398 |
size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); |
408 |
size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); |
399 |
if (offset == (size_t) -1) |
409 |
if (offset == (size_t) -1) |
400 |
break; |
410 |
break; |
401 |
/* Narrow down to the line we've found. */ |
411 |
/* Narrow down to the line we've found. */ |
|
|
412 |
#ifdef MBS_SUPPORT |
413 |
if (MB_CUR_MAX > 1 && !using_utf8) |
414 |
{ |
415 |
bytes_left = offset; |
416 |
while (bytes_left) |
417 |
{ |
418 |
size_t len = mbrlen (beg, bytes_left, &mbs); |
419 |
if (len == (size_t) -1 || len == 0) |
420 |
{ |
421 |
/* Invalid sequence or NUL byte: treat as single-byte. */ |
422 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
423 |
beg++; |
424 |
bytes_left--; |
425 |
continue; |
426 |
} |
427 |
|
428 |
if (len == (size_t) -2) |
429 |
/* Offset points inside multibyte character: |
430 |
* no good. */ |
431 |
break; |
432 |
|
433 |
beg += len; |
434 |
bytes_left -= len; |
435 |
} |
436 |
} |
437 |
else |
438 |
#endif /* MBS_SUPPORT */ |
402 |
beg += offset; |
439 |
beg += offset; |
403 |
end = memchr (beg, eol, buflim - beg); |
440 |
end = memchr (beg, eol, buflim - beg); |
404 |
end++; |
441 |
end++; |
|
|
442 |
#ifdef MBS_SUPPORT |
443 |
if (MB_CUR_MAX > 1 && bytes_left) |
444 |
continue; |
445 |
#endif /* MBS_SUPPORT */ |
405 |
while (beg > buf && beg[-1] != eol) |
446 |
while (beg > buf && beg[-1] != eol) |
406 |
--beg; |
447 |
--beg; |
407 |
} |
448 |
} |
Lines 469-483
Link Here
|
469 |
} /* for (beg = end ..) */ |
510 |
} /* for (beg = end ..) */ |
470 |
|
511 |
|
471 |
failure: |
512 |
failure: |
472 |
#ifdef MBS_SUPPORT |
|
|
473 |
if (MB_CUR_MAX > 1) |
474 |
{ |
475 |
if (mb_properties) |
476 |
free (mb_properties); |
477 |
if (match_icase) |
478 |
free ((char *) buf); |
479 |
} |
480 |
#endif /* MBS_SUPPORT */ |
481 |
return (size_t) -1; |
513 |
return (size_t) -1; |
482 |
|
514 |
|
483 |
success_in_beg_and_end: |
515 |
success_in_beg_and_end: |
Lines 486-509
Link Here
|
486 |
/* FALLTHROUGH */ |
518 |
/* FALLTHROUGH */ |
487 |
|
519 |
|
488 |
success_in_start_and_len: |
520 |
success_in_start_and_len: |
489 |
#ifdef MBS_SUPPORT |
|
|
490 |
if (MB_CUR_MAX > 1) |
491 |
{ |
492 |
if (mb_properties) |
493 |
free (mb_properties); |
494 |
if (match_icase) |
495 |
free ((char *) buf); |
496 |
} |
497 |
#endif /* MBS_SUPPORT */ |
498 |
*match_size = len; |
521 |
*match_size = len; |
499 |
return start; |
522 |
return start; |
500 |
} |
523 |
} |
501 |
|
524 |
|
|
|
525 |
static wchar_t **f_pattern; |
526 |
static char *f_initial_byte; |
527 |
static size_t f_pattern_count; |
528 |
static int f_i_multibyte; /* whether we're using the new -Fi MB method */ |
529 |
|
502 |
static void |
530 |
static void |
503 |
Fcompile (char const *pattern, size_t size) |
531 |
Fcompile (char const *pattern, size_t size) |
504 |
{ |
532 |
{ |
505 |
char const *beg, *lim, *err; |
533 |
char const *beg, *lim, *err; |
506 |
|
534 |
|
|
|
535 |
check_utf8 (); |
536 |
#ifdef MBS_SUPPORT |
537 |
/* Support -F -i for multibyte (e.g. UTF-8) input. */ |
538 |
if (match_icase && MB_CUR_MAX > 1) |
539 |
{ |
540 |
size_t in = 0; |
541 |
|
542 |
while (f_i_multibyte != -1 && in < size) |
543 |
{ |
544 |
wchar_t *f_this_pattern; |
545 |
size_t f_this_pattern_allocated = sizeof (wchar_t) * 1000; |
546 |
mbstate_t mbs; |
547 |
size_t out = 0; |
548 |
f_pattern_count++; |
549 |
f_pattern = xrealloc (f_pattern, |
550 |
sizeof (wchar_t *) * f_pattern_count); |
551 |
f_initial_byte = xrealloc (f_initial_byte, |
552 |
sizeof (char) * |
553 |
(2 * f_pattern_count + 1)); |
554 |
if (f_pattern_count == 1) |
555 |
f_initial_byte[0] = '\0'; |
556 |
|
557 |
/* Convert pattern into wchar_t*, storing them in f_this_pattern. |
558 |
Don't read more than we're given. */ |
559 |
f_this_pattern = xmalloc (f_this_pattern_allocated); |
560 |
memset (&mbs, '\0', sizeof (mbs)); |
561 |
while (in < size) |
562 |
{ |
563 |
size_t c; |
564 |
wchar_t this_wc; |
565 |
if (out == f_this_pattern_allocated) |
566 |
{ |
567 |
f_this_pattern_allocated *= 2; |
568 |
f_this_pattern = xrealloc (f_this_pattern, |
569 |
f_this_pattern_allocated); |
570 |
} |
571 |
|
572 |
c = mbrtowc (&this_wc, pattern + in, size - in, &mbs); |
573 |
if (c < 1) |
574 |
{ |
575 |
/* Fall back to old method. */ |
576 |
f_i_multibyte = -1; |
577 |
while (f_pattern_count--) |
578 |
free (f_pattern[f_pattern_count]); |
579 |
free (f_pattern); |
580 |
f_pattern = NULL; |
581 |
break; |
582 |
} |
583 |
|
584 |
f_this_pattern[out] = towlower (this_wc); |
585 |
if (out == 0) |
586 |
{ |
587 |
/* First character. Work out the first byte of upper and |
588 |
lower case multibyte strings for the first character. */ |
589 |
wchar_t wc; |
590 |
char mbs[MB_CUR_MAX]; |
591 |
mbstate_t ps; |
592 |
|
593 |
if (iswupper (this_wc)) |
594 |
{ |
595 |
wc = towlower (this_wc); |
596 |
} |
597 |
else |
598 |
{ |
599 |
wc = towupper (this_wc); |
600 |
} |
601 |
|
602 |
memset (&ps, '\0', sizeof (ps)); |
603 |
wcrtomb (mbs, this_wc, &ps); |
604 |
mbs[1] = '\0'; |
605 |
strcat (f_initial_byte, mbs); |
606 |
|
607 |
memset (&ps, '\0', sizeof (ps)); |
608 |
wcrtomb (mbs, wc, &ps); |
609 |
mbs[1] = '\0'; |
610 |
strcat (f_initial_byte, mbs); |
611 |
} |
612 |
|
613 |
in += c; |
614 |
|
615 |
if (this_wc == L'\n') |
616 |
break; |
617 |
|
618 |
out++; |
619 |
} |
620 |
|
621 |
if (f_i_multibyte == -1) |
622 |
break; |
623 |
|
624 |
/* Nul-terminate it. */ |
625 |
if (out == f_this_pattern_allocated) |
626 |
{ |
627 |
f_this_pattern_allocated++; |
628 |
f_this_pattern = xrealloc (f_this_pattern, |
629 |
f_this_pattern_allocated); |
630 |
} |
631 |
|
632 |
f_this_pattern[out] = L'\0'; |
633 |
f_pattern[f_pattern_count - 1] = f_this_pattern; |
634 |
f_i_multibyte = 1; |
635 |
} |
636 |
} |
637 |
#endif /* MBS_SUPPORT */ |
638 |
|
639 |
|
507 |
kwsinit (); |
640 |
kwsinit (); |
508 |
beg = pattern; |
641 |
beg = pattern; |
509 |
do |
642 |
do |
Lines 523-528
Link Here
|
523 |
} |
656 |
} |
524 |
|
657 |
|
525 |
static size_t |
658 |
static size_t |
|
|
659 |
Fimbexec (const char *buf, size_t size, size_t *plen) |
660 |
{ |
661 |
char const *beg; |
662 |
size_t len; |
663 |
mbstate_t mbs; |
664 |
|
665 |
assert (match_icase && f_i_multibyte == 1); |
666 |
assert (MB_CUR_MAX > 1); |
667 |
|
668 |
memset (&mbs, '\0', sizeof (mbs)); |
669 |
beg = buf; |
670 |
len = 0; |
671 |
while (beg < buf + size) |
672 |
{ |
673 |
wchar_t wc; |
674 |
char const *p; |
675 |
char const *next_char; |
676 |
unsigned char match[f_pattern_count]; |
677 |
size_t i, letter; |
678 |
int patterns_left; |
679 |
|
680 |
for (p = beg; |
681 |
(p < buf + size) && !strchr (f_initial_byte, *p); |
682 |
p++) |
683 |
; |
684 |
|
685 |
if (p == NULL || p == buf + size) |
686 |
break; |
687 |
|
688 |
/* First byte matches, now check the rest */ |
689 |
beg = p; |
690 |
letter = len = 0; |
691 |
memset (match, '\1', f_pattern_count); |
692 |
patterns_left = 1; |
693 |
while (patterns_left) |
694 |
{ |
695 |
size_t c; |
696 |
|
697 |
patterns_left = 0; |
698 |
|
699 |
c = mbrtowc (&wc, beg + len, size - (beg - buf) - len, &mbs); |
700 |
if (c < 1) |
701 |
{ |
702 |
memset (&mbs, '\0', sizeof (mbs)); |
703 |
next_char = beg + 1; |
704 |
break; |
705 |
} |
706 |
|
707 |
if (!len) |
708 |
next_char = beg + c; |
709 |
|
710 |
wc = towlower (wc); |
711 |
for (i = 0; i < f_pattern_count; i++) |
712 |
{ |
713 |
if (match[i]) |
714 |
{ |
715 |
if (f_pattern[i][letter] == L'\0') |
716 |
{ |
717 |
/* Found a match. */ |
718 |
*plen = len; |
719 |
return beg - buf; |
720 |
} |
721 |
|
722 |
if (f_pattern[i][letter] == wc) |
723 |
patterns_left = 1; |
724 |
else |
725 |
match[i] = '\0'; |
726 |
} |
727 |
} |
728 |
|
729 |
len += c; |
730 |
letter++; |
731 |
} |
732 |
|
733 |
beg = next_char; |
734 |
} |
735 |
|
736 |
return -1; |
737 |
} |
738 |
|
739 |
static size_t |
526 |
Fexecute (char const *buf, size_t size, size_t *match_size, int exact) |
740 |
Fexecute (char const *buf, size_t size, size_t *match_size, int exact) |
527 |
{ |
741 |
{ |
528 |
register char const *beg, *try, *end; |
742 |
register char const *beg, *try, *end; |
Lines 531-557
Link Here
|
531 |
struct kwsmatch kwsmatch; |
745 |
struct kwsmatch kwsmatch; |
532 |
size_t ret_val; |
746 |
size_t ret_val; |
533 |
#ifdef MBS_SUPPORT |
747 |
#ifdef MBS_SUPPORT |
534 |
char *mb_properties = NULL; |
748 |
mbstate_t mbs; |
535 |
if (MB_CUR_MAX > 1) |
749 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
536 |
{ |
|
|
537 |
if (match_icase) |
538 |
{ |
539 |
char *case_buf = xmalloc(size); |
540 |
memcpy(case_buf, buf, size); |
541 |
buf = case_buf; |
542 |
} |
543 |
mb_properties = check_multibyte_string(buf, size); |
544 |
} |
545 |
#endif /* MBS_SUPPORT */ |
750 |
#endif /* MBS_SUPPORT */ |
546 |
|
751 |
|
547 |
for (beg = buf; beg <= buf + size; ++beg) |
752 |
for (beg = buf; beg <= buf + size; ++beg) |
548 |
{ |
753 |
{ |
549 |
size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); |
754 |
size_t offset; |
|
|
755 |
#ifdef MBS_SUPPORT |
756 |
if (match_icase && f_i_multibyte == 1) |
757 |
offset = Fimbexec (beg, buf + size - beg, &kwsmatch.size[0]); |
758 |
else |
759 |
#endif /* MBS_SUPPORT */ |
760 |
offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); |
761 |
|
550 |
if (offset == (size_t) -1) |
762 |
if (offset == (size_t) -1) |
551 |
goto failure; |
763 |
goto failure; |
552 |
#ifdef MBS_SUPPORT |
764 |
#ifdef MBS_SUPPORT |
553 |
if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) |
765 |
if (MB_CUR_MAX > 1 && !using_utf8) |
554 |
continue; /* It is a part of multibyte character. */ |
766 |
{ |
|
|
767 |
size_t bytes_left = offset; |
768 |
while (bytes_left) |
769 |
{ |
770 |
size_t len = mbrlen (beg, bytes_left, &mbs); |
771 |
if (len == (size_t) -1 || len == 0) |
772 |
{ |
773 |
/* Incomplete character: treat as single-byte. */ |
774 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
775 |
beg++; |
776 |
bytes_left--; |
777 |
continue; |
778 |
} |
779 |
|
780 |
if (len == (size_t) -2) |
781 |
/* Offset points inside multibyte character: no good. */ |
782 |
break; |
783 |
|
784 |
beg += len; |
785 |
bytes_left -= len; |
786 |
} |
787 |
|
788 |
if (bytes_left) |
789 |
continue; |
790 |
} |
791 |
else |
555 |
#endif /* MBS_SUPPORT */ |
792 |
#endif /* MBS_SUPPORT */ |
556 |
beg += offset; |
793 |
beg += offset; |
557 |
len = kwsmatch.size[0]; |
794 |
len = kwsmatch.size[0]; |
Lines 583-592
Link Here
|
583 |
{ |
820 |
{ |
584 |
/* Try a shorter length anchored at the same place. */ |
821 |
/* Try a shorter length anchored at the same place. */ |
585 |
--len; |
822 |
--len; |
|
|
823 |
#ifdef MBS_SUPPORT |
824 |
if (match_icase && f_i_multibyte == 1) |
825 |
offset = Fimbexec (beg, len, &kwsmatch.size[0]); |
826 |
else |
827 |
#endif /* MBS_SUPPORT */ |
586 |
offset = kwsexec (kwset, beg, len, &kwsmatch); |
828 |
offset = kwsexec (kwset, beg, len, &kwsmatch); |
|
|
829 |
|
587 |
if (offset == -1) { |
830 |
if (offset == -1) { |
588 |
break; /* Try a different anchor. */ |
831 |
break; /* Try a different anchor. */ |
589 |
} |
832 |
} |
|
|
833 |
#ifdef MBS_SUPPORT |
834 |
if (MB_CUR_MAX > 1 && !using_utf8) |
835 |
{ |
836 |
size_t bytes_left = offset; |
837 |
while (bytes_left) |
838 |
{ |
839 |
size_t len = mbrlen (beg, bytes_left, &mbs); |
840 |
if (len == (size_t) -1 || len == 0) |
841 |
{ |
842 |
/* Incomplete character: treat as single-byte. */ |
843 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
844 |
beg++; |
845 |
bytes_left--; |
846 |
continue; |
847 |
} |
848 |
|
849 |
if (len == (size_t) -2) |
850 |
/* Offset points inside multibyte character: |
851 |
* no good. */ |
852 |
break; |
853 |
|
854 |
beg += len; |
855 |
bytes_left -= len; |
856 |
} |
857 |
|
858 |
if (bytes_left) |
859 |
break; /* Try a different anchor. */ |
860 |
} |
861 |
else |
862 |
#endif /* MBS_SUPPORT */ |
590 |
beg += offset; |
863 |
beg += offset; |
591 |
len = kwsmatch.size[0]; |
864 |
len = kwsmatch.size[0]; |
592 |
} |
865 |
} |
Lines 597-615
Link Here
|
597 |
} |
870 |
} |
598 |
|
871 |
|
599 |
failure: |
872 |
failure: |
|
|
873 |
return -1; |
874 |
|
875 |
success: |
600 |
#ifdef MBS_SUPPORT |
876 |
#ifdef MBS_SUPPORT |
601 |
if (MB_CUR_MAX > 1) |
877 |
if (MB_CUR_MAX > 1 && !using_utf8) |
602 |
{ |
878 |
{ |
603 |
if (match_icase) |
879 |
end = beg + len; |
604 |
free((char *) buf); |
880 |
while (end < buf + size) |
605 |
if (mb_properties) |
881 |
{ |
606 |
free(mb_properties); |
882 |
size_t len = mbrlen (end, buf + size - end, &mbs); |
|
|
883 |
if (len == (size_t) -1 || len == (size_t) -2 || len == 0) |
884 |
{ |
885 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
886 |
len = 1; |
887 |
} |
888 |
if (len == 1 && *end == eol) |
889 |
break; |
890 |
|
891 |
end += len; |
892 |
} |
607 |
} |
893 |
} |
|
|
894 |
else |
608 |
#endif /* MBS_SUPPORT */ |
895 |
#endif /* MBS_SUPPORT */ |
609 |
return -1; |
|
|
610 |
|
611 |
success: |
612 |
end = memchr (beg + len, eol, (buf + size) - (beg + len)); |
896 |
end = memchr (beg + len, eol, (buf + size) - (beg + len)); |
|
|
897 |
|
613 |
end++; |
898 |
end++; |
614 |
while (buf < beg && beg[-1] != eol) |
899 |
while (buf < beg && beg[-1] != eol) |
615 |
--beg; |
900 |
--beg; |
Lines 618-632
Link Here
|
618 |
|
903 |
|
619 |
success_in_beg_and_len: |
904 |
success_in_beg_and_len: |
620 |
*match_size = len; |
905 |
*match_size = len; |
621 |
#ifdef MBS_SUPPORT |
|
|
622 |
if (MB_CUR_MAX > 1) |
623 |
{ |
624 |
if (mb_properties) |
625 |
free (mb_properties); |
626 |
if (match_icase) |
627 |
free ((char *) buf); |
628 |
} |
629 |
#endif /* MBS_SUPPORT */ |
630 |
return beg - buf; |
906 |
return beg - buf; |
631 |
} |
907 |
} |
632 |
|
908 |
|