Lines 330-335
Link Here
|
330 |
static int use_dfa; |
330 |
static int use_dfa; |
331 |
static int use_dfa_checked = 0; |
331 |
static int use_dfa_checked = 0; |
332 |
#ifdef MBS_SUPPORT |
332 |
#ifdef MBS_SUPPORT |
|
|
333 |
const char *last_char = NULL; |
333 |
int mb_cur_max = MB_CUR_MAX; |
334 |
int mb_cur_max = MB_CUR_MAX; |
334 |
mbstate_t mbs; |
335 |
mbstate_t mbs; |
335 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
336 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
Lines 385-390
Link Here
|
385 |
while (bytes_left) |
386 |
while (bytes_left) |
386 |
{ |
387 |
{ |
387 |
size_t mlen = mbrlen (beg, bytes_left, &mbs); |
388 |
size_t mlen = mbrlen (beg, bytes_left, &mbs); |
|
|
389 |
|
390 |
last_char = beg; |
388 |
if (mlen == (size_t) -1 || mlen == 0) |
391 |
if (mlen == (size_t) -1 || mlen == 0) |
389 |
{ |
392 |
{ |
390 |
/* Incomplete character: treat as single-byte. */ |
393 |
/* Incomplete character: treat as single-byte. */ |
Lines 445-450
Link Here
|
445 |
while (bytes_left) |
448 |
while (bytes_left) |
446 |
{ |
449 |
{ |
447 |
size_t mlen = mbrlen (beg, bytes_left, &mbs); |
450 |
size_t mlen = mbrlen (beg, bytes_left, &mbs); |
|
|
451 |
|
452 |
last_char = beg; |
448 |
if (mlen == (size_t) -1 || mlen == 0) |
453 |
if (mlen == (size_t) -1 || mlen == 0) |
449 |
{ |
454 |
{ |
450 |
/* Incomplete character: treat as single-byte. */ |
455 |
/* Incomplete character: treat as single-byte. */ |
Lines 507-516
Link Here
|
507 |
if (match_words) |
512 |
if (match_words) |
508 |
while (start >= 0) |
513 |
while (start >= 0) |
509 |
{ |
514 |
{ |
510 |
if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) |
515 |
int lword_match = 0; |
511 |
&& (len == end - beg - 1 |
516 |
if (start == 0) |
512 |
|| !WCHAR ((unsigned char) beg[start + len]))) |
517 |
lword_match = 1; |
513 |
goto success_in_beg_and_end; |
518 |
else |
|
|
519 |
{ |
520 |
assert (start > 0); |
521 |
#ifdef MBS_SUPPORT |
522 |
if (mb_cur_max > 1) |
523 |
{ |
524 |
const char *s; |
525 |
int mr; |
526 |
wchar_t pwc; |
527 |
|
528 |
if (using_utf8) |
529 |
{ |
530 |
s = beg + start - 1; |
531 |
while (s > buf |
532 |
&& (unsigned char) *s >= 0x80 |
533 |
&& (unsigned char) *s <= 0xbf) |
534 |
--s; |
535 |
} |
536 |
else |
537 |
s = last_char; |
538 |
mr = mbtowc (&pwc, s, beg + start - s); |
539 |
if (mr <= 0) |
540 |
{ |
541 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
542 |
lword_match = 1; |
543 |
} |
544 |
else if (!(iswalnum (pwc) || pwc == L'_') |
545 |
&& mr == (int) (beg + start - s)) |
546 |
lword_match = 1; |
547 |
} |
548 |
else |
549 |
#endif /* MBS_SUPPORT */ |
550 |
if (!WCHAR ((unsigned char) beg[start - 1])) |
551 |
lword_match = 1; |
552 |
} |
553 |
|
554 |
if (lword_match) |
555 |
{ |
556 |
int rword_match = 0; |
557 |
if (start + len == end - beg - 1) |
558 |
rword_match = 1; |
559 |
else |
560 |
{ |
561 |
#ifdef MBS_SUPPORT |
562 |
if (mb_cur_max > 1) |
563 |
{ |
564 |
wchar_t nwc; |
565 |
int mr; |
566 |
|
567 |
mr = mbtowc (&nwc, beg + start + len, |
568 |
end - beg - start - len - 1); |
569 |
if (mr <= 0) |
570 |
{ |
571 |
memset (&mbs, '\0', sizeof (mbstate_t)); |
572 |
rword_match = 1; |
573 |
} |
574 |
else if (!iswalnum (nwc) && nwc != L'_') |
575 |
rword_match = 1; |
576 |
} |
577 |
else |
578 |
#endif /* MBS_SUPPORT */ |
579 |
if (!WCHAR ((unsigned char) beg[start + len])) |
580 |
rword_match = 1; |
581 |
} |
582 |
|
583 |
if (rword_match) |
584 |
{ |
585 |
if (!exact) |
586 |
/* Returns the whole line. */ |
587 |
goto success_in_beg_and_end; |
588 |
else |
589 |
/* Returns just this word match. */ |
590 |
goto success_in_start_and_len; |
591 |
} |
592 |
} |
514 |
if (len > 0) |
593 |
if (len > 0) |
515 |
{ |
594 |
{ |
516 |
/* Try a shorter length anchored at the same place. */ |
595 |
/* Try a shorter length anchored at the same place. */ |