Multibyte-aware word-boundary checks for the match_words path of
grep-2.5.1 src/search.c.

Summary of the hunks below:

  1. Declares `last_char` (initially NULL) inside the MBS_SUPPORT section,
     next to the existing `mbs` conversion state.

  2. and 3. Record `last_char = beg` on every iteration of two mbrlen()
     scanning loops, before the value returned by mbrlen() is examined.

  4. Replaces the single WCHAR()-based condition that guarded
     `goto success_in_start_and_len` with separate left and right checks.

     Left side: a match at start == 0 is always accepted.  Otherwise, when
     MB_CUR_MAX > 1, the character before the match is decoded with
     mbtowc().  With using_utf8 the decode position is found by stepping
     back from beg + start - 1 over bytes in the range 0x80..0xbf, never
     going below buf; without it, decoding starts at last_char.  The left
     side is accepted when mbtowc() returns <= 0, or when the decoded
     character is neither iswalnum() nor L'_' and the conversion consumed
     exactly the bytes up to beg + start.

     Right side: accepted when start + len == end - beg - 1 (the removed
     code compared len alone), when mbtowc() on the bytes following the
     match returns <= 0, or when the decoded character is neither
     iswalnum() nor L'_'.

     When MB_CUR_MAX is 1, or MBS_SUPPORT is not defined, both sides use
     the WCHAR() test on the adjacent byte.

     Once both sides are accepted the code jumps to success_in_beg_and_end
     if !exact and to success_in_start_and_len otherwise; the removed code
     always jumped to success_in_start_and_len.

NOTE(review): `mbs` is zeroed when mbtowc() returns <= 0, but mbtowc()
does not take an mbstate_t; it keeps its own internal conversion state,
which is reset by mbtowc (NULL, NULL, 0).  Confirm whether state-dependent
encodings matter for this code path.

NOTE(review): `last_char` is only assigned inside the loops touched by
hunks 2 and 3.  Whether it is always non-NULL, and always points at the
character immediately preceding the match, when the non-UTF-8 branch of
hunk 4 runs cannot be determined from this patch alone; verify against the
full function.

NOTE(review): the leading whitespace of the hunk lines below was
reconstructed (spaces, GNU-style nesting) and may not match search.c byte
for byte; apply with `patch -l` if the context fails to match.

--- grep-2.5.1/src/search.c.w 2005-06-07 15:54:49.000000000 +0100
+++ grep-2.5.1/src/search.c 2005-06-07 15:56:21.000000000 +0100
@@ -327,6 +327,7 @@
   static int use_dfa;
   static int use_dfa_checked = 0;
 #ifdef MBS_SUPPORT
+  const char *last_char = NULL;
   mbstate_t mbs;
   memset (&mbs, '\0', sizeof (mbstate_t));
 #endif /* MBS_SUPPORT */
@@ -381,6 +382,8 @@
                   while (bytes_left)
                     {
                       size_t len = mbrlen (beg, bytes_left, &mbs);
+
+                      last_char = beg;
                       if (len == (size_t) -1 || len == 0)
                         {
                           /* Incomplete character: treat as single-byte. */
@@ -441,6 +444,8 @@
                   while (bytes_left)
                     {
                       size_t len = mbrlen (beg, bytes_left, &mbs);
+
+                      last_char = beg;
                       if (len == (size_t) -1 || len == 0)
                         {
                           /* Incomplete character: treat as single-byte. */
@@ -503,10 +508,84 @@
               if (match_words)
                 while (start >= 0)
                   {
-                    if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1]))
-                        && (len == end - beg - 1
-                            || !WCHAR ((unsigned char) beg[start + len])))
-                      goto success_in_start_and_len;
+                    int lword_match = 0;
+                    if (start == 0)
+                      lword_match = 1;
+                    else
+                      {
+                        assert (start > 0);
+#ifdef MBS_SUPPORT
+                        if (MB_CUR_MAX > 1)
+                          {
+                            const char *s;
+                            int mr;
+                            wchar_t pwc;
+
+                            if (using_utf8)
+                              {
+                                s = beg + start - 1;
+                                while (s > buf
+                                       && (unsigned char) *s >= 0x80
+                                       && (unsigned char) *s <= 0xbf)
+                                  --s;
+                              }
+                            else
+                              s = last_char;
+                            mr = mbtowc (&pwc, s, beg + start - s);
+                            if (mr <= 0)
+                              {
+                                memset (&mbs, '\0', sizeof (mbstate_t));
+                                lword_match = 1;
+                              }
+                            else if (!(iswalnum (pwc) || pwc == L'_')
+                                     && mr == (int) (beg + start - s))
+                              lword_match = 1;
+                          }
+                        else
+#endif /* MBS_SUPPORT */
+                        if (!WCHAR ((unsigned char) beg[start - 1]))
+                          lword_match = 1;
+                      }
+
+                    if (lword_match)
+                      {
+                        int rword_match = 0;
+                        if (start + len == end - beg - 1)
+                          rword_match = 1;
+                        else
+                          {
+#ifdef MBS_SUPPORT
+                            if (MB_CUR_MAX > 1)
+                              {
+                                wchar_t nwc;
+                                int mr;
+
+                                mr = mbtowc (&nwc, beg + start + len,
+                                             end - beg - start - len - 1);
+                                if (mr <= 0)
+                                  {
+                                    memset (&mbs, '\0', sizeof (mbstate_t));
+                                    rword_match = 1;
+                                  }
+                                else if (!iswalnum (nwc) && nwc != L'_')
+                                  rword_match = 1;
+                              }
+                            else
+#endif /* MBS_SUPPORT */
+                            if (!WCHAR ((unsigned char) beg[start + len]))
+                              rword_match = 1;
+                          }
+
+                        if (rword_match)
+                          {
+                            if (!exact)
+                              /* Returns the whole line. */
+                              goto success_in_beg_and_end;
+                            else
+                              /* Returns just this word match. */
+                              goto success_in_start_and_len;
+                          }
+                      }
                     if (len > 0)
                       {
                         /* Try a shorter length anchored at the same place. */