Go to:
Gentoo Home
Documentation
Forums
Lists
Bugs
Planet
Store
Wiki
Get Gentoo!
Gentoo's Bugzilla – Attachment 59494 Details for
Bug 93443
grep 79x slowdown with LANG=en_us.utf8 LC_ALL=en_us.utf8
Home
|
New
–
[Ex]
|
Browse
|
Search
|
Privacy Policy
|
[?]
|
Reports
|
Requests
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
Patch #6: more UTF-8 optimizations
grep-2.5.1-w.patch (text/plain), 2.90 KB, created by
Canal Vorfeed
on 2005-05-21 15:22:38 UTC
(
hide
)
Description:
Patch #6: more UTF-8 optimizations
Filename: grep-2.5.1-w.patch
MIME Type: text/plain
Creator:
Canal Vorfeed
Created:
2005-05-21 15:22:38 UTC
Size:
2.90 KB
patch
obsolete
>--- grep-2.5.1a/src/search.c.w 2005-01-07 15:04:18.766280754 +0000 >+++ grep-2.5.1a/src/search.c 2005-01-07 16:59:19.287275172 +0000 >@@ -330,6 +330,7 @@ > static int use_dfa; > static int use_dfa_checked = 0; > #ifdef MBS_SUPPORT >+ const char *last_char = NULL; > int mb_cur_max = MB_CUR_MAX; > mbstate_t mbs; > memset (&mbs, '\0', sizeof (mbstate_t)); >@@ -385,6 +386,8 @@ > while (bytes_left) > { > size_t mlen = mbrlen (beg, bytes_left, &mbs); >+ >+ last_char = beg; > if (mlen == (size_t) -1 || mlen == 0) > { > /* Incomplete character: treat as single-byte. */ >@@ -445,6 +448,8 @@ > while (bytes_left) > { > size_t mlen = mbrlen (beg, bytes_left, &mbs); >+ >+ last_char = beg; > if (mlen == (size_t) -1 || mlen == 0) > { > /* Incomplete character: treat as single-byte. */ >@@ -507,10 +512,84 @@ > if (match_words) > while (start >= 0) > { >- if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) >- && (len == end - beg - 1 >- || !WCHAR ((unsigned char) beg[start + len]))) >- goto success_in_beg_and_end; >+ int lword_match = 0; >+ if (start == 0) >+ lword_match = 1; >+ else >+ { >+ assert (start > 0); >+#ifdef MBS_SUPPORT >+ if (mb_cur_max > 1) >+ { >+ const char *s; >+ int mr; >+ wchar_t pwc; >+ >+ if (using_utf8) >+ { >+ s = beg + start - 1; >+ while (s > buf >+ && (unsigned char) *s >= 0x80 >+ && (unsigned char) *s <= 0xbf) >+ --s; >+ } >+ else >+ s = last_char; >+ mr = mbtowc (&pwc, s, beg + start - s); >+ if (mr <= 0) >+ { >+ memset (&mbs, '\0', sizeof (mbstate_t)); >+ lword_match = 1; >+ } >+ else if (!(iswalnum (pwc) || pwc == L'_') >+ && mr == (int) (beg + start - s)) >+ lword_match = 1; >+ } >+ else >+#endif /* MBS_SUPPORT */ >+ if (!WCHAR ((unsigned char) beg[start - 1])) >+ lword_match = 1; >+ } >+ >+ if (lword_match) >+ { >+ int rword_match = 0; >+ if (start + len == end - beg - 1) >+ rword_match = 1; >+ else >+ { >+#ifdef MBS_SUPPORT >+ if (mb_cur_max > 1) >+ { >+ wchar_t nwc; >+ int mr; >+ >+ mr = mbtowc (&nwc, beg + start + len, >+ end - beg - 
start - len - 1); >+ if (mr <= 0) >+ { >+ memset (&mbs, '\0', sizeof (mbstate_t)); >+ rword_match = 1; >+ } >+ else if (!iswalnum (nwc) && nwc != L'_') >+ rword_match = 1; >+ } >+ else >+#endif /* MBS_SUPPORT */ >+ if (!WCHAR ((unsigned char) beg[start + len])) >+ rword_match = 1; >+ } >+ >+ if (rword_match) >+ { >+ if (!exact) >+ /* Returns the whole line. */ >+ goto success_in_beg_and_end; >+ else >+ /* Returns just this word match. */ >+ goto success_in_start_and_len; >+ } >+ } > if (len > 0) > { > /* Try a shorter length anchored at the same place. */
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Diff
View Attachment As Raw
Actions:
View
|
Diff
Attachments on
bug 93443
:
59486
|
59488
|
59490
|
59492
|
59493
| 59494 |
74453
|
74455
|
83645
|
83646
|
83647
|
83648