RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
Working file: tokenize.c
head: 1.4
branch:
locks: strict
access list:
symbolic names:
	link-grammar-4-2-4: 1.3
	release-4-2-2: 1.2
	release-4-2-1: 1.2
	release-4-1-3: 1.1.1.1
	release-4-1-1: 1.1.1.1
	begin: 1.1.1.1
	start: 1.1.1
keyword substitution: kv
total revisions: 5;	selected revisions: 1
description:
----------------------------
revision 1.4
date: 2007/10/27 19:03:40;  author: dom;  state: Exp;  lines: +15 -14
Secunia advisory SA27340 and CVE identifier CVE-2007-5395.

The vulnerability is caused by a boundary error within the
"separate_word()" function in tokenize.c when processing overly long
words (over 61 bytes). This can be exploited to cause a stack-based
buffer overflow via a specially crafted sentence passed to the
"separate_sentence()" function. Successful exploitation allows
execution of arbitrary code.
=============================================================================

Index: tokenize.c
===================================================================
RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -B -r1.3 -r1.4
--- link-grammar/link-grammar/tokenize.c	16 Aug 2006 17:07:02 -0000	1.3
+++ link-grammar/link-grammar/tokenize.c	27 Oct 2007 19:03:40 -0000	1.4
@@ -172,7 +172,8 @@
    used in a sentence. */
-
+#undef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
 static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
 /* w points to a string, wend points to the char one after the end.  The
@@ -256,8 +257,8 @@
     for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
-	strncpy(word, w, wend-w);
-	word[wend-w] = '\0';
+	strncpy(word, w, MIN(wend-w, MAX_WORD));
+	word[MIN(wend-w, MAX_WORD)] = '\0';
 	if (wend == w) break;  /* it will work without this */
 	if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
@@ -285,8 +286,8 @@
     /* Now we strip off suffixes...w points to the remaining word,
        "wend" to the end of the word. */
     s_stripped = -1;
-    strncpy(word, w, wend-w);
-    word[wend-w] = '\0';
+    strncpy(word, w, MIN(wend-w, MAX_WORD));
+    word[MIN(wend-w, MAX_WORD)] = '\0';
     word_is_in_dict=0;
     if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
@@ -309,16 +310,16 @@
 	  if(s_ok==1 || i==s_strippable) {
-	      strncpy(newword, w, (wend-len)-w);
-	      newword[(wend-len)-w] = '\0';
+	      strncpy(newword, w, MIN((wend-len)-w, MAX_WORD));
+	      newword[MIN((wend-len)-w, MAX_WORD)] = '\0';
 	      /* Check if the remainder is in the dictionary;
 		 for the no-suffix case, it won't be */
 	      if (boolean_dictionary_lookup(sent->dict, newword)) {
 		  if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]);
 		  s_stripped = i;
 		  wend -= len;
-		  strncpy(word, w, wend-w);
-		  word[wend-w] = '\0';
+		  strncpy(word, w, MIN(wend-w, MAX_WORD));
+		  word[MIN(wend-w, MAX_WORD)] = '\0';
 		  break;
 	      }
@@ -326,16 +327,16 @@
 	  else {
 	      for (j=0; j...
 		  if (boolean_dictionary_lookup(sent->dict, newword)) {
 		      if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]);
 		      if (!issue_sentence_word(sent, prefix[j])) return FALSE;
 		      if(i < s_strippable) s_stripped = i;
 		      wend -= len;
 		      w += strlen(prefix[j]);
-		      strncpy(word, w, wend-w);
-		      word[wend-w] = '\0';
+		      strncpy(word, w, MIN(wend-w, MAX_WORD));
+		      word[MIN(wend-w, MAX_WORD)] = '\0';
 		      break;
 		  }
 	      }
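
Every hunk in this patch applies the same pattern: each strncpy() into the
fixed-size word/newword buffers is clamped to MAX_WORD with the new MIN macro,
so an over-long token is truncated instead of overrunning the stack buffer.
The following standalone sketch shows that pattern in isolation; the MAX_WORD
value of 60, the copy_token() helper, and the sample token are illustrative
assumptions, not the library's actual definitions.

#include <stdio.h>
#include <string.h>

#define MAX_WORD 60  /* assumed buffer capacity, for illustration only */

#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Copy the token [w, wend) into a fixed-size buffer the way the patched
 * separate_word() does: the copy length is clamped to MAX_WORD, so a token
 * longer than the buffer is truncated rather than overflowing it. */
static void copy_token(char *word, const char *w, const char *wend)
{
    size_t n = MIN((size_t)(wend - w), (size_t)MAX_WORD);
    strncpy(word, w, n);
    word[n] = '\0';
}

int main(void)
{
    char word[MAX_WORD + 1];
    const char *text = "anexcessivelylongtokenthatkeepsgoingandgoingandgoing"
                       "wellpastthesixtybytemarktotriggertheoldoverflow";
    /* Before the fix, strncpy(word, w, wend - w) wrote strlen(text) bytes
     * into a MAX_WORD+1 buffer; with the clamp the copy stops at MAX_WORD. */
    copy_token(word, text, text + strlen(text));
    printf("%zu -> %zu bytes: %s\n", strlen(text), strlen(word), word);
    return 0;
}

The clamp trades fidelity for safety: tokens longer than MAX_WORD are silently
truncated before the dictionary lookup, but the unbounded copy into a
stack-allocated buffer, which is the overflow described in CVE-2007-5395, can
no longer occur.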