RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
Working file: tokenize.c
head: 1.4
branch:
locks: strict
access list:
symbolic names:
	link-grammar-4-2-4: 1.3
	release-4-2-2: 1.2
	release-4-2-1: 1.2
	release-4-1-3: 1.1.1.1
	release-4-1-1: 1.1.1.1
	begin: 1.1.1.1
	start: 1.1.1
keyword substitution: kv
total revisions: 5;	selected revisions: 1
description:
----------------------------
revision 1.4
date: 2007/10/27 19:03:40;  author: dom;  state: Exp;  lines: +15 -14
Secunia advisory SA27340 and CVE identifier CVE-2007-5395.

The vulnerability is caused by a boundary error within the
"separate_word()" function in tokenize.c when processing overly long
words (over 61 bytes). This can be exploited to cause a stack-based
buffer overflow via a specially crafted sentence passed to the
"separate_sentence()" function. Successful exploitation allows
execution of arbitrary code.
=============================================================================

Index: tokenize.c
===================================================================
RCS file: /cvsroot/link-grammar/link-grammar/tokenize.c,v
retrieving revision 1.3
retrieving revision 1.4
diff -u -b -B -r1.3 -r1.4
--- link-grammar/link-grammar/tokenize.c	16 Aug 2006 17:07:02 -0000	1.3
+++ link-grammar/link-grammar/tokenize.c	27 Oct 2007 19:03:40 -0000	1.4
@@ -172,7 +172,8 @@
    used in a sentence. */
-
+#undef MIN
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
 static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
 /* w points to a string, wend points to the char one after the end.  The
@@ -256,8 +257,8 @@
     for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
-	strncpy(word, w, wend-w);
-	word[wend-w] = '\0';
+	strncpy(word, w, MIN(wend-w, MAX_WORD));
+	word[MIN(wend-w, MAX_WORD)] = '\0';
 	if (wend == w) break;  /* it will work without this */
 	if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
@@ -285,8 +286,8 @@
     /* Now we strip off suffixes...w points to the remaining word,
        "wend" to the end of the word. */
     s_stripped = -1;
-    strncpy(word, w, wend-w);
-    word[wend-w] = '\0';
+    strncpy(word, w, MIN(wend-w, MAX_WORD));
+    word[MIN(wend-w, MAX_WORD)] = '\0';
     word_is_in_dict=0;
     if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
@@ -309,16 +310,16 @@
 	  if(s_ok==1 || i==s_strippable) {
-	      strncpy(newword, w, (wend-len)-w);
-	      newword[(wend-len)-w] = '\0';
+	      strncpy(newword, w, MIN((wend-len)-w, MAX_WORD));
+	      newword[MIN((wend-len)-w, MAX_WORD)] = '\0';
 	      /* Check if the remainder is in the dictionary;
 		 for the no-suffix case, it won't be */
 	      if (boolean_dictionary_lookup(sent->dict, newword)) {
 		  if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]);
 		  s_stripped = i;
 		  wend -= len;
-		  strncpy(word, w, wend-w);
-		  word[wend-w] = '\0';
+		  strncpy(word, w, MIN(wend-w, MAX_WORD));
+		  word[MIN(wend-w, MAX_WORD)] = '\0';
 		  break;
 	      }
@@ -326,16 +327,16 @@
 	  else {
 	      for (j=0; j...
 		  if (boolean_dictionary_lookup(sent->dict, newword)) {
 		      if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]);
 		      if (!issue_sentence_word(sent, prefix[j])) return FALSE;
 		      if(i < s_strippable) s_stripped = i;
 		      wend -= len;
 		      w += strlen(prefix[j]);
-		      strncpy(word, w, wend-w);
-		      word[wend-w] = '\0';
+		      strncpy(word, w, MIN(wend-w, MAX_WORD));
+		      word[MIN(wend-w, MAX_WORD)] = '\0';
 		      break;
 		  }
 	      }
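
Every hunk in this patch applies the same pattern: each strncpy() into the
fixed-size word/newword buffers is clamped to MAX_WORD with the new MIN macro,
so an over-long token is truncated instead of overrunning the stack buffer.
The following standalone sketch shows that pattern in isolation; the MAX_WORD
value of 60, the copy_token() helper, and the sample token are illustrative
assumptions, not the library's actual definitions.

#include <stdio.h>
#include <string.h>

#define MAX_WORD 60  /* assumed buffer capacity, for illustration only */

#undef MIN
#define MIN(a, b) (((a) < (b)) ? (a) : (b))

/* Copy the token [w, wend) into a fixed-size buffer the way the patched
 * separate_word() does: the copy length is clamped to MAX_WORD, so a token
 * longer than the buffer is truncated rather than overflowing it. */
static void copy_token(char *word, const char *w, const char *wend)
{
    size_t n = MIN((size_t)(wend - w), (size_t)MAX_WORD);
    strncpy(word, w, n);
    word[n] = '\0';
}

int main(void)
{
    char word[MAX_WORD + 1];
    const char *text = "anexcessivelylongtokenthatkeepsgoingandgoingandgoing"
                       "wellpastthesixtybytemarktotriggertheoldoverflow";
    /* Before the fix, strncpy(word, w, wend - w) wrote strlen(text) bytes
     * into a MAX_WORD+1 buffer; with the clamp the copy stops at MAX_WORD. */
    copy_token(word, text, text + strlen(text));
    printf("%zu -> %zu bytes: %s\n", strlen(text), strlen(word), word);
    return 0;
}

The clamp trades fidelity for safety: tokens longer than MAX_WORD are silently
truncated before the dictionary lookup, but the unbounded copy into a
stack-allocated buffer, which is the overflow described in CVE-2007-5395, can
no longer occur.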