Gentoo Websites Logo
Go to: Gentoo Home Documentation Forums Lists Bugs Planet Store Wiki Get Gentoo!
View | Details | Raw Unified | Return to bug 196803
Collapse All | Expand All

link-grammar/link-grammar/tokenize.c (-13 / +14 lines)
Lines 172-178 Link Here
172
    used in a sentence.
172
    used in a sentence.
173
*/
173
*/
174
174
175
175
#undef	MIN
176
#define MIN(a, b)  (((a) < (b)) ? (a) : (b))
176
177
177
static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
178
static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) {
178
    /* w points to a string, wend points to the char one after the end.  The
179
    /* w points to a string, wend points to the char one after the end.  The
Lines 256-263 Link Here
256
257
257
    for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
258
    for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) {
258
259
259
	strncpy(word, w, wend-w);
260
	strncpy(word, w, MIN(wend-w, MAX_WORD));
260
	word[wend-w] = '\0';
261
	word[MIN(wend-w, MAX_WORD)] = '\0';
261
	if (wend == w) break;  /* it will work without this */
262
	if (wend == w) break;  /* it will work without this */
262
	
263
	
263
	if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
264
	if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break;
Lines 285-292 Link Here
285
    /* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */
286
    /* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */
286
287
287
    s_stripped = -1;
288
    s_stripped = -1;
288
    strncpy(word, w, wend-w);
289
    strncpy(word, w, MIN(wend-w, MAX_WORD));
289
    word[wend-w] = '\0';
290
    word[MIN(wend-w, MAX_WORD)] = '\0';
290
    word_is_in_dict=0;
291
    word_is_in_dict=0;
291
292
292
    if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
293
    if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1;
Lines 309-324 Link Here
309
310
310
	if(s_ok==1 || i==s_strippable) {
311
	if(s_ok==1 || i==s_strippable) {
311
	  
312
	  
312
	  strncpy(newword, w, (wend-len)-w);
313
	  strncpy(newword, w, MIN((wend-len)-w, MAX_WORD));
313
	  newword[(wend-len)-w] = '\0';
314
	  newword[MIN((wend-len)-w, MAX_WORD)] = '\0';
314
315
315
	  /* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */	  
316
	  /* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */	  
316
	  if (boolean_dictionary_lookup(sent->dict, newword)) {
317
	  if (boolean_dictionary_lookup(sent->dict, newword)) {
317
	    if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]); 
318
	    if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]); 
318
	    s_stripped = i;
319
	    s_stripped = i;
319
	    wend -= len;
320
	    wend -= len;
320
	    strncpy(word, w, wend-w);
321
	    strncpy(word, w, MIN(wend-w, MAX_WORD));
321
	    word[wend-w] = '\0';
322
	    word[MIN(wend-w, MAX_WORD)] = '\0';
322
	    break;
323
	    break;
323
	  }
324
	  }
324
325
Lines 326-341 Link Here
326
	  else {
327
	  else {
327
	    for (j=0; j<p_strippable; j++) {
328
	    for (j=0; j<p_strippable; j++) {
328
	      if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) {
329
	      if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) {
329
		strncpy(newword, w+strlen(prefix[j]), (wend-len)-(w+strlen(prefix[j])));
330
		strncpy(newword, w+strlen(prefix[j]), MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD));
330
		newword[(wend-len)-(w+strlen(prefix[j]))]='\0';
331
		newword[MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD)]='\0';
331
		if(boolean_dictionary_lookup(sent->dict, newword)) {
332
		if(boolean_dictionary_lookup(sent->dict, newword)) {
332
		  if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]); 
333
		  if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]); 
333
		  if (!issue_sentence_word(sent, prefix[j])) return FALSE;
334
		  if (!issue_sentence_word(sent, prefix[j])) return FALSE;
334
		  if(i < s_strippable) s_stripped = i;
335
		  if(i < s_strippable) s_stripped = i;
335
		  wend -= len;
336
		  wend -= len;
336
		  w += strlen(prefix[j]);
337
		  w += strlen(prefix[j]);
337
		  strncpy(word, w, wend-w);
338
		  strncpy(word, w, MIN(wend-w, MAX_WORD));
338
		word[wend-w] = '\0';
339
		  word[MIN(wend-w, MAX_WORD)] = '\0';
339
		break;
340
		break;
340
		}
341
		}
341
	      }
342
	      }

Return to bug 196803