Lines 172-178
Link Here
|
172 |
used in a sentence. |
172 |
used in a sentence. |
173 |
*/ |
173 |
*/ |
174 |
|
174 |
|
175 |
|
175 |
#undef MIN |
|
|
176 |
#define MIN(a, b) (((a) < (b)) ? (a) : (b)) |
176 |
|
177 |
|
177 |
static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) { |
178 |
static int separate_word(Sentence sent, char *w, char *wend, int is_first_word, int quote_found) { |
178 |
/* w points to a string, wend points to the char one after the end. The |
179 |
/* w points to a string, wend points to the char one after the end. The |
Lines 256-263
Link Here
|
256 |
|
257 |
|
257 |
for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) { |
258 |
for (n_r_stripped = 0; n_r_stripped < MAX_STRIP; n_r_stripped++) { |
258 |
|
259 |
|
259 |
strncpy(word, w, wend-w); |
260 |
strncpy(word, w, MIN(wend-w, MAX_WORD)); |
260 |
word[wend-w] = '\0'; |
261 |
word[MIN(wend-w, MAX_WORD)] = '\0'; |
261 |
if (wend == w) break; /* it will work without this */ |
262 |
if (wend == w) break; /* it will work without this */ |
262 |
|
263 |
|
263 |
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break; |
264 |
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) break; |
Lines 285-292
Link Here
|
285 |
/* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */ |
286 |
/* Now we strip off suffixes...w points to the remaining word, "wend" to the end of the word. */ |
286 |
|
287 |
|
287 |
s_stripped = -1; |
288 |
s_stripped = -1; |
288 |
strncpy(word, w, wend-w); |
289 |
strncpy(word, w, MIN(wend-w, MAX_WORD)); |
289 |
word[wend-w] = '\0'; |
290 |
word[MIN(wend-w, MAX_WORD)] = '\0'; |
290 |
word_is_in_dict=0; |
291 |
word_is_in_dict=0; |
291 |
|
292 |
|
292 |
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1; |
293 |
if (boolean_dictionary_lookup(sent->dict, word) || is_initials_word(word)) word_is_in_dict=1; |
Lines 309-324
Link Here
|
309 |
|
310 |
|
310 |
if(s_ok==1 || i==s_strippable) { |
311 |
if(s_ok==1 || i==s_strippable) { |
311 |
|
312 |
|
312 |
strncpy(newword, w, (wend-len)-w); |
313 |
strncpy(newword, w, MIN((wend-len)-w, MAX_WORD)); |
313 |
newword[(wend-len)-w] = '\0'; |
314 |
newword[MIN((wend-len)-w, MAX_WORD)] = '\0'; |
314 |
|
315 |
|
315 |
/* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */ |
316 |
/* Check if the remainder is in the dictionary; for the no-suffix case, it won't be */ |
316 |
if (boolean_dictionary_lookup(sent->dict, newword)) { |
317 |
if (boolean_dictionary_lookup(sent->dict, newword)) { |
317 |
if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]); |
318 |
if(verbosity>1) if(i< s_strippable) printf("Splitting word into two: %s-%s\n", newword, suffix[i]); |
318 |
s_stripped = i; |
319 |
s_stripped = i; |
319 |
wend -= len; |
320 |
wend -= len; |
320 |
strncpy(word, w, wend-w); |
321 |
strncpy(word, w, MIN(wend-w, MAX_WORD)); |
321 |
word[wend-w] = '\0'; |
322 |
word[MIN(wend-w, MAX_WORD)] = '\0'; |
322 |
break; |
323 |
break; |
323 |
} |
324 |
} |
324 |
|
325 |
|
Lines 326-341
Link Here
|
326 |
else { |
327 |
else { |
327 |
for (j=0; j<p_strippable; j++) { |
328 |
for (j=0; j<p_strippable; j++) { |
328 |
if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) { |
329 |
if (strncmp(w, prefix[j], strlen(prefix[j])) == 0) { |
329 |
strncpy(newword, w+strlen(prefix[j]), (wend-len)-(w+strlen(prefix[j]))); |
330 |
strncpy(newword, w+strlen(prefix[j]), MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD)); |
330 |
newword[(wend-len)-(w+strlen(prefix[j]))]='\0'; |
331 |
newword[MIN((wend-len)-(w+strlen(prefix[j])), MAX_WORD)]='\0'; |
331 |
if(boolean_dictionary_lookup(sent->dict, newword)) { |
332 |
if(boolean_dictionary_lookup(sent->dict, newword)) { |
332 |
if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]); |
333 |
if(verbosity>1) if(i < s_strippable) printf("Splitting word into three: %s-%s-%s\n", prefix[j], newword, suffix[i]); |
333 |
if (!issue_sentence_word(sent, prefix[j])) return FALSE; |
334 |
if (!issue_sentence_word(sent, prefix[j])) return FALSE; |
334 |
if(i < s_strippable) s_stripped = i; |
335 |
if(i < s_strippable) s_stripped = i; |
335 |
wend -= len; |
336 |
wend -= len; |
336 |
w += strlen(prefix[j]); |
337 |
w += strlen(prefix[j]); |
337 |
strncpy(word, w, wend-w); |
338 |
strncpy(word, w, MIN(wend-w, MAX_WORD)); |
338 |
word[wend-w] = '\0'; |
339 |
word[MIN(wend-w, MAX_WORD)] = '\0'; |
339 |
break; |
340 |
break; |
340 |
} |
341 |
} |
341 |
} |
342 |
} |