Lines 124-188
Link Here
|
124 |
int |
124 |
int |
125 |
url_check_word (char *word, int len) |
125 |
url_check_word (char *word, int len) |
126 |
{ |
126 |
{ |
127 |
char *at, *dot; |
127 |
#define D(x) (x), ((sizeof (x)) - 1) |
|
|
128 |
static const struct { |
129 |
const char *s; |
130 |
int len; |
131 |
} |
132 |
prefix[] = { |
133 |
{ D("irc.") }, |
134 |
{ D("ftp.") }, |
135 |
{ D("www.") }, |
136 |
{ D("irc://") }, |
137 |
{ D("ftp://") }, |
138 |
{ D("http://") }, |
139 |
{ D("https://") }, |
140 |
{ D("file://") }, |
141 |
{ D("rtsp://") }, |
142 |
{ D("gopher://") }, |
143 |
}, |
144 |
suffix[] = { |
145 |
{ D(".org") }, |
146 |
{ D(".net") }, |
147 |
{ D(".com") }, |
148 |
{ D(".edu") }, |
149 |
{ D(".html") }, |
150 |
{ D(".info") }, |
151 |
{ D(".name") }, |
152 |
}; |
153 |
#undef D |
154 |
const char *at, *dot; |
128 |
int i, dots; |
155 |
int i, dots; |
129 |
char temp[4]; |
|
|
130 |
guint32 pre; |
131 |
|
156 |
|
132 |
if ((word[0] == '@' || word[0] == '+' || word[0] == '^' || word[0] == '%' || word[0] == '*' ) && word[1] == '#') |
157 |
if (len > 1 && word[1] == '#' && strchr("@+^%*#", word[0])) |
133 |
return WORD_CHANNEL; |
158 |
return WORD_CHANNEL; |
134 |
|
159 |
|
135 |
if ((word[0] == '#' || word[0] == '&') && word[1] != '#' && word[1] != 0) |
160 |
if ((word[0] == '#' || word[0] == '&') && word[1] != '#' && word[1] != 0) |
136 |
return WORD_CHANNEL; |
161 |
return WORD_CHANNEL; |
137 |
|
162 |
|
138 |
if (len > 4 && word[4] != '.') |
163 |
for (i = 0; i < G_N_ELEMENTS(prefix); i++) |
139 |
{ |
164 |
{ |
140 |
temp[0] = tolower (word[0]); |
165 |
int l; |
141 |
temp[1] = tolower (word[1]); |
|
|
142 |
temp[2] = tolower (word[2]); |
143 |
temp[3] = tolower (word[3]); |
144 |
|
145 |
pre = *((guint32 *)temp); |
146 |
|
147 |
if (CMPL (pre, 'i','r','c','.')) |
148 |
return WORD_URL; |
149 |
if (CMPL (pre, 'f','t','p','.')) |
150 |
return WORD_URL; |
151 |
if (CMPL (pre, 'w','w','w','.')) |
152 |
return WORD_URL; |
153 |
|
166 |
|
154 |
if (len > 7 && word[4] == '/' && word[5] == '/') |
167 |
l = prefix[i].len; |
|
|
168 |
if (len > l) |
155 |
{ |
169 |
{ |
156 |
if (CMPL (pre, 'i','r','c',':')) /* irc:// */ |
170 |
int j; |
157 |
return WORD_URL; |
|
|
158 |
if (CMPL (pre, 'f','t','p',':')) /* ftp:// */ |
159 |
return WORD_URL; |
160 |
} |
161 |
|
171 |
|
162 |
/* check for ABCD://... */ |
172 |
/* This is pretty much strncasecmp(). */ |
163 |
if (len > 8 && word[4] == ':' && word[5] == '/' && word[6] == '/') |
173 |
for (j = 0; j < l; j++) |
164 |
{ |
174 |
{ |
165 |
if (CMPL (pre, 'h','t','t','p')) /* http:// */ |
175 |
unsigned char c = word[j]; |
166 |
return WORD_URL; |
176 |
if (tolower(c) != prefix[i].s[j]) |
167 |
if (CMPL (pre, 'f','i','l','e')) /* file:// */ |
177 |
break; |
168 |
return WORD_URL; |
178 |
} |
169 |
if (CMPL (pre, 'r','t','s','p')) /* rtsp:// */ |
179 |
if (j == l) |
170 |
return WORD_URL; |
180 |
return WORD_URL; |
171 |
} |
|
|
172 |
|
173 |
/* check for https:// */ |
174 |
if (len > 9 && word[5] == ':' && word[6] == '/' && word[7] == '/') |
175 |
{ |
176 |
if (CMPL (pre, 'h','t','t','p') && (word[4] == 's' || word[4] == 'S')) |
177 |
return WORD_URL; |
178 |
} |
179 |
|
180 |
/* check for gopher:// */ |
181 |
if (len > 10 && word[6] == ':' && word[7] == '/' && word[8] == '/') |
182 |
{ |
183 |
if (CMPL (pre, 'g','o','p','h')) |
184 |
if (CMPW (word + 4, 'e','r') || CMPW (word + 4, 'E','R')) |
185 |
return WORD_URL; |
186 |
} |
181 |
} |
187 |
} |
182 |
} |
188 |
|
183 |
|
Lines 205-211
Link Here
|
205 |
{ |
200 |
{ |
206 |
if (word[i] == '.' && i > 1) |
201 |
if (word[i] == '.' && i > 1) |
207 |
dots++; /* allow 127.0.0.1:80 */ |
202 |
dots++; /* allow 127.0.0.1:80 */ |
208 |
else if (!isdigit (word[i]) && word[i] != ':') |
203 |
else if (!isdigit ((unsigned char) word[i]) && word[i] != ':') |
209 |
{ |
204 |
{ |
210 |
dots = 0; |
205 |
dots = 0; |
211 |
break; |
206 |
break; |
Lines 216-250
Link Here
|
216 |
|
211 |
|
217 |
if (len > 5) |
212 |
if (len > 5) |
218 |
{ |
213 |
{ |
219 |
/* create a lowercase version of the last 4 letters */ |
214 |
for (i = 0; i < G_N_ELEMENTS(suffix); i++) |
220 |
temp[0] = tolower (word[len - 4]); |
215 |
{ |
221 |
temp[1] = tolower (word[len - 3]); |
216 |
int l; |
222 |
temp[2] = tolower (word[len - 2]); |
|
|
223 |
temp[3] = tolower (word[len - 1]); |
224 |
|
217 |
|
225 |
pre = *((guint32 *)temp); |
218 |
l = suffix[i].len; |
|
|
219 |
if (len > l) |
220 |
{ |
221 |
const unsigned char *p = &word[len - l]; |
222 |
int j; |
226 |
|
223 |
|
227 |
if (word[len - 5] == '.') |
224 |
/* This is pretty much strncasecmp(). */ |
228 |
{ |
225 |
for (j = 0; j < l; j++) |
229 |
if (CMPL (pre, 'h','t','m','l')) |
226 |
{ |
230 |
return WORD_HOST; |
227 |
if (tolower(p[j]) != suffix[i].s[j]) |
231 |
if (CMPL (pre, 'i','n','f','o')) |
228 |
break; |
232 |
return WORD_HOST; |
229 |
} |
233 |
if (CMPL (pre, 'n','a','m','e')) |
230 |
if (j == l) |
234 |
return WORD_HOST; |
231 |
return WORD_HOST; |
|
|
232 |
} |
235 |
} |
233 |
} |
236 |
|
234 |
|
237 |
if (CMPL (pre, '.','o','r','g')) |
|
|
238 |
return WORD_HOST; |
239 |
if (CMPL (pre, '.','n','e','t')) |
240 |
return WORD_HOST; |
241 |
if (CMPL (pre, '.','c','o','m')) |
242 |
return WORD_HOST; |
243 |
if (CMPL (pre, '.','e','d','u')) |
244 |
return WORD_HOST; |
245 |
|
246 |
if (word[len - 3] == '.' && |
235 |
if (word[len - 3] == '.' && |
247 |
isalpha (word[len - 2]) && isalpha (word[len - 1])) |
236 |
isalpha ((unsigned char) word[len - 2]) && isalpha ((unsigned char) word[len - 1])) |
248 |
return WORD_HOST; |
237 |
return WORD_HOST; |
249 |
} |
238 |
} |
250 |
|
239 |
|