Lines 2-33
Link Here
|
2 |
# include "config.h" |
2 |
# include "config.h" |
3 |
#endif |
3 |
#endif |
4 |
|
4 |
|
|
|
5 |
#include <cstring> |
6 |
#include <cctype> |
5 |
|
7 |
|
6 |
#include "lib.h" |
|
|
7 |
#include "stardict.h" |
8 |
#include <sys/stat.h> |
8 |
#include <sys/stat.h> |
9 |
#include "string.h" |
9 |
#include <zlib.h> |
10 |
#include "zlib.h" |
|
|
11 |
#ifdef HAVE_MMAP |
10 |
#ifdef HAVE_MMAP |
12 |
#include <sys/types.h> |
11 |
# include <sys/types.h> |
13 |
#include <fcntl.h> |
12 |
# include <fcntl.h> |
14 |
#include <sys/mman.h> |
13 |
# include <sys/mman.h> |
15 |
#endif |
14 |
#endif |
|
|
15 |
#include <algorithm> |
16 |
|
16 |
|
17 |
// Notice: read src/tools/DICTFILE_FORMAT for the dictionary file's format information! |
17 |
#include "distance.h" |
18 |
|
18 |
|
19 |
cacheItem::cacheItem() |
19 |
#include "lib.h" |
|
|
20 |
|
21 |
// Notice: read src/tools/DICTFILE_FORMAT for the dictionary |
22 |
// file's format information! |
23 |
|
24 |
class MapFile { |
25 |
public: |
26 |
MapFile(void) : data(NULL) {} |
27 |
~MapFile(); |
28 |
bool open(const gchar *file_name, gulong file_size); |
29 |
inline gchar *begin(void) { return data; } |
30 |
private: |
31 |
gchar *data; |
32 |
gulong size; |
33 |
#ifdef HAVE_MMAP |
34 |
int mmap_fd; |
35 |
#elif defined(_WIN32) |
36 |
HANDLE hFile; |
37 |
HANDLE hFileMap; |
38 |
#endif |
39 |
}; |
40 |
|
41 |
inline bool MapFile::open(const gchar *file_name, gulong file_size) |
20 |
{ |
42 |
{ |
21 |
data= NULL; |
43 |
size=file_size; |
|
|
44 |
#ifdef HAVE_MMAP |
45 |
if ((mmap_fd = ::open(file_name, O_RDONLY)) < 0) { |
46 |
//g_print("Open file %s failed!\n",fullfilename); |
47 |
return false; |
48 |
} |
49 |
data = (gchar *)mmap( NULL, file_size, PROT_READ, MAP_SHARED, mmap_fd, 0); |
50 |
if ((void *)data == (void *)(-1)) { |
51 |
//g_print("mmap file %s failed!\n",idxfilename); |
52 |
data=NULL; |
53 |
return false; |
54 |
} |
55 |
#else |
56 |
# ifdef _WIN32 |
57 |
hFile = CreateFile(file_name, GENERIC_READ, 0, NULL, OPEN_ALWAYS, |
58 |
FILE_ATTRIBUTE_NORMAL, 0); |
59 |
hFileMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, |
60 |
file_size, NULL); |
61 |
data = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, file_size); |
62 |
# else |
63 |
gsize read_len; |
64 |
if (!g_file_get_contents(file_name, &data, &read_len, NULL)) |
65 |
return false; |
66 |
|
67 |
if (read_len!=file_size) |
68 |
return false; |
69 |
# endif |
70 |
#endif |
71 |
|
72 |
return true; |
22 |
} |
73 |
} |
23 |
|
74 |
|
24 |
cacheItem::~cacheItem() |
75 |
inline MapFile::~MapFile() |
25 |
{ |
76 |
{ |
26 |
if (data) |
77 |
if (!data) |
|
|
78 |
return; |
79 |
#ifdef HAVE_MMAP |
80 |
munmap(data, size); |
81 |
close(mmap_fd); |
82 |
#else |
83 |
# ifdef _WIN32 |
84 |
UnmapViewOfFile(data); |
85 |
CloseHandle(hFileMap); |
86 |
CloseHandle(hFile); |
87 |
# else |
27 |
g_free(data); |
88 |
g_free(data); |
|
|
89 |
# endif |
90 |
#endif |
91 |
} |
92 |
|
93 |
inline bool bIsVowel(gchar inputchar) |
94 |
{ |
95 |
gchar ch = g_ascii_toupper(inputchar); |
96 |
return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' ); |
97 |
} |
98 |
|
99 |
bool bIsPureEnglish(const gchar *str) |
100 |
{ |
101 |
// i think this should work even when it is UTF8 string :). |
102 |
for (int i=0; str[i]!=0; i++) |
103 |
//if(str[i]<0) |
104 |
//if(str[i]<32 || str[i]>126) // tab equal 9,so this is not OK. |
105 |
// Better use isascii() but not str[i]<0 while char is default unsigned in arm |
106 |
if (!isascii(str[i])) |
107 |
return false; |
108 |
return true; |
109 |
} |
110 |
|
111 |
inline gint stardict_strcmp(const gchar *s1, const gchar *s2) { |
112 |
gint a=g_ascii_strcasecmp(s1, s2); |
113 |
if (a == 0) |
114 |
return strcmp(s1, s2); |
115 |
else |
116 |
return a; |
28 |
} |
117 |
} |
29 |
|
118 |
|
|
|
119 |
bool DictInfo::load_from_ifo_file(const gchar *ifofilename, |
120 |
bool istreedict) |
121 |
{ |
122 |
ifo_file_name=ifofilename; |
123 |
gchar *buffer; |
124 |
if (!g_file_get_contents(ifofilename, &buffer, NULL, NULL)) |
125 |
return false; |
126 |
|
127 |
#define TREEDICT_MAGIC_DATA "StarDict's treedict ifo file\nversion=2.4.2\n" |
128 |
#define DICT_MAGIC_DATA "StarDict's dict ifo file\nversion=2.4.2\n" |
129 |
const gchar *magic_data=istreedict ? TREEDICT_MAGIC_DATA : DICT_MAGIC_DATA; |
130 |
if (!g_str_has_prefix(buffer, magic_data)) { |
131 |
g_free(buffer); |
132 |
return false; |
133 |
} |
134 |
|
135 |
gchar *p1,*p2,*p3; |
136 |
|
137 |
p1 = buffer + strlen(magic_data)-1; |
138 |
|
139 |
p2 = strstr(p1,"\nwordcount="); |
140 |
if (!p2) { |
141 |
g_free(buffer); |
142 |
return false; |
143 |
} |
144 |
|
145 |
p3 = strchr(p2+ sizeof("\nwordcount=")-1,'\n'); |
146 |
gchar *tmpstr = (gchar *)g_memdup(p2+sizeof("\nwordcount=")-1, p3-(p2+sizeof("\nwordcount=")-1)+1); |
147 |
tmpstr[p3-(p2+sizeof("\nwordcount=")-1)] = '\0'; |
148 |
wordcount = atol(tmpstr); |
149 |
g_free(tmpstr); |
150 |
|
151 |
if (istreedict) { |
152 |
p2 = strstr(p1,"\ntdxfilesize="); |
153 |
if (!p2) { |
154 |
g_free(buffer); |
155 |
return false; |
156 |
} |
157 |
p3 = strchr(p2+ sizeof("\ntdxfilesize=")-1,'\n'); |
158 |
tmpstr = (gchar *)g_memdup(p2+sizeof("\ntdxfilesize=")-1, p3-(p2+sizeof("\ntdxfilesize=")-1)+1); |
159 |
tmpstr[p3-(p2+sizeof("\ntdxfilesize=")-1)] = '\0'; |
160 |
index_file_size = atol(tmpstr); |
161 |
g_free(tmpstr); |
162 |
} else { |
163 |
|
164 |
p2 = strstr(p1,"\nidxfilesize="); |
165 |
if (!p2) { |
166 |
g_free(buffer); |
167 |
return false; |
168 |
} |
169 |
|
170 |
p3 = strchr(p2+ sizeof("\nidxfilesize=")-1,'\n'); |
171 |
tmpstr = (gchar *)g_memdup(p2+sizeof("\nidxfilesize=")-1, p3-(p2+sizeof("\nidxfilesize=")-1)+1); |
172 |
tmpstr[p3-(p2+sizeof("\nidxfilesize=")-1)] = '\0'; |
173 |
index_file_size = atol(tmpstr); |
174 |
g_free(tmpstr); |
175 |
} |
176 |
|
177 |
p2 = strstr(p1,"\nbookname="); |
178 |
|
179 |
if (!p2) { |
180 |
g_free(buffer); |
181 |
return false; |
182 |
} |
30 |
|
183 |
|
|
|
184 |
p2 = p2 + sizeof("\nbookname=") -1; |
185 |
p3 = strchr(p2, '\n'); |
186 |
bookname.assign(p2, p3-p2); |
187 |
|
188 |
p2 = strstr(p1,"\nauthor="); |
189 |
if (p2) { |
190 |
p2 = p2 + sizeof("\nauthor=") -1; |
191 |
p3 = strchr(p2, '\n'); |
192 |
author.assign(p2, p3-p2); |
193 |
} |
194 |
|
195 |
p2 = strstr(p1,"\nemail="); |
196 |
if (p2) { |
197 |
p2 = p2 + sizeof("\nemail=") -1; |
198 |
p3 = strchr(p2, '\n'); |
199 |
email.assign(p2, p3-p2); |
200 |
} |
201 |
|
202 |
p2 = strstr(p1,"\nwebsite="); |
203 |
if (p2) { |
204 |
p2 = p2 + sizeof("\nwebsite=") -1; |
205 |
p3 = strchr(p2, '\n'); |
206 |
website.assign(p2, p3-p2); |
207 |
} |
208 |
|
209 |
p2 = strstr(p1,"\ndate="); |
210 |
if (p2) { |
211 |
p2 = p2 + sizeof("\ndate=") -1; |
212 |
p3 = strchr(p2, '\n'); |
213 |
date.assign(p2, p3-p2); |
214 |
} |
215 |
|
216 |
p2 = strstr(p1,"\ndescription="); |
217 |
if (p2) { |
218 |
p2 = p2 + sizeof("\ndescription=")-1; |
219 |
p3 = strchr(p2, '\n'); |
220 |
description.assign(p2, p3-p2); |
221 |
} |
222 |
|
223 |
p2 = strstr(p1,"\nsametypesequence="); |
224 |
if (p2) { |
225 |
p2+=sizeof("\nsametypesequence=")-1; |
226 |
p3 = strchr(p2, '\n'); |
227 |
sametypesequence.assign(p2, p3-p2); |
228 |
} |
229 |
|
230 |
g_free(buffer); |
231 |
|
232 |
return true; |
233 |
} |
31 |
//=================================================================== |
234 |
//=================================================================== |
32 |
DictBase::DictBase() |
235 |
DictBase::DictBase() |
33 |
{ |
236 |
{ |
Lines 46-160
Link Here
|
46 |
dict_data_close(dictdzfile); |
249 |
dict_data_close(dictdzfile); |
47 |
} |
250 |
} |
48 |
|
251 |
|
49 |
gchar* DictBase::GetWordData(glong idxitem_offset, glong idxitem_size) |
252 |
gchar* DictBase::GetWordData(guint32 idxitem_offset, guint32 idxitem_size) |
50 |
{ |
253 |
{ |
51 |
for (int i=0;i<WORDDATA_CACHE_NUM;i++) |
254 |
for (int i=0; i<WORDDATA_CACHE_NUM; i++) |
52 |
{ |
255 |
if (cache[i].data && cache[i].offset == idxitem_offset) |
53 |
if ((cache[i].data) && (cache[i].offset == idxitem_offset)) |
|
|
54 |
{ |
55 |
return cache[i].data; |
256 |
return cache[i].data; |
56 |
} |
257 |
|
57 |
} |
258 |
if (dictfile) |
58 |
if (dictfile) { |
|
|
59 |
fseek(dictfile, idxitem_offset, SEEK_SET); |
259 |
fseek(dictfile, idxitem_offset, SEEK_SET); |
60 |
} |
260 |
|
61 |
gchar *data; |
261 |
gchar *data; |
62 |
if (sametypesequence) { |
262 |
if (sametypesequence) { |
63 |
gchar *origin_data; |
263 |
gchar *origin_data = (gchar *)g_malloc(idxitem_size); |
64 |
origin_data = (gchar *)g_malloc(idxitem_size); |
|
|
65 |
|
264 |
|
66 |
if (dictfile) |
265 |
if (dictfile) |
67 |
fread(origin_data,idxitem_size,1,dictfile); |
266 |
fread(origin_data, idxitem_size, 1, dictfile); |
68 |
else |
267 |
else |
69 |
dict_data_read (dictdzfile, origin_data, idxitem_offset, idxitem_size); |
268 |
dict_data_read(dictdzfile, origin_data, idxitem_offset, idxitem_size); |
70 |
|
269 |
|
71 |
glong data_size; |
270 |
guint32 data_size; |
72 |
gint sametypesequence_len; |
271 |
gint sametypesequence_len; |
73 |
sametypesequence_len = strlen(sametypesequence); |
272 |
sametypesequence_len = strlen(sametypesequence); |
74 |
//there have sametypesequence_len char being omitted. |
273 |
//there have sametypesequence_len char being omitted. |
75 |
data_size = idxitem_size + sizeof(glong) + sametypesequence_len; |
274 |
data_size = idxitem_size + sizeof(guint32) + sametypesequence_len; |
76 |
//if the last item's size is determined by the end up '\0',then +=sizeof(gchar); |
275 |
//if the last item's size is determined by the end up '\0',then +=sizeof(gchar); |
77 |
//if the last item's size is determined by the head glong type data,then +=sizeof(glong); |
276 |
//if the last item's size is determined by the head guint32 type data,then +=sizeof(guint32); |
78 |
switch (sametypesequence[sametypesequence_len-1]) { |
277 |
switch (sametypesequence[sametypesequence_len-1]) { |
79 |
case 'm': |
278 |
case 'm': |
80 |
case 't': |
279 |
case 't': |
81 |
case 'y': |
280 |
case 'y': |
82 |
case 'o': |
281 |
case 'l': |
|
|
282 |
case 'g': |
83 |
data_size += sizeof(gchar); |
283 |
data_size += sizeof(gchar); |
84 |
break; |
284 |
break; |
85 |
case 'W': |
285 |
case 'W': |
86 |
case 'P': |
286 |
case 'P': |
87 |
data_size += sizeof(glong); |
287 |
data_size += sizeof(guint32); |
88 |
break; |
288 |
break; |
89 |
} |
289 |
} |
90 |
data = (gchar *)g_malloc(data_size); |
290 |
data = (gchar *)g_malloc(data_size); |
91 |
gchar *p1,*p2; |
291 |
gchar *p1,*p2; |
92 |
p1 = data + sizeof(glong); |
292 |
p1 = data + sizeof(guint32); |
93 |
p2 = origin_data; |
293 |
p2 = origin_data; |
94 |
glong sec_size; |
294 |
guint32 sec_size; |
95 |
//copy the head items. |
295 |
//copy the head items. |
96 |
for (int i=0;i< sametypesequence_len-1;i++) { |
296 |
for (int i=0; i<sametypesequence_len-1; i++) { |
97 |
memcpy(p1, &(sametypesequence[i]), sizeof(gchar)); |
297 |
*p1=sametypesequence[i]; |
98 |
p1+= sizeof(gchar); |
298 |
p1+=sizeof(gchar); |
99 |
switch (sametypesequence[i]) { |
299 |
switch (sametypesequence[i]) { |
100 |
case 'm': |
300 |
case 'm': |
101 |
case 't': |
301 |
case 't': |
102 |
case 'y': |
302 |
case 'y': |
103 |
case 'o': |
303 |
case 'l': |
|
|
304 |
case 'g': |
104 |
sec_size = strlen(p2)+1; |
305 |
sec_size = strlen(p2)+1; |
105 |
memcpy(p1, p2, sec_size); |
306 |
memcpy(p1, p2, sec_size); |
106 |
p1+= sec_size; |
307 |
p1+=sec_size; |
107 |
p2+= sec_size; |
308 |
p2+=sec_size; |
108 |
break; |
309 |
break; |
109 |
case 'W': |
310 |
case 'W': |
110 |
case 'P': |
311 |
case 'P': |
111 |
memcpy(&sec_size, p2, sizeof(glong)); |
312 |
sec_size = *reinterpret_cast<guint32 *>(p2); |
112 |
sec_size += sizeof(glong); |
313 |
sec_size += sizeof(guint32); |
113 |
memcpy(p1, p2, sec_size); |
314 |
memcpy(p1, p2, sec_size); |
114 |
p1+= sec_size; |
315 |
p1+=sec_size; |
115 |
p2+= sec_size; |
316 |
p2+=sec_size; |
116 |
break; |
317 |
break; |
117 |
} |
318 |
} |
118 |
} |
319 |
} |
119 |
//calculate the last item 's size. |
320 |
//calculate the last item 's size. |
120 |
sec_size = idxitem_size - (p2-origin_data); |
321 |
sec_size = idxitem_size - (p2-origin_data); |
121 |
memcpy(p1, &(sametypesequence[sametypesequence_len-1]), sizeof(gchar)); |
322 |
*p1=sametypesequence[sametypesequence_len-1]; |
122 |
p1+= sizeof(gchar); |
323 |
p1+=sizeof(gchar); |
123 |
switch (sametypesequence[sametypesequence_len-1]) { |
324 |
switch (sametypesequence[sametypesequence_len-1]) { |
124 |
case 'm': |
325 |
case 'm': |
125 |
case 't': |
326 |
case 't': |
126 |
case 'y': |
327 |
case 'y': |
127 |
case 'o': |
328 |
case 'l': |
|
|
329 |
case 'g': |
128 |
memcpy(p1, p2, sec_size); |
330 |
memcpy(p1, p2, sec_size); |
129 |
p1 += sec_size; |
331 |
p1 += sec_size; |
130 |
memcpy(p1, "", sizeof(gchar)); //add the end up '\0'; |
332 |
*p1='\0';//add the end up '\0'; |
131 |
break; |
333 |
break; |
132 |
case 'W': |
334 |
case 'W': |
133 |
case 'P': |
335 |
case 'P': |
134 |
memcpy(p1,&(sec_size), sizeof(glong)); //add the head glong size data. |
336 |
*reinterpret_cast<guint32 *>(p1)=sec_size; |
135 |
p1 += sizeof(glong); |
337 |
p1 += sizeof(guint32); |
136 |
memcpy(p1, p2, sec_size); |
338 |
memcpy(p1, p2, sec_size); |
137 |
break; |
339 |
break; |
138 |
} |
340 |
} |
139 |
g_free(origin_data); |
341 |
g_free(origin_data); |
140 |
} |
342 |
*reinterpret_cast<guint32 *>(data)=data_size; |
141 |
else { |
343 |
} else { |
142 |
data = (gchar *)g_malloc(idxitem_size + sizeof(glong)); |
344 |
data = (gchar *)g_malloc(idxitem_size + sizeof(guint32)); |
143 |
if (dictfile) |
345 |
if (dictfile) |
144 |
fread(data+sizeof(glong),idxitem_size,1,dictfile); |
346 |
fread(data+sizeof(guint32), idxitem_size, 1, dictfile); |
145 |
else |
347 |
else |
146 |
dict_data_read (dictdzfile, data+sizeof(glong), idxitem_offset, idxitem_size); |
348 |
dict_data_read(dictdzfile, data+sizeof(guint32), idxitem_offset, idxitem_size); |
|
|
349 |
*reinterpret_cast<guint32 *>(data)=idxitem_size; |
147 |
} |
350 |
} |
148 |
memcpy(data,&(idxitem_size),sizeof(glong)); |
|
|
149 |
if (cache[cache_cur].data) |
150 |
{ |
151 |
g_free(cache[cache_cur].data); |
351 |
g_free(cache[cache_cur].data); |
152 |
} |
352 |
|
153 |
cache[cache_cur].data = data; |
353 |
cache[cache_cur].data = data; |
154 |
cache[cache_cur].offset = idxitem_offset; |
354 |
cache[cache_cur].offset = idxitem_offset; |
155 |
cache_cur++; |
355 |
cache_cur++; |
156 |
if (cache_cur==WORDDATA_CACHE_NUM) |
356 |
if (cache_cur==WORDDATA_CACHE_NUM) |
157 |
cache_cur =0; |
357 |
cache_cur = 0; |
158 |
return data; |
358 |
return data; |
159 |
} |
359 |
} |
160 |
|
360 |
|
Lines 166-243
Link Here
|
166 |
bookname = NULL; |
366 |
bookname = NULL; |
167 |
idxfile = NULL; |
367 |
idxfile = NULL; |
168 |
wordlist = NULL; |
368 |
wordlist = NULL; |
169 |
#ifdef HAVE_MMAP |
|
|
170 |
mmap_fd = -1; |
171 |
mmap_idxmap_size = 0; |
172 |
#endif |
173 |
idxdatabuffer = NULL; |
369 |
idxdatabuffer = NULL; |
174 |
} |
370 |
} |
175 |
|
371 |
|
176 |
Lib::~Lib() |
372 |
Lib::~Lib() |
177 |
{ |
373 |
{ |
178 |
if (bookname) |
|
|
179 |
g_free(bookname); |
374 |
g_free(bookname); |
180 |
if (idxfile) { |
375 |
if (idxfile) { |
181 |
fclose(idxfile); |
376 |
fclose(idxfile); |
182 |
if (wordoffset) |
|
|
183 |
g_free(wordoffset); |
377 |
g_free(wordoffset); |
184 |
} |
378 |
} else { |
185 |
else { |
|
|
186 |
if (wordlist) |
187 |
g_free(wordlist); |
379 |
g_free(wordlist); |
188 |
|
|
|
189 |
#ifdef HAVE_MMAP |
190 |
if (mmap_fd >= 0) { |
191 |
if (mmap_idxmap_size) |
192 |
munmap(idxdatabuffer, mmap_idxmap_size); |
193 |
close(mmap_fd); |
194 |
} |
195 |
else { |
196 |
if (idxdatabuffer) |
197 |
g_free(idxdatabuffer); |
198 |
} |
199 |
#else |
200 |
if (idxdatabuffer) |
201 |
g_free(idxdatabuffer); |
202 |
#endif |
203 |
} |
380 |
} |
204 |
} |
381 |
} |
205 |
|
382 |
|
206 |
gboolean Lib::load(const char *ifofilename) |
383 |
bool Lib::load(const char *ifofilename) |
207 |
{ |
384 |
{ |
208 |
gulong idxfilesize; |
385 |
gulong idxfilesize; |
209 |
if (!load_ifofile(ifofilename, &idxfilesize)) |
386 |
if (!load_ifofile(ifofilename, &idxfilesize)) |
210 |
return false; |
387 |
return false; |
211 |
|
388 |
|
212 |
gchar fullfilename[256]; |
389 |
std::string fullfilename(ifofilename); |
213 |
|
390 |
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "dict.dz"); |
214 |
strcpy(fullfilename, ifofilename); |
|
|
215 |
strcpy(fullfilename+strlen(fullfilename)-sizeof("ifo") +1, "dict.dz"); |
216 |
|
391 |
|
217 |
if (g_file_test(fullfilename, G_FILE_TEST_EXISTS)) { |
392 |
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { |
218 |
dictdzfile = dict_data_open(fullfilename, 0); |
393 |
dictdzfile = dict_data_open(fullfilename.c_str(), 0); |
219 |
if (!dictdzfile) |
394 |
if (!dictdzfile) { |
220 |
{ |
|
|
221 |
//g_print("open file %s failed!\n",fullfilename); |
395 |
//g_print("open file %s failed!\n",fullfilename); |
222 |
return false; |
396 |
return false; |
223 |
} |
397 |
} |
224 |
} |
398 |
} else { |
225 |
else { |
399 |
fullfilename.erase(fullfilename.length()-sizeof(".dz")+1, sizeof(".dz")-1); |
226 |
fullfilename[strlen(fullfilename)-3] = '\0'; |
400 |
dictfile = fopen(fullfilename.c_str(),"rb"); |
227 |
dictfile = fopen(fullfilename,"rb"); |
401 |
if (!dictfile) { |
228 |
if (!dictfile) |
|
|
229 |
{ |
230 |
//g_print("open file %s failed!\n",fullfilename); |
402 |
//g_print("open file %s failed!\n",fullfilename); |
231 |
return false; |
403 |
return false; |
232 |
} |
404 |
} |
233 |
} |
405 |
} |
234 |
|
406 |
|
235 |
strcpy(fullfilename, ifofilename); |
407 |
fullfilename=ifofilename; |
236 |
strcpy(fullfilename+strlen(fullfilename)-sizeof("ifo") +1, "idx.gz"); |
408 |
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "idx.gz"); |
237 |
|
409 |
|
238 |
if (g_file_test(fullfilename, G_FILE_TEST_EXISTS)) { |
410 |
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { |
239 |
gzFile in; |
411 |
gzFile in; |
240 |
in = gzopen(fullfilename,"rb"); |
412 |
in = gzopen(fullfilename.c_str(),"rb"); |
241 |
if (in == NULL) { |
413 |
if (in == NULL) { |
242 |
//g_print("Open file %s failed!\n",fullfilename); |
414 |
//g_print("Open file %s failed!\n",fullfilename); |
243 |
return false; |
415 |
return false; |
Lines 252-311
Link Here
|
252 |
gzclose(in); |
424 |
gzclose(in); |
253 |
if (len != idxfilesize) |
425 |
if (len != idxfilesize) |
254 |
return false; |
426 |
return false; |
255 |
} |
427 |
} else { |
256 |
else { |
428 |
fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1); |
257 |
fullfilename[strlen(fullfilename)-3] = '\0'; |
429 |
MapFile map_file; |
258 |
|
430 |
if (!map_file.open(fullfilename.c_str(), idxfilesize)) |
259 |
#ifdef HAVE_MMAP |
|
|
260 |
if ((mmap_fd = open(fullfilename, O_RDONLY )) < 0) { |
261 |
//g_print("Open file %s failed!\n",fullfilename); |
262 |
return false; |
263 |
} |
264 |
idxdatabuffer = (gchar *)mmap( NULL, idxfilesize, PROT_READ, MAP_SHARED, mmap_fd, 0); |
265 |
if ((void *)idxdatabuffer == (void *)(-1)) { |
266 |
//g_print("mmap file %s failed!\n",idxfilename); |
267 |
return false; |
268 |
} |
269 |
mmap_idxmap_size = idxfilesize; |
270 |
#else |
271 |
#ifdef _WIN32 |
272 |
HANDLE hFile = CreateFile(fullfilename, GENERIC_READ, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0); |
273 |
HANDLE hFileMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, idxfilesize, NULL); |
274 |
idxdatabuffer = (gchar *)MapViewOfFile(hFileMap, FILE_MAP_READ, 0, 0, idxfilesize); |
275 |
#else |
276 |
FILE *file; |
277 |
if (!(file = fopen (fullfilename, "rb"))) { |
278 |
//g_print("Open file %s failed!\n",fullfilename); |
279 |
return false; |
280 |
} |
281 |
idxdatabuffer = (gchar *)g_malloc(idxfilesize); |
282 |
gint read_len; |
283 |
read_len = fread (idxdatabuffer, 1, idxfilesize, file); |
284 |
fclose (file); |
285 |
if (read_len!=idxfilesize) |
286 |
return false; |
431 |
return false; |
287 |
#endif |
432 |
idxdatabuffer=map_file.begin(); |
288 |
#endif |
433 |
|
289 |
if (true) { |
|
|
290 |
loadwordoffset(); |
434 |
loadwordoffset(); |
291 |
#ifdef HAVE_MMAP |
435 |
idxdatabuffer=NULL; |
292 |
munmap(idxdatabuffer, mmap_idxmap_size); |
436 |
if (!(idxfile = fopen(fullfilename.c_str(), "rb"))) { |
293 |
idxdatabuffer = NULL; |
|
|
294 |
mmap_idxmap_size = 0; |
295 |
close(mmap_fd); |
296 |
mmap_fd = -1; |
297 |
#else |
298 |
#ifdef _WIN32 |
299 |
UnmapViewOfFile(idxdatabuffer); |
300 |
idxdatabuffer = NULL; |
301 |
CloseHandle(hFileMap); |
302 |
CloseHandle(hFile); |
303 |
#else |
304 |
g_free(idxdatabuffer); |
305 |
idxdatabuffer = NULL; |
306 |
#endif |
307 |
#endif |
308 |
if (!(idxfile = fopen (fullfilename, "rb"))) { |
309 |
if (wordoffset) { |
437 |
if (wordoffset) { |
310 |
g_free(wordoffset); |
438 |
g_free(wordoffset); |
311 |
wordoffset = NULL; |
439 |
wordoffset = NULL; |
Lines 316-397
Link Here
|
316 |
g_print("bookname: %s , wordcount %ld\n",bookname, wordcount); |
444 |
g_print("bookname: %s , wordcount %ld\n",bookname, wordcount); |
317 |
return true; |
445 |
return true; |
318 |
} |
446 |
} |
319 |
else { |
|
|
320 |
// The .idx file will load into memory. Who need this opinion? |
321 |
} |
322 |
} |
323 |
|
447 |
|
324 |
loadwordlist(); |
448 |
loadwordlist(); |
325 |
g_print("bookname: %s , wordcount %ld\n",bookname, wordcount); |
449 |
g_print("bookname: %s , wordcount %ld\n",bookname, wordcount); |
326 |
return true; |
450 |
return true; |
327 |
} |
451 |
} |
328 |
|
452 |
|
329 |
gboolean Lib::load_ifofile(const char *ifofilename, gulong *idxfilesize) |
453 |
bool Lib::load_ifofile(const char *ifofilename, gulong *idxfilesize) |
330 |
{ |
454 |
{ |
331 |
struct stat stats; |
455 |
DictInfo dict_info; |
332 |
if (stat (ifofilename, &stats) == -1) { |
456 |
if (!dict_info.load_from_ifo_file(ifofilename, false)) |
333 |
//g_print("File: %s don't exist!\n",idxfilename); |
|
|
334 |
return false; |
335 |
} |
336 |
|
337 |
FILE *file; |
338 |
if (!(file = fopen (ifofilename, "rb"))) { |
339 |
//g_print("Open file %s failed!\n",idxfilename); |
340 |
return false; |
341 |
} |
342 |
gchar *buffer = (gchar *)g_malloc (stats.st_size + 1); |
343 |
fread (buffer, 1, stats.st_size, file); |
344 |
buffer[stats.st_size] = '\0'; |
345 |
fclose (file); |
346 |
|
347 |
if (!g_str_has_prefix(buffer, "StarDict's dict ifo file\nversion=2.4.2\n")) { |
348 |
g_print("Bad dict ifo file %s, skiped!\n", ifofilename); |
349 |
g_free(buffer); |
350 |
return false; |
457 |
return false; |
351 |
} |
|
|
352 |
gchar *p1= buffer + sizeof("StarDict's dict ifo file\nversion=2.4.2\n")-1 -1; |
353 |
|
458 |
|
354 |
gchar *p2,*p3; |
459 |
*idxfilesize = dict_info.index_file_size; |
355 |
|
460 |
wordcount = dict_info.wordcount; |
356 |
p2 = strstr(p1,"\nidxfilesize="); |
461 |
bookname = g_strdup(dict_info.bookname.c_str()); |
357 |
if (!p2) { |
|
|
358 |
g_free(buffer); |
359 |
return false; |
360 |
} |
361 |
p3 = strchr(p2+ sizeof("\nidxfilesize=")-1,'\n'); |
362 |
gchar *tmpstr = (gchar *)g_memdup(p2+sizeof("\nidxfilesize=")-1, p3-(p2+sizeof("\nidxfilesize=")-1)+1); |
363 |
tmpstr[p3-(p2+sizeof("\nidxfilesize=")-1)] = '\0'; |
364 |
*idxfilesize = atol(tmpstr); |
365 |
g_free(tmpstr); |
366 |
|
367 |
p2 = strstr(p1,"\nwordcount="); |
368 |
if (!p2) { |
369 |
g_free(buffer); |
370 |
return false; |
371 |
} |
372 |
p3 = strchr(p2+ sizeof("\nwordcount=")-1,'\n'); |
373 |
tmpstr = (gchar *)g_memdup(p2+sizeof("\nwordcount=")-1, p3-(p2+sizeof("\nwordcount=")-1)+1); |
374 |
tmpstr[p3-(p2+sizeof("\nwordcount=")-1)] = '\0'; |
375 |
wordcount = atol(tmpstr); |
376 |
g_free(tmpstr); |
377 |
|
378 |
p2 = strstr(p1,"\nbookname="); |
379 |
if (!p2) { |
380 |
g_free(buffer); |
381 |
return false; |
382 |
} |
383 |
p3 = strchr(p2+ sizeof("\nbookname=")-1,'\n'); |
384 |
bookname = (gchar *)g_memdup(p2+sizeof("\nbookname=")-1, p3-(p2+sizeof("\nbookname=")-1)+1); |
385 |
bookname[p3-(p2+sizeof("\nbookname=")-1)] = '\0'; |
386 |
|
462 |
|
387 |
p2 = strstr(p1,"\nsametypesequence="); |
463 |
if (!dict_info.sametypesequence.empty()) |
388 |
if (p2) { |
464 |
sametypesequence=g_strdup(dict_info.sametypesequence.c_str()); |
389 |
p3 = strchr(p2+sizeof("\nsametypesequence=")-1,'\n'); |
|
|
390 |
sametypesequence = (gchar *)g_memdup(p2+sizeof("\nsametypesequence=")-1, p3-(p2+sizeof("\nsametypesequence=")-1)+1); |
391 |
sametypesequence[p3-(p2+sizeof("\nsametypesequence=")-1)] = '\0'; |
392 |
} |
393 |
|
465 |
|
394 |
g_free(buffer); |
|
|
395 |
return true; |
466 |
return true; |
396 |
} |
467 |
} |
397 |
|
468 |
|
Lines 438-478
Link Here
|
438 |
wordoffset[wordcount] = p1; |
509 |
wordoffset[wordcount] = p1; |
439 |
}*/ |
510 |
}*/ |
440 |
|
511 |
|
441 |
gboolean Lib::Lookup(const char* sWord,glong *pIndex) |
512 |
bool Lib::Lookup(const char* sWord,glong *pIndex) |
442 |
{ |
513 |
{ |
443 |
gboolean bFound=false; |
514 |
bool bFound=false; |
444 |
glong iTo=length()-1; |
515 |
glong iTo=length()-1; |
445 |
if (stardict_strcmp(sWord, GetWord(0))<0) { |
516 |
if (stardict_strcmp(sWord, GetWord(0))<0) { |
446 |
*pIndex = 0; |
517 |
*pIndex = 0; |
447 |
} |
518 |
} else if (stardict_strcmp(sWord, GetWord(iTo)) >0) { |
448 |
else if (stardict_strcmp(sWord, GetWord(iTo)) >0 ) { |
|
|
449 |
*pIndex = INVALID_INDEX; |
519 |
*pIndex = INVALID_INDEX; |
450 |
} |
520 |
} else { |
451 |
else { |
|
|
452 |
glong iThisIndex=0; |
521 |
glong iThisIndex=0; |
453 |
glong iFrom=0; |
522 |
glong iFrom=0; |
454 |
|
523 |
|
455 |
int cmpint; |
524 |
int cmpint; |
456 |
while( !bFound && iFrom<=iTo ) |
525 |
while (iFrom<=iTo) { |
457 |
{ |
|
|
458 |
iThisIndex=(iFrom+iTo)/2; |
526 |
iThisIndex=(iFrom+iTo)/2; |
459 |
cmpint = stardict_strcmp(sWord, GetWord(iThisIndex)); |
527 |
cmpint = stardict_strcmp(sWord, GetWord(iThisIndex)); |
460 |
//g_print("lookup %s %d\n",GetWord(iThisIndex),cmpint); |
528 |
//g_print("lookup %s %d\n",GetWord(iThisIndex),cmpint); |
461 |
if (cmpint == 0) |
529 |
if (cmpint>0) |
462 |
{ |
|
|
463 |
bFound=true; |
464 |
} |
465 |
else if (cmpint > 0) |
466 |
{ |
467 |
iFrom=iThisIndex+1; |
530 |
iFrom=iThisIndex+1; |
468 |
} |
531 |
else if (cmpint<0) |
469 |
else |
|
|
470 |
{ |
471 |
iTo=iThisIndex-1; |
532 |
iTo=iThisIndex-1; |
|
|
533 |
else { |
534 |
bFound=true; |
535 |
break; |
472 |
} |
536 |
} |
473 |
} |
537 |
} |
474 |
if (!bFound) |
538 |
|
475 |
{ |
539 |
if (!bFound) { |
476 |
/*glong len = g_utf8_strlen(sWord, -1); |
540 |
/*glong len = g_utf8_strlen(sWord, -1); |
477 |
gchar *last_str = g_utf8_offset_to_pointer(sWord, len-1); |
541 |
gchar *last_str = g_utf8_offset_to_pointer(sWord, len-1); |
478 |
gunichar last = g_utf8_get_char(last_str); |
542 |
gunichar last = g_utf8_get_char(last_str); |
Lines 483-533
Link Here
|
483 |
*pIndex = iFrom; //next |
547 |
*pIndex = iFrom; //next |
484 |
*/ |
548 |
*/ |
485 |
*pIndex = iFrom; //next |
549 |
*pIndex = iFrom; //next |
486 |
} |
550 |
} else |
487 |
else |
|
|
488 |
*pIndex = iThisIndex; |
551 |
*pIndex = iThisIndex; |
489 |
} |
552 |
} |
490 |
return(bFound); |
553 |
|
|
|
554 |
return bFound; |
491 |
} |
555 |
} |
492 |
|
556 |
|
493 |
gboolean Lib::LookupWithRule(GPatternSpec *pspec,glong *aIndex,int iBuffLen) |
557 |
bool Lib::LookupWithRule(GPatternSpec *pspec, glong *aIndex, int iBuffLen) |
494 |
{ |
558 |
{ |
495 |
int iIndexCount=0; |
559 |
int iIndexCount=0; |
496 |
glong i; |
560 |
glong i; |
497 |
for(i=0;i<length() && iIndexCount<iBuffLen-1;i++) |
561 |
for(i=0; i<wordcount && iIndexCount<iBuffLen-1; i++) |
498 |
{ |
562 |
if (g_pattern_match_string(pspec, GetWord(i))) |
499 |
if(g_pattern_match_string(pspec, GetWord(i))) |
|
|
500 |
{ |
501 |
aIndex[iIndexCount++]=i; |
563 |
aIndex[iIndexCount++]=i; |
502 |
} |
564 |
|
503 |
} |
|
|
504 |
aIndex[iIndexCount]= -1; // -1 is the end. |
565 |
aIndex[iIndexCount]= -1; // -1 is the end. |
505 |
|
566 |
|
506 |
return(iIndexCount>0); |
567 |
return (iIndexCount>0); |
507 |
} |
568 |
} |
508 |
|
569 |
|
509 |
gchar * |
570 |
gchar * |
510 |
Lib::GetWord(glong index) |
571 |
Lib::GetWord(glong index) |
511 |
{ |
572 |
{ |
512 |
if (idxfile) { |
573 |
if (idxfile) { |
513 |
if (index == cur_wordindex +1) { |
574 |
if (index != cur_wordindex+1) |
514 |
//needn't fseek(). |
|
|
515 |
} |
516 |
// (index == cur_wordindex) seldom happen, so don't determine this here. |
517 |
else { |
518 |
fseek(idxfile, wordoffset[index], SEEK_SET); |
575 |
fseek(idxfile, wordoffset[index], SEEK_SET); |
519 |
} |
576 |
|
520 |
cur_wordindex = index; |
577 |
cur_wordindex = index; |
521 |
|
578 |
|
522 |
fread(wordentry_buf, wordoffset[index+1] - wordoffset[index] - 2*sizeof(glong), 1, idxfile); |
579 |
fread(wordentry_buf, wordoffset[index+1] - wordoffset[index] - 2*sizeof(guint32), 1, idxfile); |
523 |
//g_print("%s\n", wordentry_buf); |
580 |
//g_print("%s\n", wordentry_buf); |
524 |
fread(&wordentry_offset, sizeof(glong), 1, idxfile); |
581 |
fread(&wordentry_offset, sizeof(guint32), 1, idxfile); |
525 |
wordentry_offset = g_ntohl(wordentry_offset); |
582 |
wordentry_offset = g_ntohl(wordentry_offset); |
526 |
fread(&wordentry_size, sizeof(glong), 1, idxfile); |
583 |
fread(&wordentry_size, sizeof(guint32), 1, idxfile); |
527 |
wordentry_size = g_ntohl(wordentry_size); |
584 |
wordentry_size = g_ntohl(wordentry_size); |
528 |
return wordentry_buf; |
585 |
return wordentry_buf; |
529 |
} |
586 |
} else { |
530 |
else { |
|
|
531 |
return wordlist[index]; |
587 |
return wordlist[index]; |
532 |
} |
588 |
} |
533 |
} |
589 |
} |
Lines 536-561
Link Here
|
536 |
Lib::GetWordData(glong index) |
592 |
Lib::GetWordData(glong index) |
537 |
{ |
593 |
{ |
538 |
if (idxfile) { |
594 |
if (idxfile) { |
539 |
if (index == cur_wordindex) { |
595 |
if (index != cur_wordindex) { |
540 |
// wordentry_offset and wordentry_size are already cached by GetWord(); |
|
|
541 |
} |
542 |
else { |
543 |
cur_wordindex = index; |
596 |
cur_wordindex = index; |
544 |
fseek(idxfile, wordoffset[index+1] - 2*sizeof(glong), SEEK_SET); |
597 |
fseek(idxfile, wordoffset[index+1] - 2*sizeof(guint32), SEEK_SET); |
545 |
fread(&wordentry_offset, sizeof(glong), 1, idxfile); |
598 |
fread(&wordentry_offset, sizeof(guint32), 1, idxfile); |
546 |
wordentry_offset = g_ntohl(wordentry_offset); |
599 |
wordentry_offset = g_ntohl(wordentry_offset); |
547 |
fread(&wordentry_size, sizeof(glong), 1, idxfile); |
600 |
fread(&wordentry_size, sizeof(guint32), 1, idxfile); |
548 |
wordentry_size = g_ntohl(wordentry_size); |
601 |
wordentry_size = g_ntohl(wordentry_size); |
549 |
} |
602 |
} |
550 |
return DictBase::GetWordData(wordentry_offset, wordentry_size); |
603 |
return DictBase::GetWordData(wordentry_offset, wordentry_size); |
551 |
} |
604 |
} else { |
552 |
else { |
605 |
gchar *p1 = wordlist[index+1] - 2*sizeof(guint32); |
553 |
gchar *p1 = wordlist[index+1] - 2*sizeof(glong); |
606 |
guint32 offset, size; |
554 |
glong offset, size; |
607 |
offset=*reinterpret_cast<guint32 *>(p1); |
555 |
memcpy(&offset,p1,sizeof(glong)); |
|
|
556 |
offset = g_ntohl(offset); |
608 |
offset = g_ntohl(offset); |
557 |
p1 = p1 + sizeof(glong); |
609 |
p1 += sizeof(guint32); |
558 |
memcpy(&size, p1, sizeof(glong)); |
610 |
size=*reinterpret_cast<guint32 *>(p1); |
559 |
size = g_ntohl(size); |
611 |
size = g_ntohl(size); |
560 |
return DictBase::GetWordData(offset, size); |
612 |
return DictBase::GetWordData(offset, size); |
561 |
} |
613 |
} |
Lines 566-571
Link Here
|
566 |
{ |
618 |
{ |
567 |
libcount =0; |
619 |
libcount =0; |
568 |
oLib = NULL; |
620 |
oLib = NULL; |
|
|
621 |
iMaxFuzzyDistance = MAX_FUZZY_DISTANCE; //need to read from cfg. |
569 |
} |
622 |
} |
570 |
|
623 |
|
571 |
Libs::~Libs() |
624 |
Libs::~Libs() |
Lines 592-601
Link Here
|
592 |
} |
645 |
} |
593 |
|
646 |
|
594 |
/********************************************************************/ |
647 |
/********************************************************************/ |
595 |
gboolean Libs::LookdupWordsWithRule(GPatternSpec *pspec,glong* aiIndexes,int iLen,int iLib) |
|
|
596 |
{ |
597 |
return (oLib[iLib]->LookupWithRule(pspec,aiIndexes,iLen)); |
598 |
} |
599 |
|
648 |
|
600 |
void Libs::LoadDir(const gchar *dirname, const GSList *order_list, const GSList *disable_list) |
649 |
void Libs::LoadDir(const gchar *dirname, const GSList *order_list, const GSList *disable_list) |
601 |
{ |
650 |
{ |
Lines 823-837
Link Here
|
823 |
return poCurrentWord; |
872 |
return poCurrentWord; |
824 |
} |
873 |
} |
825 |
|
874 |
|
826 |
gboolean Libs::LookupWord(const gchar* sWord,glong& iWordIndex,int iLib) |
875 |
bool Libs::LookupWord(const gchar* sWord,glong& iWordIndex,int iLib) |
827 |
{ |
876 |
{ |
828 |
return (oLib[iLib]->Lookup(sWord, &iWordIndex)); |
877 |
return oLib[iLib]->Lookup(sWord, &iWordIndex); |
829 |
} |
878 |
} |
830 |
|
879 |
|
831 |
gboolean Libs::LookupSimilarWord(const gchar* sWord,glong& iWordIndex,int iLib) |
880 |
bool Libs::LookupSimilarWord(const gchar* sWord, glong & iWordIndex, int iLib) |
832 |
{ |
881 |
{ |
833 |
glong iIndex; |
882 |
glong iIndex; |
834 |
gboolean bFound=false; |
883 |
bool bFound=false; |
835 |
gchar *casestr; |
884 |
gchar *casestr; |
836 |
|
885 |
|
837 |
if (!bFound) { |
886 |
if (!bFound) { |
Lines 865-885
Link Here
|
865 |
} |
914 |
} |
866 |
} |
915 |
} |
867 |
|
916 |
|
868 |
if (bIsPureEnglish(sWord)) |
917 |
if (bIsPureEnglish(sWord)) { |
869 |
{ |
|
|
870 |
// If not Found , try other status of sWord. |
918 |
// If not Found , try other status of sWord. |
871 |
int iWordLen=strlen(sWord); |
919 |
int iWordLen=strlen(sWord); |
872 |
gboolean isupcase; |
920 |
bool isupcase; |
873 |
|
921 |
|
874 |
gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); |
922 |
gchar *sNewWord = (gchar *)g_malloc(iWordLen + 1); |
875 |
|
923 |
|
876 |
//cut one char "s" or "d" |
924 |
//cut one char "s" or "d" |
877 |
if(!bFound && iWordLen>1) { |
925 |
if(!bFound && iWordLen>1) { |
878 |
isupcase = (sWord[iWordLen-1]=='S' || (!strncmp(&sWord[iWordLen-2],"ED",2))); |
926 |
isupcase = sWord[iWordLen-1]=='S' || !strncmp(&sWord[iWordLen-2],"ED",2); |
879 |
if (isupcase || sWord[iWordLen-1]=='s' || (!strncmp(&sWord[iWordLen-2],"ed",2))) { |
927 |
if (isupcase || sWord[iWordLen-1]=='s' || !strncmp(&sWord[iWordLen-2],"ed",2)) { |
880 |
strcpy(sNewWord,sWord); |
928 |
strcpy(sNewWord,sWord); |
881 |
sNewWord[iWordLen-1]='\0'; // cut "s" or "d" |
929 |
sNewWord[iWordLen-1]='\0'; // cut "s" or "d" |
882 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
930 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
883 |
bFound=true; |
931 |
bFound=true; |
884 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
932 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
885 |
casestr = g_ascii_strdown(sNewWord, -1); |
933 |
casestr = g_ascii_strdown(sNewWord, -1); |
Lines 898-906
Link Here
|
898 |
if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) { |
946 |
if (isupcase || (!strncmp(&sWord[iWordLen-2],"ly",2))) { |
899 |
strcpy(sNewWord,sWord); |
947 |
strcpy(sNewWord,sWord); |
900 |
sNewWord[iWordLen-2]='\0'; // cut "ly" |
948 |
sNewWord[iWordLen-2]='\0'; // cut "ly" |
901 |
if ( iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4]) |
949 |
if (iWordLen>5 && sNewWord[iWordLen-3]==sNewWord[iWordLen-4] |
902 |
&& !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5]) ) //doubled |
950 |
&& !bIsVowel(sNewWord[iWordLen-4]) && |
903 |
{ |
951 |
bIsVowel(sNewWord[iWordLen-5])) {//doubled |
|
|
952 |
|
904 |
sNewWord[iWordLen-3]='\0'; |
953 |
sNewWord[iWordLen-3]='\0'; |
905 |
if( oLib[iLib]->Lookup(sNewWord,&iIndex) ) |
954 |
if( oLib[iLib]->Lookup(sNewWord,&iIndex) ) |
906 |
bFound=true; |
955 |
bFound=true; |
Lines 939-946
Link Here
|
939 |
strcpy(sNewWord,sWord); |
988 |
strcpy(sNewWord,sWord); |
940 |
sNewWord[iWordLen-3]='\0'; |
989 |
sNewWord[iWordLen-3]='\0'; |
941 |
if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5]) |
990 |
if ( iWordLen>6 && (sNewWord[iWordLen-4]==sNewWord[iWordLen-5]) |
942 |
&& !bIsVowel(sNewWord[iWordLen-5]) && bIsVowel(sNewWord[iWordLen-6]) ) //doubled |
991 |
&& !bIsVowel(sNewWord[iWordLen-5]) && |
943 |
{ |
992 |
bIsVowel(sNewWord[iWordLen-6])) { //doubled |
944 |
sNewWord[iWordLen-4]='\0'; |
993 |
sNewWord[iWordLen-4]='\0'; |
945 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
994 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
946 |
bFound=true; |
995 |
bFound=true; |
Lines 992-1004
Link Here
|
992 |
//cut two char "es" |
1041 |
//cut two char "es" |
993 |
if(!bFound && iWordLen>3) { |
1042 |
if(!bFound && iWordLen>3) { |
994 |
isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) && |
1043 |
isupcase = (!strncmp(&sWord[iWordLen-2],"ES",2) && |
995 |
(sWord[iWordLen-3] == 'S' || sWord[iWordLen-3] == 'X' || sWord[iWordLen-3] == 'O' |
1044 |
(sWord[iWordLen-3] == 'S' || |
996 |
|| (iWordLen >4 && sWord[iWordLen-3] == 'H' && (sWord[iWordLen-4] == 'C' || sWord[iWordLen-4] == 'S')))); |
1045 |
sWord[iWordLen-3] == 'X' || |
|
|
1046 |
sWord[iWordLen-3] == 'O' || |
1047 |
(iWordLen >4 && sWord[iWordLen-3] == 'H' && |
1048 |
(sWord[iWordLen-4] == 'C' || |
1049 |
sWord[iWordLen-4] == 'S')))); |
997 |
if (isupcase || |
1050 |
if (isupcase || |
998 |
(!strncmp(&sWord[iWordLen-2],"es",2) && |
1051 |
(!strncmp(&sWord[iWordLen-2],"es",2) && |
999 |
(sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' || sWord[iWordLen-3] == 'o' |
1052 |
(sWord[iWordLen-3] == 's' || sWord[iWordLen-3] == 'x' || |
1000 |
|| (iWordLen >4 && sWord[iWordLen-3] == 'h' && (sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) |
1053 |
sWord[iWordLen-3] == 'o' || |
1001 |
{ |
1054 |
(iWordLen >4 && sWord[iWordLen-3] == 'h' && |
|
|
1055 |
(sWord[iWordLen-4] == 'c' || sWord[iWordLen-4] == 's'))))) { |
1002 |
strcpy(sNewWord,sWord); |
1056 |
strcpy(sNewWord,sWord); |
1003 |
sNewWord[iWordLen-2]='\0'; |
1057 |
sNewWord[iWordLen-2]='\0'; |
1004 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1058 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
Lines 1015-1031
Link Here
|
1015 |
} |
1069 |
} |
1016 |
|
1070 |
|
1017 |
//cut "ed" |
1071 |
//cut "ed" |
1018 |
if( !bFound && iWordLen>3) { |
1072 |
if (!bFound && iWordLen>3) { |
1019 |
isupcase = !strncmp(&sWord[iWordLen-2],"ED",2); |
1073 |
isupcase = !strncmp(&sWord[iWordLen-2],"ED",2); |
1020 |
if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2) ) |
1074 |
if (isupcase || !strncmp(&sWord[iWordLen-2],"ed",2)) { |
1021 |
{ |
|
|
1022 |
strcpy(sNewWord,sWord); |
1075 |
strcpy(sNewWord,sWord); |
1023 |
sNewWord[iWordLen-2]='\0'; |
1076 |
sNewWord[iWordLen-2]='\0'; |
1024 |
if ( iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4]) |
1077 |
if (iWordLen>5 && (sNewWord[iWordLen-3]==sNewWord[iWordLen-4]) |
1025 |
&& !bIsVowel(sNewWord[iWordLen-4]) && bIsVowel(sNewWord[iWordLen-5]) ) //doubled |
1078 |
&& !bIsVowel(sNewWord[iWordLen-4]) && |
1026 |
{ |
1079 |
bIsVowel(sNewWord[iWordLen-5])) {//doubled |
1027 |
sNewWord[iWordLen-3]='\0'; |
1080 |
sNewWord[iWordLen-3]='\0'; |
1028 |
if( oLib[iLib]->Lookup(sNewWord,&iIndex) ) |
1081 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1029 |
bFound=true; |
1082 |
bFound=true; |
1030 |
else { |
1083 |
else { |
1031 |
if (isupcase || g_ascii_isupper(sWord[0])) { |
1084 |
if (isupcase || g_ascii_isupper(sWord[0])) { |
Lines 1040-1046
Link Here
|
1040 |
sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore |
1093 |
sNewWord[iWordLen-3]=sNewWord[iWordLen-4]; //restore |
1041 |
} |
1094 |
} |
1042 |
} |
1095 |
} |
1043 |
if( !bFound ) { |
1096 |
if (!bFound) { |
1044 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1097 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1045 |
bFound=true; |
1098 |
bFound=true; |
1046 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
1099 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
Lines 1056-1072
Link Here
|
1056 |
} |
1109 |
} |
1057 |
|
1110 |
|
1058 |
// cut "ied" , add "y". |
1111 |
// cut "ied" , add "y". |
1059 |
if(!bFound && iWordLen>3) { |
1112 |
if (!bFound && iWordLen>3) { |
1060 |
isupcase = !strncmp(&sWord[iWordLen-3],"IED",3); |
1113 |
isupcase = !strncmp(&sWord[iWordLen-3],"IED",3); |
1061 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) |
1114 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"ied",3))) { |
1062 |
{ |
|
|
1063 |
strcpy(sNewWord,sWord); |
1115 |
strcpy(sNewWord,sWord); |
1064 |
sNewWord[iWordLen-3]='\0'; |
1116 |
sNewWord[iWordLen-3]='\0'; |
1065 |
if (isupcase) |
1117 |
if (isupcase) |
1066 |
strcat(sNewWord,"Y"); // add a char "Y" |
1118 |
strcat(sNewWord,"Y"); // add a char "Y" |
1067 |
else |
1119 |
else |
1068 |
strcat(sNewWord,"y"); // add a char "y" |
1120 |
strcat(sNewWord,"y"); // add a char "y" |
1069 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1121 |
if (oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1070 |
bFound=true; |
1122 |
bFound=true; |
1071 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
1123 |
else if (isupcase || g_ascii_isupper(sWord[0])) { |
1072 |
casestr = g_ascii_strdown(sNewWord, -1); |
1124 |
casestr = g_ascii_strdown(sNewWord, -1); |
Lines 1080-1089
Link Here
|
1080 |
} |
1132 |
} |
1081 |
|
1133 |
|
1082 |
// cut "ies" , add "y". |
1134 |
// cut "ies" , add "y". |
1083 |
if(!bFound && iWordLen>3) { |
1135 |
if (!bFound && iWordLen>3) { |
1084 |
isupcase = !strncmp(&sWord[iWordLen-3],"IES",3); |
1136 |
isupcase = !strncmp(&sWord[iWordLen-3],"IES",3); |
1085 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) |
1137 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"ies",3))) { |
1086 |
{ |
|
|
1087 |
strcpy(sNewWord,sWord); |
1138 |
strcpy(sNewWord,sWord); |
1088 |
sNewWord[iWordLen-3]='\0'; |
1139 |
sNewWord[iWordLen-3]='\0'; |
1089 |
if (isupcase) |
1140 |
if (isupcase) |
Lines 1104-1113
Link Here
|
1104 |
} |
1155 |
} |
1105 |
|
1156 |
|
1106 |
// cut "er". |
1157 |
// cut "er". |
1107 |
if(!bFound && iWordLen>2) { |
1158 |
if (!bFound && iWordLen>2) { |
1108 |
isupcase = !strncmp(&sWord[iWordLen-2],"ER",2); |
1159 |
isupcase = !strncmp(&sWord[iWordLen-2],"ER",2); |
1109 |
if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) |
1160 |
if (isupcase || (!strncmp(&sWord[iWordLen-2],"er",2))) { |
1110 |
{ |
|
|
1111 |
strcpy(sNewWord,sWord); |
1161 |
strcpy(sNewWord,sWord); |
1112 |
sNewWord[iWordLen-2]='\0'; |
1162 |
sNewWord[iWordLen-2]='\0'; |
1113 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1163 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
Lines 1124-1133
Link Here
|
1124 |
} |
1174 |
} |
1125 |
|
1175 |
|
1126 |
// cut "est". |
1176 |
// cut "est". |
1127 |
if(!bFound && iWordLen>3) { |
1177 |
if (!bFound && iWordLen>3) { |
1128 |
isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3); |
1178 |
isupcase = !strncmp(&sWord[iWordLen-3], "EST", 3); |
1129 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) |
1179 |
if (isupcase || (!strncmp(&sWord[iWordLen-3],"est", 3))) { |
1130 |
{ |
|
|
1131 |
strcpy(sNewWord,sWord); |
1180 |
strcpy(sNewWord,sWord); |
1132 |
sNewWord[iWordLen-3]='\0'; |
1181 |
sNewWord[iWordLen-3]='\0'; |
1133 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
1182 |
if(oLib[iLib]->Lookup(sNewWord,&iIndex)) |
Lines 1154-1213
Link Here
|
1154 |
//iWordIndex = INVALID_INDEX; |
1203 |
//iWordIndex = INVALID_INDEX; |
1155 |
} |
1204 |
} |
1156 |
|
1205 |
|
1157 |
return(bFound); |
1206 |
return bFound; |
1158 |
} |
1207 |
} |
1159 |
|
1208 |
|
1160 |
gboolean Libs::SimpleLookupWord(const gchar* sWord,glong& iWordIndex,int iLib) |
1209 |
bool Libs::SimpleLookupWord(const gchar* sWord, glong & iWordIndex, int iLib) |
1161 |
{ |
1210 |
{ |
1162 |
gboolean bFound = oLib[iLib]->Lookup(sWord, &iWordIndex); |
1211 |
bool bFound = oLib[iLib]->Lookup(sWord, &iWordIndex); |
1163 |
if (!bFound) |
1212 |
if (!bFound) |
1164 |
bFound = LookupSimilarWord(sWord, iWordIndex, iLib); |
1213 |
bFound = LookupSimilarWord(sWord, iWordIndex, iLib); |
1165 |
return bFound; |
1214 |
return bFound; |
1166 |
} |
1215 |
} |
1167 |
|
1216 |
|
1168 |
inline gboolean bIsVowel(gchar inputchar) |
1217 |
inline bool operator<(const Libs::Fuzzystruct & lh, const Libs::Fuzzystruct & rh) { |
|
|
1218 |
if (lh.iMatchWordDistance!=rh.iMatchWordDistance) |
1219 |
return lh.iMatchWordDistance<rh.iMatchWordDistance; |
1220 |
|
1221 |
if (lh.pMatchWord && rh.pMatchWord) |
1222 |
return stardict_strcmp(lh.pMatchWord, rh.pMatchWord)<0; |
1223 |
|
1224 |
return false; |
1225 |
} |
1226 |
|
1227 |
bool Libs::LookupWithFuzzy(const gchar *sWord, Fuzzystruct oFuzzystruct[], gint fuzzystruct_amount, TProgressFunc ProgressFunc) |
1169 |
{ |
1228 |
{ |
1170 |
gchar ch = g_ascii_toupper(inputchar); |
1229 |
if (sWord[0] == '\0') |
1171 |
return( ch=='A' || ch=='E' || ch=='I' || ch=='O' || ch=='U' ); |
1230 |
return false; |
|
|
1231 |
|
1232 |
for (int i=0; i<fuzzystruct_amount; i++) { |
1233 |
oFuzzystruct[i].pMatchWord = NULL; |
1234 |
oFuzzystruct[i].iMatchWordDistance = iMaxFuzzyDistance; |
1235 |
} |
1236 |
int iMaxDistance = iMaxFuzzyDistance; |
1237 |
int iDistance; |
1238 |
bool Found = false; |
1239 |
EditDistance oEditDistance; |
1240 |
|
1241 |
glong iCheckWordLen; |
1242 |
int sCheckLen; |
1243 |
const char *sCheck; |
1244 |
gunichar *ucs4_str1,*ucs4_str2; |
1245 |
glong ucs4_str2_len; |
1246 |
char *sLowerCheckWord; |
1247 |
gchar *sLowerWord = g_utf8_strdown(sWord, -1); |
1248 |
ucs4_str2 = g_utf8_to_ucs4_fast(sLowerWord,-1,&ucs4_str2_len); |
1249 |
g_free(sLowerWord); |
1250 |
for (int iLib=0; iLib<libcount; iLib++) { |
1251 |
if (ProgressFunc) |
1252 |
ProgressFunc(); |
1253 |
if (stardict_strcmp(sWord, poGetWord(0,iLib))>=0 && |
1254 |
stardict_strcmp(sWord, poGetWord(iLength(iLib)-1,iLib))<=0) { |
1255 |
//there are Chinese dicts and English dicts... |
1256 |
const int iwords = iLength(iLib); |
1257 |
for (int index=0; index<iwords; index++) { |
1258 |
sCheck = poGetWord(index,iLib); |
1259 |
// tolower and skip too long or too short words |
1260 |
sCheckLen = strlen(sCheck); |
1261 |
iCheckWordLen = g_utf8_strlen(sCheck, sCheckLen); |
1262 |
if (iCheckWordLen-ucs4_str2_len>=iMaxDistance || |
1263 |
ucs4_str2_len-iCheckWordLen>=iMaxDistance) |
1264 |
continue; |
1265 |
sLowerCheckWord = g_utf8_strdown(sCheck, sCheckLen); |
1266 |
if (iCheckWordLen > ucs4_str2_len) |
1267 |
(*g_utf8_offset_to_pointer(sLowerCheckWord, ucs4_str2_len)) = '\0'; |
1268 |
ucs4_str1 = g_utf8_to_ucs4_fast(sLowerCheckWord, -1,NULL); |
1269 |
g_free(sLowerCheckWord); |
1270 |
iDistance = oEditDistance.CalEditDistance(ucs4_str1,ucs4_str2,iMaxDistance); |
1271 |
g_free(ucs4_str1); |
1272 |
if (iDistance<iMaxDistance && iDistance < ucs4_str2_len) { |
1273 |
// when ucs4_str2_len=1,2 we need less fuzzy. |
1274 |
Found = true; |
1275 |
bool bAlreadyInList = false; |
1276 |
int iMaxDistanceAt=0; |
1277 |
for (int j=0; j<fuzzystruct_amount; j++) { |
1278 |
if (oFuzzystruct[j].pMatchWord && |
1279 |
strcmp(oFuzzystruct[j].pMatchWord,sCheck)==0 ) {//already in list |
1280 |
bAlreadyInList = true; |
1281 |
break; |
1282 |
} |
1283 |
//find the position,it will certainly be found (include the first time) as iMaxDistance is set by last time. |
1284 |
if (oFuzzystruct[j].iMatchWordDistance == iMaxDistance ) { |
1285 |
iMaxDistanceAt = j; |
1286 |
} |
1287 |
} |
1288 |
if (!bAlreadyInList) { |
1289 |
if (oFuzzystruct[iMaxDistanceAt].pMatchWord) |
1290 |
g_free(oFuzzystruct[iMaxDistanceAt].pMatchWord); |
1291 |
oFuzzystruct[iMaxDistanceAt].pMatchWord = g_strdup(sCheck); |
1292 |
oFuzzystruct[iMaxDistanceAt].iMatchWordDistance = iDistance; |
1293 |
// calc new iMaxDistance |
1294 |
iMaxDistance = iDistance; |
1295 |
for (int j=0; j<fuzzystruct_amount; j++) { |
1296 |
if (oFuzzystruct[j].iMatchWordDistance > iMaxDistance) |
1297 |
iMaxDistance = oFuzzystruct[j].iMatchWordDistance; |
1298 |
} // calc new iMaxDistance |
1299 |
} // add to list |
1300 |
} // find one |
1301 |
} // each word |
1302 |
} // ok for search |
1303 |
} // each lib |
1304 |
g_free(ucs4_str2); |
1305 |
|
1306 |
if (Found)// sort with distance |
1307 |
std::sort(oFuzzystruct, oFuzzystruct+fuzzystruct_amount); |
1308 |
|
1309 |
return Found; |
1172 |
} |
1310 |
} |
1173 |
|
1311 |
|
|
|
1312 |
inline bool less_for_compare(const char *lh, const char *rh) { |
1313 |
return stardict_strcmp(lh, rh)<0; |
1314 |
} |
1174 |
|
1315 |
|
|
|
1316 |
gint Libs::LookupWithRule(const gchar *word, TProgressFunc ProgressFunc, gchar **ppMatchWord) |
1317 |
{ |
1318 |
glong aiIndex[MAX_MATCH_ITEM_PER_LIB+1]; |
1319 |
gint iMatchCount = 0; |
1320 |
GPatternSpec *pspec = g_pattern_spec_new(word); |
1321 |
|
1322 |
for (int iLib=0; iLib<libcount; iLib++) { |
1323 |
//if(oLibs.LookdupWordsWithRule(pspec,aiIndex,MAX_MATCH_ITEM_PER_LIB+1-iMatchCount,iLib)) |
1324 |
// -iMatchCount,so save time,but may got less result and the word may repeat. |
1325 |
|
1326 |
if (oLib[iLib]->LookupWithRule(pspec,aiIndex, MAX_MATCH_ITEM_PER_LIB+1)) { |
1327 |
if (ProgressFunc) |
1328 |
ProgressFunc(); |
1329 |
for (int i=0; aiIndex[i]!=-1; i++) { |
1330 |
gchar * sMatchWord = poGetWord(aiIndex[i],iLib); |
1331 |
bool bAlreadyInList = false; |
1332 |
for (int j=0; j<iMatchCount; j++) { |
1333 |
if (strcmp(ppMatchWord[j],sMatchWord)==0) {//already in list |
1334 |
bAlreadyInList = true; |
1335 |
break; |
1336 |
} |
1337 |
} |
1338 |
if (!bAlreadyInList) |
1339 |
ppMatchWord[iMatchCount++] = g_strdup(sMatchWord); |
1340 |
} |
1341 |
} |
1342 |
} |
1343 |
g_pattern_spec_free(pspec); |
1344 |
|
1345 |
if (iMatchCount)// sort it. |
1346 |
std::sort(ppMatchWord, ppMatchWord+iMatchCount, less_for_compare); |
1347 |
|
1348 |
return iMatchCount; |
1349 |
} |
1175 |
|
1350 |
|
1176 |
/**************************************************/ |
1351 |
/**************************************************/ |
1177 |
gboolean TreeDict::load(const char *ifofilename, GtkTreeStore *model) |
1352 |
bool TreeDict::load(const char *ifofilename, GtkTreeStore *model) |
1178 |
{ |
1353 |
{ |
1179 |
gulong tdxfilesize; |
1354 |
gulong tdxfilesize; |
1180 |
if (!load_ifofile(ifofilename, &tdxfilesize)) |
1355 |
if (!load_ifofile(ifofilename, &tdxfilesize)) |
1181 |
return false; |
1356 |
return false; |
1182 |
|
1357 |
|
1183 |
gchar fullfilename[256]; |
1358 |
std::string fullfilename(ifofilename); |
1184 |
|
1359 |
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "dict.dz"); |
1185 |
strcpy(fullfilename, ifofilename); |
|
|
1186 |
strcpy(fullfilename+strlen(fullfilename)-sizeof("ifo") +1, "dict.dz"); |
1187 |
|
1360 |
|
1188 |
if (g_file_test(fullfilename, G_FILE_TEST_EXISTS)) { |
1361 |
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { |
1189 |
dictdzfile = dict_data_open(fullfilename, 0); |
1362 |
dictdzfile = dict_data_open(fullfilename.c_str(), 0); |
1190 |
if (!dictdzfile) { |
1363 |
if (!dictdzfile) { |
1191 |
//g_print("open file %s failed!\n",fullfilename); |
1364 |
//g_print("open file %s failed!\n",fullfilename); |
1192 |
return false; |
1365 |
return false; |
1193 |
} |
1366 |
} |
1194 |
} |
1367 |
} else { |
1195 |
else { |
1368 |
fullfilename.erase(fullfilename.length()-sizeof(".dz")+1, sizeof(".dz")-1); |
1196 |
fullfilename[strlen(fullfilename)-3] = '\0'; |
1369 |
dictfile = fopen(fullfilename.c_str(),"rb"); |
1197 |
dictfile = fopen(fullfilename,"rb"); |
|
|
1198 |
if (!dictfile) { |
1370 |
if (!dictfile) { |
1199 |
//g_print("open file %s failed!\n",fullfilename); |
1371 |
//g_print("open file %s failed!\n",fullfilename); |
1200 |
return false; |
1372 |
return false; |
1201 |
} |
1373 |
} |
1202 |
} |
1374 |
} |
1203 |
|
1375 |
|
1204 |
strcpy(fullfilename, ifofilename); |
1376 |
fullfilename=ifofilename; |
1205 |
strcpy(fullfilename+strlen(fullfilename)-sizeof("ifo") +1, "tdx.gz"); |
1377 |
fullfilename.replace(fullfilename.length()-sizeof("ifo")+1, sizeof("ifo")-1, "tdx.gz"); |
1206 |
|
1378 |
|
1207 |
gchar *buffer= NULL; |
1379 |
gchar *buffer= NULL; |
1208 |
if (g_file_test(fullfilename, G_FILE_TEST_EXISTS)) { |
1380 |
if (g_file_test(fullfilename.c_str(), G_FILE_TEST_EXISTS)) { |
1209 |
gzFile in; |
1381 |
gzFile in; |
1210 |
in = gzopen(fullfilename,"rb"); |
1382 |
in = gzopen(fullfilename.c_str(),"rb"); |
1211 |
if (in == NULL) { |
1383 |
if (in == NULL) { |
1212 |
//g_print("Open file %s failed!\n",idxfilename); |
1384 |
//g_print("Open file %s failed!\n",idxfilename); |
1213 |
return false; |
1385 |
return false; |
Lines 1226-1243
Link Here
|
1226 |
g_free(buffer); |
1398 |
g_free(buffer); |
1227 |
return false; |
1399 |
return false; |
1228 |
} |
1400 |
} |
1229 |
} |
1401 |
} else { |
1230 |
else { |
1402 |
fullfilename.erase(fullfilename.length()-sizeof(".gz")+1, sizeof(".gz")-1); |
1231 |
fullfilename[strlen(fullfilename)-3] = '\0'; |
|
|
1232 |
FILE *file; |
1403 |
FILE *file; |
1233 |
if (!(file = fopen (fullfilename, "rb"))) { |
1404 |
if (!(file = fopen (fullfilename.c_str(), "rb"))) { |
1234 |
//g_print("Open file %s failed!\n",fullfilename); |
1405 |
//g_print("Open file %s failed!\n",fullfilename); |
1235 |
return false; |
1406 |
return false; |
1236 |
} |
1407 |
} |
1237 |
buffer = (gchar *)g_malloc(tdxfilesize); |
1408 |
buffer = (gchar *)g_malloc(tdxfilesize); |
1238 |
gulong read_len; |
1409 |
gulong read_len; |
1239 |
read_len = fread (buffer, 1, tdxfilesize, file); |
1410 |
read_len = fread(buffer, 1, tdxfilesize, file); |
1240 |
fclose (file); |
1411 |
fclose(file); |
1241 |
if (read_len!=tdxfilesize) { |
1412 |
if (read_len!=tdxfilesize) { |
1242 |
g_free(buffer); |
1413 |
g_free(buffer); |
1243 |
return false; |
1414 |
return false; |
Lines 1250-1301
Link Here
|
1250 |
return true; |
1421 |
return true; |
1251 |
} |
1422 |
} |
1252 |
|
1423 |
|
1253 |
gboolean TreeDict::load_ifofile(const char *ifofilename, gulong *tdxfilesize) |
1424 |
bool TreeDict::load_ifofile(const char *ifofilename, gulong *tdxfilesize) |
1254 |
{ |
1425 |
{ |
1255 |
struct stat stats; |
1426 |
DictInfo dict_info; |
1256 |
if (stat (ifofilename, &stats) == -1) { |
1427 |
if (!dict_info.load_from_ifo_file(ifofilename, true)) |
1257 |
//g_print("File: %s don't exist!\n",idxfilename); |
|
|
1258 |
return false; |
1259 |
} |
1260 |
|
1261 |
FILE *file; |
1262 |
if (!(file = fopen (ifofilename, "rb"))) { |
1263 |
//g_print("Open file %s failed!\n",idxfilename); |
1264 |
return false; |
1265 |
} |
1266 |
gchar *buffer = (gchar *)g_malloc (stats.st_size + 1); |
1267 |
fread (buffer, 1, stats.st_size, file); |
1268 |
buffer[stats.st_size] = '\0'; |
1269 |
fclose (file); |
1270 |
|
1271 |
if (!g_str_has_prefix(buffer, "StarDict's treedict ifo file\nversion=2.4.2\n")) { |
1272 |
g_print("Bad treedict ifo file %s, skiped!\n", ifofilename); |
1273 |
g_free(buffer); |
1274 |
return false; |
1275 |
} |
1276 |
gchar *p1= buffer + sizeof("StarDict's treedict ifo file\nversion=2.4.2\n")-1 -1; |
1277 |
|
1278 |
gchar *p2,*p3; |
1279 |
|
1280 |
p2 = strstr(p1,"\ntdxfilesize="); |
1281 |
if (!p2) { |
1282 |
g_free(buffer); |
1283 |
return false; |
1428 |
return false; |
1284 |
} |
|
|
1285 |
p3 = strchr(p2+ sizeof("\ntdxfilesize=")-1,'\n'); |
1286 |
gchar *tmpstr = (gchar *)g_memdup(p2+sizeof("\ntdxfilesize=")-1, p3-(p2+sizeof("\ntdxfilesize=")-1)+1); |
1287 |
tmpstr[p3-(p2+sizeof("\ntdxfilesize=")-1)] = '\0'; |
1288 |
*tdxfilesize = atol(tmpstr); |
1289 |
g_free(tmpstr); |
1290 |
|
1429 |
|
1291 |
p2 = strstr(p1,"\nsametypesequence="); |
1430 |
*tdxfilesize = dict_info.index_file_size; |
1292 |
if (p2) { |
1431 |
if (!dict_info.sametypesequence.empty()) |
1293 |
p3 = strchr(p2+sizeof("\nsametypesequence=")-1,'\n'); |
1432 |
sametypesequence=g_strdup(dict_info.sametypesequence.c_str()); |
1294 |
sametypesequence = (gchar *)g_memdup(p2+sizeof("\nsametypesequence=")-1, p3-(p2+sizeof("\nsametypesequence=")-1)+1); |
|
|
1295 |
sametypesequence[p3-(p2+sizeof("\nsametypesequence=")-1)] = '\0'; |
1296 |
} |
1297 |
|
1433 |
|
1298 |
g_free(buffer); |
|
|
1299 |
return true; |
1434 |
return true; |
1300 |
} |
1435 |
} |
1301 |
|
1436 |
|