|
|
| |
class QUtf8Decoder : public QTextDecoder { | class QUtf8Decoder : public QTextDecoder { |
uint uc; | uint uc; |
|
uint min_uc; |
int need; | int need; |
bool headerDone; | bool headerDone; |
public: | public: |
|
|
result.setLength( len ); // worst case | result.setLength( len ); // worst case |
QChar *qch = (QChar *)result.unicode(); | QChar *qch = (QChar *)result.unicode(); |
uchar ch; | uchar ch; |
|
int error = -1; |
for (int i=0; i<len; i++) { | for (int i=0; i<len; i++) { |
ch = *chars++; |
ch = chars[i]; |
if (need) { | if (need) { |
if ( (ch&0xc0) == 0x80 ) { | if ( (ch&0xc0) == 0x80 ) { |
uc = (uc << 6) | (ch & 0x3f); | uc = (uc << 6) | (ch & 0x3f); |
|
|
*qch++ = QChar(high); | *qch++ = QChar(high); |
*qch++ = QChar(low); | *qch++ = QChar(low); |
headerDone = TRUE; | headerDone = TRUE; |
|
} else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { |
|
*qch++ = QChar::replacement; |
} else { | } else { |
if (headerDone || QChar(uc) != QChar::byteOrderMark) | if (headerDone || QChar(uc) != QChar::byteOrderMark) |
*qch++ = uc; | *qch++ = uc; |
|
|
} | } |
} else { | } else { |
// error | // error |
|
i = error; |
*qch++ = QChar::replacement; | *qch++ = QChar::replacement; |
need = 0; | need = 0; |
} | } |
|
|
} else if ((ch & 0xe0) == 0xc0) { | } else if ((ch & 0xe0) == 0xc0) { |
uc = ch & 0x1f; | uc = ch & 0x1f; |
need = 1; | need = 1; |
|
error = i; |
|
min_uc = 0x80; |
} else if ((ch & 0xf0) == 0xe0) { | } else if ((ch & 0xf0) == 0xe0) { |
uc = ch & 0x0f; | uc = ch & 0x0f; |
need = 2; | need = 2; |
|
error = i; |
|
min_uc = 0x800; |
} else if ((ch&0xf8) == 0xf0) { | } else if ((ch&0xf8) == 0xf0) { |
uc = ch & 0x07; | uc = ch & 0x07; |
need = 3; | need = 3; |
|
error = i; |
|
min_uc = 0x10000; |
|
} else { |
|
// error |
|
*qch++ = QChar::replacement; |
} | } |
} | } |
} | } |
|
|
result.setLength( len ); // worst case | result.setLength( len ); // worst case |
QChar *qch = (QChar *)result.unicode(); | QChar *qch = (QChar *)result.unicode(); |
uint uc = 0; | uint uc = 0; |
|
uint min_uc = 0; |
int need = 0; | int need = 0; |
int error = -1; | int error = -1; |
uchar ch; | uchar ch; |
|
|
unsigned short low = uc%0x400 + 0xdc00; | unsigned short low = uc%0x400 + 0xdc00; |
*qch++ = QChar(high); | *qch++ = QChar(high); |
*qch++ = QChar(low); | *qch++ = QChar(low); |
|
} else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { |
|
// overlong sequence, UTF16 surrogate or BOM |
|
i = error; |
|
qch = addOne(qch, result); |
|
*qch++ = QChar(0xdbff); |
|
*qch++ = QChar(0xde00+((uchar)utf8[i])); |
} else { | } else { |
*qch++ = uc; | *qch++ = uc; |
} | } |
|
|
uc = ch & 0x1f; | uc = ch & 0x1f; |
need = 1; | need = 1; |
error = i; | error = i; |
|
min_uc = 0x80; |
} else if ((ch & 0xf0) == 0xe0) { | } else if ((ch & 0xf0) == 0xe0) { |
uc = ch & 0x0f; | uc = ch & 0x0f; |
need = 2; | need = 2; |
error = i; | error = i; |
|
min_uc = 0x800; |
} else if ((ch&0xf8) == 0xf0) { | } else if ((ch&0xf8) == 0xf0) { |
uc = ch & 0x07; | uc = ch & 0x07; |
need = 3; | need = 3; |
error = i; | error = i; |
|
min_uc = 0x10000; |
} else { | } else { |
// Error | // Error |
qch = addOne(qch, result); | qch = addOne(qch, result); |