--- src/codecs/qutfcodec.cpp +++ src/codecs/qutfcodec.cpp @@ -154,6 +154,7 @@ class QUtf8Decoder : public QTextDecoder { uint uc; + uint min_uc; int need; bool headerDone; public: @@ -167,8 +168,9 @@ result.setLength( len ); // worst case QChar *qch = (QChar *)result.unicode(); uchar ch; + int error = -1; for (int i=0; i= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + *qch++ = QChar::replacement; } else { if (headerDone || QChar(uc) != QChar::byteOrderMark) *qch++ = uc; @@ -190,6 +194,7 @@ } } else { // error + i = error; *qch++ = QChar::replacement; need = 0; } @@ -200,12 +205,21 @@ } else if ((ch & 0xe0) == 0xc0) { uc = ch & 0x1f; need = 1; + error = i; + min_uc = 0x80; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; + error = i; + min_uc = 0x800; } else if ((ch&0xf8) == 0xf0) { uc = ch & 0x07; need = 3; + error = i; + min_uc = 0x10000; + } else { + // error + *qch++ = QChar::replacement; } } } --- src/tools/qstring.cpp +++ src/tools/qstring.cpp @@ -5805,6 +5805,7 @@ result.setLength( len ); // worst case QChar *qch = (QChar *)result.unicode(); uint uc = 0; + uint min_uc = 0; int need = 0; int error = -1; uchar ch; @@ -5822,6 +5823,12 @@ unsigned short low = uc%0x400 + 0xdc00; *qch++ = QChar(high); *qch++ = QChar(low); + } else if (uc < min_uc || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + // overlong sequence, UTF16 surrogate or BOM + i = error; + qch = addOne(qch, result); + *qch++ = QChar(0xdbff); + *qch++ = QChar(0xde00+((uchar)utf8[i])); } else { *qch++ = uc; } @@ -5844,14 +5851,17 @@ uc = ch & 0x1f; need = 1; error = i; + min_uc = 0x80; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; error = i; + min_uc = 0x800; } else if ((ch&0xf8) == 0xf0) { uc = ch & 0x07; need = 3; error = i; + min_uc = 0x10000; } else { // Error qch = addOne(qch, result);