--- file_not_specified_in_diff +++ file_not_specified_in_diff @@ -, +, @@ --- src/corelib/codecs/qutfcodec.cpp +++ src/corelib/codecs/qutfcodec.cpp @@ -126,15 +126,19 @@ bool headerdone = false; QChar replacement = QChar::ReplacementCharacter; int need = 0; + int error = -1; uint uc = 0; + uint min_uc = 0; if (state) { if (state->flags & IgnoreHeader) headerdone = true; if (state->flags & ConvertInvalidToNull) replacement = QChar::Null; need = state->remainingChars; - if (need) + if (need) { uc = state->state_data[0]; + min_uc = state->state_data[1]; + } } if (!headerdone && len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) { @@ -144,14 +148,15 @@ headerdone = true; } + int originalLength = target->length(); QString &result = *target; - result.resize(len); // worst case - QChar *qch = result.data(); + result.resize(originalLength + len); // worst case + QChar *qch = result.data() + originalLength; uchar ch; int invalid = 0; for (int i=0; i= result.length()) { + result.resize(where + 2); + qch = result.data() + where; + } + *qch++ = QChar(high); *qch++ = QChar(low); + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + // error + *qch++ = QChar::ReplacementCharacter; + ++invalid; } else { *qch++ = uc; } } } else { // error + i = error; *qch++ = QChar::ReplacementCharacter; ++invalid; need = 0; @@ -180,15 +198,32 @@ } else if ((ch & 0xe0) == 0xc0) { uc = ch & 0x1f; need = 1; + error = i; + min_uc = 0x80; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; + error = i; + min_uc = 0x800; } else if ((ch&0xf8) == 0xf0) { uc = ch & 0x07; need = 3; + error = i; + min_uc = 0x10000; + } else { + // error + *qch++ = QChar::ReplacementCharacter; + ++invalid; } } } + if (!state && need > 0) { + // unterminated UTF sequence + for (int i = error; i < len; ++i) { + *qch++ = QChar::ReplacementCharacter; + ++invalid; + } + } result.truncate(qch - result.unicode()); if (state) { state->invalidChars += 
invalid; @@ -196,6 +231,7 @@ if (headerdone) state->flags |= IgnoreHeader; state->state_data[0] = need ? uc : 0; + state->state_data[1] = need ? min_uc : 0; } } --- src/corelib/tools/qstring.cpp +++ src/corelib/tools/qstring.cpp @@ -3416,6 +3416,7 @@ result.resize(size); // worst case ushort *qch = result.d->data; uint uc = 0; + uint min_uc = 0; int need = 0; int error = -1; uchar ch; @@ -3430,6 +3431,12 @@ // surrogate pair *qch++ = QChar::highSurrogate(uc); uc = QChar::lowSurrogate(uc); + } else if ((uc < min_uc) || (uc >= 0xd800 && uc <= 0xdfff) || (uc >= 0xfffe)) { + // overlong sequence, UTF16 surrogate or BOM + i = error; + qch = addOne(qch, result); + *qch++ = 0xdbff; + uc = 0xde00 + ((uchar)str[i]); } *qch++ = uc; } @@ -3451,14 +3458,17 @@ uc = ch & 0x1f; need = 1; error = i; + min_uc = 0x80; } else if ((ch & 0xf0) == 0xe0) { uc = ch & 0x0f; need = 2; error = i; + min_uc = 0x800; } else if ((ch&0xf8) == 0xf0) { uc = ch & 0x07; need = 3; error = i; + min_uc = 0x10000; } else { // Error qch = addOne(qch, result); @@ -6348,6 +6358,7 @@ /*! \fn QString &QString::inline_append(QChar ch) + \internal An inlined version of append(). */