Lines 318-356
enum { /* UTF8_cvalN is value of masked first byte of N byte sequence */
Link Here
|
318 |
UTF8_cval4 = 0xf0 |
318 |
UTF8_cval4 = 0xf0 |
319 |
}; |
319 |
}; |
320 |
|
320 |
|
321 |
static void PTRCALL |
321 |
static enum XML_Convert_Result PTRCALL |
322 |
utf8_toUtf8(const ENCODING *enc, |
322 |
utf8_toUtf8(const ENCODING *enc, |
323 |
const char **fromP, const char *fromLim, |
323 |
const char **fromP, const char *fromLim, |
324 |
char **toP, const char *toLim) |
324 |
char **toP, const char *toLim) |
325 |
{ |
325 |
{ |
|
|
326 |
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
326 |
char *to; |
327 |
char *to; |
327 |
const char *from; |
328 |
const char *from; |
328 |
if (fromLim - *fromP > toLim - *toP) { |
329 |
if (fromLim - *fromP > toLim - *toP) { |
329 |
/* Avoid copying partial characters. */ |
330 |
/* Avoid copying partial characters. */ |
|
|
331 |
res = XML_CONVERT_OUTPUT_EXHAUSTED; |
330 |
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) |
332 |
for (fromLim = *fromP + (toLim - *toP); fromLim > *fromP; fromLim--) |
331 |
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) |
333 |
if (((unsigned char)fromLim[-1] & 0xc0) != 0x80) |
332 |
break; |
334 |
break; |
333 |
} |
335 |
} |
334 |
for (to = *toP, from = *fromP; from != fromLim; from++, to++) |
336 |
for (to = *toP, from = *fromP; (from < fromLim) && (to < toLim); from++, to++) |
335 |
*to = *from; |
337 |
*to = *from; |
336 |
*fromP = from; |
338 |
*fromP = from; |
337 |
*toP = to; |
339 |
*toP = to; |
|
|
340 |
|
341 |
if ((to == toLim) && (from < fromLim)) |
342 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
343 |
else |
344 |
return res; |
338 |
} |
345 |
} |
339 |
|
346 |
|
340 |
static void PTRCALL |
347 |
static enum XML_Convert_Result PTRCALL |
341 |
utf8_toUtf16(const ENCODING *enc, |
348 |
utf8_toUtf16(const ENCODING *enc, |
342 |
const char **fromP, const char *fromLim, |
349 |
const char **fromP, const char *fromLim, |
343 |
unsigned short **toP, const unsigned short *toLim) |
350 |
unsigned short **toP, const unsigned short *toLim) |
344 |
{ |
351 |
{ |
|
|
352 |
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; |
345 |
unsigned short *to = *toP; |
353 |
unsigned short *to = *toP; |
346 |
const char *from = *fromP; |
354 |
const char *from = *fromP; |
347 |
while (from != fromLim && to != toLim) { |
355 |
while (from < fromLim && to < toLim) { |
348 |
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { |
356 |
switch (((struct normal_encoding *)enc)->type[(unsigned char)*from]) { |
349 |
case BT_LEAD2: |
357 |
case BT_LEAD2: |
|
|
358 |
if (fromLim - from < 2) { |
359 |
res = XML_CONVERT_INPUT_INCOMPLETE; |
360 |
break; |
361 |
} |
350 |
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); |
362 |
*to++ = (unsigned short)(((from[0] & 0x1f) << 6) | (from[1] & 0x3f)); |
351 |
from += 2; |
363 |
from += 2; |
352 |
break; |
364 |
break; |
353 |
case BT_LEAD3: |
365 |
case BT_LEAD3: |
|
|
366 |
if (fromLim - from < 3) { |
367 |
res = XML_CONVERT_INPUT_INCOMPLETE; |
368 |
break; |
369 |
} |
354 |
*to++ = (unsigned short)(((from[0] & 0xf) << 12) |
370 |
*to++ = (unsigned short)(((from[0] & 0xf) << 12) |
355 |
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); |
371 |
| ((from[1] & 0x3f) << 6) | (from[2] & 0x3f)); |
356 |
from += 3; |
372 |
from += 3; |
Lines 358-365
utf8_toUtf16(const ENCODING *enc,
Link Here
|
358 |
case BT_LEAD4: |
374 |
case BT_LEAD4: |
359 |
{ |
375 |
{ |
360 |
unsigned long n; |
376 |
unsigned long n; |
361 |
if (to + 1 == toLim) |
377 |
if (toLim - to < 2) { |
|
|
378 |
res = XML_CONVERT_OUTPUT_EXHAUSTED; |
362 |
goto after; |
379 |
goto after; |
|
|
380 |
} |
381 |
if (fromLim - from < 4) { |
382 |
res = XML_CONVERT_INPUT_INCOMPLETE; |
383 |
goto after; |
384 |
} |
363 |
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) |
385 |
n = ((from[0] & 0x7) << 18) | ((from[1] & 0x3f) << 12) |
364 |
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); |
386 |
| ((from[2] & 0x3f) << 6) | (from[3] & 0x3f); |
365 |
n -= 0x10000; |
387 |
n -= 0x10000; |
Lines 377-382
utf8_toUtf16(const ENCODING *enc,
Link Here
|
377 |
after: |
399 |
after: |
378 |
*fromP = from; |
400 |
*fromP = from; |
379 |
*toP = to; |
401 |
*toP = to; |
|
|
402 |
return res; |
380 |
} |
403 |
} |
381 |
|
404 |
|
382 |
#ifdef XML_NS |
405 |
#ifdef XML_NS |
Lines 425-431
static const struct normal_encoding internal_utf8_encoding = {
Link Here
|
425 |
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
448 |
STANDARD_VTABLE(sb_) NORMAL_VTABLE(utf8_) |
426 |
}; |
449 |
}; |
427 |
|
450 |
|
428 |
static void PTRCALL |
451 |
static enum XML_Convert_Result PTRCALL |
429 |
latin1_toUtf8(const ENCODING *enc, |
452 |
latin1_toUtf8(const ENCODING *enc, |
430 |
const char **fromP, const char *fromLim, |
453 |
const char **fromP, const char *fromLim, |
431 |
char **toP, const char *toLim) |
454 |
char **toP, const char *toLim) |
Lines 433-462
latin1_toUtf8(const ENCODING *enc,
Link Here
|
433 |
for (;;) { |
456 |
for (;;) { |
434 |
unsigned char c; |
457 |
unsigned char c; |
435 |
if (*fromP == fromLim) |
458 |
if (*fromP == fromLim) |
436 |
break; |
459 |
return XML_CONVERT_COMPLETED; |
437 |
c = (unsigned char)**fromP; |
460 |
c = (unsigned char)**fromP; |
438 |
if (c & 0x80) { |
461 |
if (c & 0x80) { |
439 |
if (toLim - *toP < 2) |
462 |
if (toLim - *toP < 2) |
440 |
break; |
463 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
441 |
*(*toP)++ = (char)((c >> 6) | UTF8_cval2); |
464 |
*(*toP)++ = (char)((c >> 6) | UTF8_cval2); |
442 |
*(*toP)++ = (char)((c & 0x3f) | 0x80); |
465 |
*(*toP)++ = (char)((c & 0x3f) | 0x80); |
443 |
(*fromP)++; |
466 |
(*fromP)++; |
444 |
} |
467 |
} |
445 |
else { |
468 |
else { |
446 |
if (*toP == toLim) |
469 |
if (*toP == toLim) |
447 |
break; |
470 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
448 |
*(*toP)++ = *(*fromP)++; |
471 |
*(*toP)++ = *(*fromP)++; |
449 |
} |
472 |
} |
450 |
} |
473 |
} |
451 |
} |
474 |
} |
452 |
|
475 |
|
453 |
static void PTRCALL |
476 |
static enum XML_Convert_Result PTRCALL |
454 |
latin1_toUtf16(const ENCODING *enc, |
477 |
latin1_toUtf16(const ENCODING *enc, |
455 |
const char **fromP, const char *fromLim, |
478 |
const char **fromP, const char *fromLim, |
456 |
unsigned short **toP, const unsigned short *toLim) |
479 |
unsigned short **toP, const unsigned short *toLim) |
457 |
{ |
480 |
{ |
458 |
while (*fromP != fromLim && *toP != toLim) |
481 |
while (*fromP < fromLim && *toP < toLim) |
459 |
*(*toP)++ = (unsigned char)*(*fromP)++; |
482 |
*(*toP)++ = (unsigned char)*(*fromP)++; |
|
|
483 |
|
484 |
if ((*toP == toLim) && (*fromP < fromLim)) |
485 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
486 |
else |
487 |
return XML_CONVERT_COMPLETED; |
460 |
} |
488 |
} |
461 |
|
489 |
|
462 |
#ifdef XML_NS |
490 |
#ifdef XML_NS |
Lines 483-495
static const struct normal_encoding latin1_encoding = {
Link Here
|
483 |
STANDARD_VTABLE(sb_) |
511 |
STANDARD_VTABLE(sb_) |
484 |
}; |
512 |
}; |
485 |
|
513 |
|
486 |
static void PTRCALL |
514 |
static enum XML_Convert_Result PTRCALL |
487 |
ascii_toUtf8(const ENCODING *enc, |
515 |
ascii_toUtf8(const ENCODING *enc, |
488 |
const char **fromP, const char *fromLim, |
516 |
const char **fromP, const char *fromLim, |
489 |
char **toP, const char *toLim) |
517 |
char **toP, const char *toLim) |
490 |
{ |
518 |
{ |
491 |
while (*fromP != fromLim && *toP != toLim) |
519 |
while (*fromP < fromLim && *toP < toLim) |
492 |
*(*toP)++ = *(*fromP)++; |
520 |
*(*toP)++ = *(*fromP)++; |
|
|
521 |
|
522 |
if ((*toP == toLim) && (*fromP < fromLim)) |
523 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
524 |
else |
525 |
return XML_CONVERT_COMPLETED; |
493 |
} |
526 |
} |
494 |
|
527 |
|
495 |
#ifdef XML_NS |
528 |
#ifdef XML_NS |
Lines 536-548
unicode_byte_type(char hi, char lo)
Link Here
|
536 |
} |
569 |
} |
537 |
|
570 |
|
538 |
#define DEFINE_UTF16_TO_UTF8(E) \ |
571 |
#define DEFINE_UTF16_TO_UTF8(E) \ |
539 |
static void PTRCALL \ |
572 |
static enum XML_Convert_Result PTRCALL \ |
540 |
E ## toUtf8(const ENCODING *enc, \ |
573 |
E ## toUtf8(const ENCODING *enc, \ |
541 |
const char **fromP, const char *fromLim, \ |
574 |
const char **fromP, const char *fromLim, \ |
542 |
char **toP, const char *toLim) \ |
575 |
char **toP, const char *toLim) \ |
543 |
{ \ |
576 |
{ \ |
544 |
const char *from; \ |
577 |
const char *from = *fromP; \ |
545 |
for (from = *fromP; from != fromLim; from += 2) { \ |
578 |
fromLim = from + (((fromLim - from) >> 1) << 1); /* shrink to even */ \ |
|
|
579 |
for (; from < fromLim; from += 2) { \ |
546 |
int plane; \ |
580 |
int plane; \ |
547 |
unsigned char lo2; \ |
581 |
unsigned char lo2; \ |
548 |
unsigned char lo = GET_LO(from); \ |
582 |
unsigned char lo = GET_LO(from); \ |
Lines 552-558
E ## toUtf8(const ENCODING *enc, \
Link Here
|
552 |
if (lo < 0x80) { \ |
586 |
if (lo < 0x80) { \ |
553 |
if (*toP == toLim) { \ |
587 |
if (*toP == toLim) { \ |
554 |
*fromP = from; \ |
588 |
*fromP = from; \ |
555 |
return; \ |
589 |
return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
556 |
} \ |
590 |
} \ |
557 |
*(*toP)++ = lo; \ |
591 |
*(*toP)++ = lo; \ |
558 |
break; \ |
592 |
break; \ |
Lines 562-568
E ## toUtf8(const ENCODING *enc, \
Link Here
|
562 |
case 0x4: case 0x5: case 0x6: case 0x7: \ |
596 |
case 0x4: case 0x5: case 0x6: case 0x7: \ |
563 |
if (toLim - *toP < 2) { \ |
597 |
if (toLim - *toP < 2) { \ |
564 |
*fromP = from; \ |
598 |
*fromP = from; \ |
565 |
return; \ |
599 |
return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
566 |
} \ |
600 |
} \ |
567 |
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ |
601 |
*(*toP)++ = ((lo >> 6) | (hi << 2) | UTF8_cval2); \ |
568 |
*(*toP)++ = ((lo & 0x3f) | 0x80); \ |
602 |
*(*toP)++ = ((lo & 0x3f) | 0x80); \ |
Lines 570-576
E ## toUtf8(const ENCODING *enc, \
Link Here
|
570 |
default: \ |
604 |
default: \ |
571 |
if (toLim - *toP < 3) { \ |
605 |
if (toLim - *toP < 3) { \ |
572 |
*fromP = from; \ |
606 |
*fromP = from; \ |
573 |
return; \ |
607 |
return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
574 |
} \ |
608 |
} \ |
575 |
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ |
609 |
/* 16 bits divided 4, 6, 6 amongst 3 bytes */ \ |
576 |
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \ |
610 |
*(*toP)++ = ((hi >> 4) | UTF8_cval3); \ |
Lines 580-586
E ## toUtf8(const ENCODING *enc, \
Link Here
|
580 |
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ |
614 |
case 0xD8: case 0xD9: case 0xDA: case 0xDB: \ |
581 |
if (toLim - *toP < 4) { \ |
615 |
if (toLim - *toP < 4) { \ |
582 |
*fromP = from; \ |
616 |
*fromP = from; \ |
583 |
return; \ |
617 |
return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
|
|
618 |
} \ |
619 |
if (fromLim - from < 4) { \ |
620 |
*fromP = from; \ |
621 |
return XML_CONVERT_INPUT_INCOMPLETE; \ |
584 |
} \ |
622 |
} \ |
585 |
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ |
623 |
plane = (((hi & 0x3) << 2) | ((lo >> 6) & 0x3)) + 1; \ |
586 |
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \ |
624 |
*(*toP)++ = ((plane >> 2) | UTF8_cval4); \ |
Lines 596-615
E ## toUtf8(const ENCODING *enc, \
Link Here
|
596 |
} \ |
634 |
} \ |
597 |
} \ |
635 |
} \ |
598 |
*fromP = from; \ |
636 |
*fromP = from; \ |
|
|
637 |
if (from < fromLim) \ |
638 |
return XML_CONVERT_INPUT_INCOMPLETE; \ |
639 |
else \ |
640 |
return XML_CONVERT_COMPLETED; \ |
599 |
} |
641 |
} |
600 |
|
642 |
|
601 |
#define DEFINE_UTF16_TO_UTF16(E) \ |
643 |
#define DEFINE_UTF16_TO_UTF16(E) \ |
602 |
static void PTRCALL \ |
644 |
static enum XML_Convert_Result PTRCALL \ |
603 |
E ## toUtf16(const ENCODING *enc, \ |
645 |
E ## toUtf16(const ENCODING *enc, \ |
604 |
const char **fromP, const char *fromLim, \ |
646 |
const char **fromP, const char *fromLim, \ |
605 |
unsigned short **toP, const unsigned short *toLim) \ |
647 |
unsigned short **toP, const unsigned short *toLim) \ |
606 |
{ \ |
648 |
{ \ |
|
|
649 |
enum XML_Convert_Result res = XML_CONVERT_COMPLETED; \ |
650 |
fromLim = *fromP + (((fromLim - *fromP) >> 1) << 1); /* shrink to even */ \ |
607 |
/* Avoid copying first half only of surrogate */ \ |
651 |
/* Avoid copying first half only of surrogate */ \ |
608 |
if (fromLim - *fromP > ((toLim - *toP) << 1) \ |
652 |
if (fromLim - *fromP > ((toLim - *toP) << 1) \ |
609 |
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) \ |
653 |
&& (GET_HI(fromLim - 2) & 0xF8) == 0xD8) { \ |
610 |
fromLim -= 2; \ |
654 |
fromLim -= 2; \ |
611 |
for (; *fromP != fromLim && *toP != toLim; *fromP += 2) \ |
655 |
res = XML_CONVERT_INPUT_INCOMPLETE; \ |
|
|
656 |
} \ |
657 |
for (; *fromP < fromLim && *toP < toLim; *fromP += 2) \ |
612 |
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ |
658 |
*(*toP)++ = (GET_HI(*fromP) << 8) | GET_LO(*fromP); \ |
|
|
659 |
if ((*toP == toLim) && (*fromP < fromLim)) \ |
660 |
return XML_CONVERT_OUTPUT_EXHAUSTED; \ |
661 |
else \ |
662 |
return res; \ |
613 |
} |
663 |
} |
614 |
|
664 |
|
615 |
#define SET2(ptr, ch) \ |
665 |
#define SET2(ptr, ch) \ |
Lines 1288-1294
unknown_isInvalid(const ENCODING *enc, const char *p)
Link Here
|
1288 |
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; |
1338 |
return (c & ~0xFFFF) || checkCharRefNumber(c) < 0; |
1289 |
} |
1339 |
} |
1290 |
|
1340 |
|
1291 |
static void PTRCALL |
1341 |
static enum XML_Convert_Result PTRCALL |
1292 |
unknown_toUtf8(const ENCODING *enc, |
1342 |
unknown_toUtf8(const ENCODING *enc, |
1293 |
const char **fromP, const char *fromLim, |
1343 |
const char **fromP, const char *fromLim, |
1294 |
char **toP, const char *toLim) |
1344 |
char **toP, const char *toLim) |
Lines 1299-1319
unknown_toUtf8(const ENCODING *enc,
Link Here
|
1299 |
const char *utf8; |
1349 |
const char *utf8; |
1300 |
int n; |
1350 |
int n; |
1301 |
if (*fromP == fromLim) |
1351 |
if (*fromP == fromLim) |
1302 |
break; |
1352 |
return XML_CONVERT_COMPLETED; |
1303 |
utf8 = uenc->utf8[(unsigned char)**fromP]; |
1353 |
utf8 = uenc->utf8[(unsigned char)**fromP]; |
1304 |
n = *utf8++; |
1354 |
n = *utf8++; |
1305 |
if (n == 0) { |
1355 |
if (n == 0) { |
1306 |
int c = uenc->convert(uenc->userData, *fromP); |
1356 |
int c = uenc->convert(uenc->userData, *fromP); |
1307 |
n = XmlUtf8Encode(c, buf); |
1357 |
n = XmlUtf8Encode(c, buf); |
1308 |
if (n > toLim - *toP) |
1358 |
if (n > toLim - *toP) |
1309 |
break; |
1359 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
1310 |
utf8 = buf; |
1360 |
utf8 = buf; |
1311 |
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
1361 |
*fromP += (AS_NORMAL_ENCODING(enc)->type[(unsigned char)**fromP] |
1312 |
- (BT_LEAD2 - 2)); |
1362 |
- (BT_LEAD2 - 2)); |
1313 |
} |
1363 |
} |
1314 |
else { |
1364 |
else { |
1315 |
if (n > toLim - *toP) |
1365 |
if (n > toLim - *toP) |
1316 |
break; |
1366 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
1317 |
(*fromP)++; |
1367 |
(*fromP)++; |
1318 |
} |
1368 |
} |
1319 |
do { |
1369 |
do { |
Lines 1322-1334
unknown_toUtf8(const ENCODING *enc,
Link Here
|
1322 |
} |
1372 |
} |
1323 |
} |
1373 |
} |
1324 |
|
1374 |
|
1325 |
static void PTRCALL |
1375 |
static enum XML_Convert_Result PTRCALL |
1326 |
unknown_toUtf16(const ENCODING *enc, |
1376 |
unknown_toUtf16(const ENCODING *enc, |
1327 |
const char **fromP, const char *fromLim, |
1377 |
const char **fromP, const char *fromLim, |
1328 |
unsigned short **toP, const unsigned short *toLim) |
1378 |
unsigned short **toP, const unsigned short *toLim) |
1329 |
{ |
1379 |
{ |
1330 |
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
1380 |
const struct unknown_encoding *uenc = AS_UNKNOWN_ENCODING(enc); |
1331 |
while (*fromP != fromLim && *toP != toLim) { |
1381 |
while (*fromP < fromLim && *toP < toLim) { |
1332 |
unsigned short c = uenc->utf16[(unsigned char)**fromP]; |
1382 |
unsigned short c = uenc->utf16[(unsigned char)**fromP]; |
1333 |
if (c == 0) { |
1383 |
if (c == 0) { |
1334 |
c = (unsigned short) |
1384 |
c = (unsigned short) |
Lines 1340-1345
unknown_toUtf16(const ENCODING *enc,
Link Here
|
1340 |
(*fromP)++; |
1390 |
(*fromP)++; |
1341 |
*(*toP)++ = c; |
1391 |
*(*toP)++ = c; |
1342 |
} |
1392 |
} |
|
|
1393 |
|
1394 |
if ((*toP == toLim) && (*fromP < fromLim)) |
1395 |
return XML_CONVERT_OUTPUT_EXHAUSTED; |
1396 |
else |
1397 |
return XML_CONVERT_COMPLETED; |
1343 |
} |
1398 |
} |
1344 |
|
1399 |
|
1345 |
ENCODING * |
1400 |
ENCODING * |
Lines 1503-1509
initScan(const ENCODING * const *encodingTable,
Link Here
|
1503 |
{ |
1558 |
{ |
1504 |
const ENCODING **encPtr; |
1559 |
const ENCODING **encPtr; |
1505 |
|
1560 |
|
1506 |
if (ptr == end) |
1561 |
if (ptr >= end) |
1507 |
return XML_TOK_NONE; |
1562 |
return XML_TOK_NONE; |
1508 |
encPtr = enc->encPtr; |
1563 |
encPtr = enc->encPtr; |
1509 |
if (ptr + 1 == end) { |
1564 |
if (ptr + 1 == end) { |