Lines 38-43
Link Here
|
38 |
#include <string.h> |
38 |
#include <string.h> |
39 |
#include <unistd.h> |
39 |
#include <unistd.h> |
40 |
#include <zlib.h> |
40 |
#include <zlib.h> |
|
|
41 |
#include <bzlib.h> |
41 |
|
42 |
|
42 |
#include "mandoc_aux.h" |
43 |
#include "mandoc_aux.h" |
43 |
#include "mandoc.h" |
44 |
#include "mandoc.h" |
Lines 51-56
Link Here
|
51 |
|
52 |
|
52 |
#define REPARSE_LIMIT 1000 |
53 |
#define REPARSE_LIMIT 1000 |
53 |
|
54 |
|
|
|
55 |
/*
 * Compression format of the current input file.
 * compressor_none is the first enumerator and therefore has the value 0.
 */
enum compressor { compressor_none, compressor_gzip, compressor_bzip2 }; |
56 |
|
54 |
struct mparse { |
57 |
struct mparse { |
55 |
struct roff *roff; /* roff parser (!NULL) */ |
58 |
struct roff *roff; /* roff parser (!NULL) */ |
56 |
struct roff_man *man; /* man parser */ |
59 |
struct roff_man *man; /* man parser */ |
Lines 59-65
Link Here
|
59 |
struct buf *loop; /* open .while request line */ |
62 |
struct buf *loop; /* open .while request line */ |
60 |
const char *os_s; /* default operating system */ |
63 |
const char *os_s; /* default operating system */ |
61 |
int options; /* parser options */ |
64 |
int options; /* parser options */ |
62 |
int gzip; /* current input file is gzipped */ |
65 |
enum compressor compressor; /* current input file is compressed */ |
63 |
int filenc; /* encoding of the current file */ |
66 |
int filenc; /* encoding of the current file */ |
64 |
int reparse_count; /* finite interp. stack */ |
67 |
int reparse_count; /* finite interp. stack */ |
65 |
int line; /* line number in the file */ |
68 |
int line; /* line number in the file */ |
Lines 435-444
Link Here
|
435 |
read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap) |
438 |
read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap) |
436 |
{ |
439 |
{ |
437 |
struct stat st; |
440 |
struct stat st; |
|
|
441 |
BZFILE *bz; |
438 |
gzFile gz; |
442 |
gzFile gz; |
439 |
size_t off; |
443 |
size_t off; |
440 |
ssize_t ssz; |
444 |
ssize_t ssz; |
441 |
int gzerrnum, retval; |
445 |
int bzerrnum = 0, gzerrnum = 0, retval; |
442 |
|
446 |
|
443 |
if (fstat(fd, &st) == -1) { |
447 |
if (fstat(fd, &st) == -1) { |
444 |
mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno)); |
448 |
mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno)); |
Lines 452-458
Link Here
|
452 |
* concerned that this is going to tank any machines. |
456 |
* concerned that this is going to tank any machines. |
453 |
*/ |
457 |
*/ |
454 |
|
458 |
|
455 |
if (curp->gzip == 0 && S_ISREG(st.st_mode)) { |
459 |
if (curp->compressor == 0 && S_ISREG(st.st_mode)) { |
456 |
if (st.st_size > 0x7fffffff) { |
460 |
if (st.st_size > 0x7fffffff) { |
457 |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
461 |
mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL); |
458 |
return -1; |
462 |
return -1; |
Lines 464-470
Link Here
|
464 |
return 0; |
468 |
return 0; |
465 |
} |
469 |
} |
466 |
|
470 |
|
467 |
if (curp->gzip) { |
471 |
if (curp->compressor) { |
468 |
/* |
472 |
/* |
469 |
* Duplicating the file descriptor is required |
473 |
* Duplicating the file descriptor is required |
470 |
* because we will have to call gzclose(3) |
474 |
* because we will have to call gzclose(3) |
Lines 477-490
Link Here
|
477 |
"%s", strerror(errno)); |
481 |
"%s", strerror(errno)); |
478 |
return -1; |
482 |
return -1; |
479 |
} |
483 |
} |
|
|
484 |
} |
485 |
|
486 |
bz = NULL; |
487 |
gz = NULL; |
488 |
switch (curp->compressor) { |
489 |
case compressor_none: |
490 |
break; |
491 |
case compressor_bzip2: |
492 |
{ |
493 |
FILE *fp = fdopen(fd, "rb"); |
494 |
if (!fp) { |
495 |
close(fd); |
496 |
return -1; |
497 |
} |
498 |
if ((bz = BZ2_bzReadOpen(&bzerrnum, fp, 0, 0, NULL, 0)) == NULL) { |
499 |
mandoc_msg(MANDOCERR_BZREADOPEN, 0, 0, |
500 |
"%s", strerror(errno)); |
501 |
close(fd); |
502 |
return -1; |
503 |
} |
504 |
} |
505 |
break; |
506 |
case compressor_gzip: |
480 |
if ((gz = gzdopen(fd, "rb")) == NULL) { |
507 |
if ((gz = gzdopen(fd, "rb")) == NULL) { |
481 |
mandoc_msg(MANDOCERR_GZDOPEN, 0, 0, |
508 |
mandoc_msg(MANDOCERR_GZDOPEN, 0, 0, |
482 |
"%s", strerror(errno)); |
509 |
"%s", strerror(errno)); |
483 |
close(fd); |
510 |
close(fd); |
484 |
return -1; |
511 |
return -1; |
485 |
} |
512 |
} |
486 |
} else |
513 |
break; |
487 |
gz = NULL; |
514 |
} |
488 |
|
515 |
|
489 |
/* |
516 |
/* |
490 |
* If this isn't a regular file (like, say, stdin), then we must |
517 |
* If this isn't a regular file (like, say, stdin), then we must |
Lines 504-511
Link Here
|
504 |
} |
531 |
} |
505 |
resize_buf(fb, 65536); |
532 |
resize_buf(fb, 65536); |
506 |
} |
533 |
} |
507 |
ssz = curp->gzip ? |
534 |
ssz = bz ? BZ2_bzRead(&bzerrnum, bz, fb->buf + (int)off, fb->sz - off) : |
508 |
gzread(gz, fb->buf + (int)off, fb->sz - off) : |
535 |
gz ? gzread(gz, fb->buf + (int)off, fb->sz - off) : |
509 |
read(fd, fb->buf + (int)off, fb->sz - off); |
536 |
read(fd, fb->buf + (int)off, fb->sz - off); |
510 |
if (ssz == 0) { |
537 |
if (ssz == 0) { |
511 |
fb->sz = off; |
538 |
fb->sz = off; |
Lines 513-529
Link Here
|
513 |
break; |
540 |
break; |
514 |
} |
541 |
} |
515 |
if (ssz == -1) { |
542 |
if (ssz == -1) { |
516 |
if (curp->gzip) |
543 |
if (gz) |
517 |
(void)gzerror(gz, &gzerrnum); |
544 |
// BZ2_bzerror is the only way to get an error string, |
|
|
545 |
// yet it accepts a BZFILE* and writes to bzerrnum. |
518 |
mandoc_msg(MANDOCERR_READ, 0, 0, "%s", |
546 |
mandoc_msg(MANDOCERR_READ, 0, 0, "%s", |
519 |
curp->gzip && gzerrnum != Z_ERRNO ? |
547 |
bz && bzerrnum < 0 ? BZ2_bzerror(bz, &bzerrnum) : |
520 |
zError(gzerrnum) : strerror(errno)); |
548 |
gz && gzerrnum != Z_ERRNO ? zError(gzerrnum) : |
|
|
549 |
strerror(errno)); |
521 |
break; |
550 |
break; |
522 |
} |
551 |
} |
523 |
off += (size_t)ssz; |
552 |
off += (size_t)ssz; |
524 |
} |
553 |
} |
525 |
|
554 |
|
526 |
if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK) |
555 |
if (bz) |
|
|
556 |
BZ2_bzclose(bz); |
557 |
else if (gz && (gzerrnum = gzclose(gz)) != Z_OK) |
527 |
mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s", |
558 |
mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s", |
528 |
gzerrnum == Z_ERRNO ? strerror(errno) : |
559 |
gzerrnum == Z_ERRNO ? strerror(errno) : |
529 |
zError(gzerrnum)); |
560 |
zError(gzerrnum)); |
Lines 627-633
Link Here
|
627 |
int fd, save_errno; |
658 |
int fd, save_errno; |
628 |
|
659 |
|
629 |
cp = strrchr(file, '.'); |
660 |
cp = strrchr(file, '.'); |
630 |
curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz")); |
661 |
if (cp) { |
|
|
662 |
if (!strcmp(cp+1, "gz")) |
663 |
curp->compressor = compressor_gzip; |
664 |
else if (!strcmp(cp+1, "bz2")) |
665 |
curp->compressor = compressor_bzip2; |
666 |
} |
631 |
|
667 |
|
632 |
/* First try to use the filename as it is. */ |
668 |
/* First try to use the filename as it is. */ |
633 |
|
669 |
|
Lines 635-657
Link Here
|
635 |
return fd; |
671 |
return fd; |
636 |
|
672 |
|
637 |
/* |
673 |
/* |
638 |
* If that doesn't work and the filename doesn't |
674 |
* If that doesn't work and the filename doesn't already |
639 |
* already end in .gz, try appending .gz. |
675 |
* end in a compressor extension, try them all. |
640 |
*/ |
676 |
*/ |
641 |
|
677 |
|
642 |
if ( ! curp->gzip) { |
678 |
if (curp->compressor) { |
643 |
save_errno = errno; |
679 |
/* Already ends in a compressor extension. */ |
644 |
mandoc_asprintf(&cp, "%s.gz", file); |
680 |
return -1; |
645 |
fd = open(cp, O_RDONLY); |
681 |
} |
646 |
free(cp); |
682 |
|
647 |
errno = save_errno; |
683 |
save_errno = errno; |
648 |
if (fd != -1) { |
684 |
mandoc_asprintf(&cp, "%s.bz2", file); |
649 |
curp->gzip = 1; |
685 |
fd = open(cp, O_RDONLY); |
650 |
return fd; |
686 |
free(cp); |
651 |
} |
687 |
errno = save_errno; |
|
|
688 |
if (fd != -1) { |
689 |
curp->compressor = compressor_bzip2; |
690 |
return fd; |
691 |
} |
692 |
|
693 |
save_errno = errno; |
694 |
mandoc_asprintf(&cp, "%s.gz", file); |
695 |
fd = open(cp, O_RDONLY); |
696 |
free(cp); |
697 |
errno = save_errno; |
698 |
if (fd != -1) { |
699 |
curp->compressor = compressor_gzip; |
700 |
return fd; |
652 |
} |
701 |
} |
653 |
|
702 |
|
654 |
/* Neither worked, give up. */ |
703 |
/* Nothing worked, give up. */ |
655 |
|
704 |
|
656 |
return -1; |
705 |
return -1; |
657 |
} |
706 |
} |
Lines 692-698
Link Here
|
692 |
roff_man_reset(curp->man); |
741 |
roff_man_reset(curp->man); |
693 |
free_buf_list(curp->secondary); |
742 |
free_buf_list(curp->secondary); |
694 |
curp->secondary = NULL; |
743 |
curp->secondary = NULL; |
695 |
curp->gzip = 0; |
744 |
curp->compressor = 0; |
696 |
tag_alloc(); |
745 |
tag_alloc(); |
697 |
} |
746 |
} |
698 |
|
747 |
|