Add bzip2 (.bz2) support for compressed manual pages, next to the
existing gzip (.gz) support: link against libbz2, recognise the .bz2
suffix when searching and indexing, and decompress on read.

NOTE(review): mandoc.h gains MANDOCERR_BZREADOPEN below, but no hunk in
this patch adds a message string for it.  If the messages are kept in a
table indexed by this enum, that table needs a matching entry in the
same position -- confirm before applying.

diff -ru mandoc-1.14.6.orig/configure mandoc-1.14.6/configure
--- mandoc-1.14.6.orig/configure	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/configure	2022-06-25 02:04:14.250972763 -0700
@@ -430,7 +430,7 @@
 [ "${FATAL}" -eq 0 ] || exit 1
 
 # --- LDADD ---
-LDADD="${LDADD} ${LD_NANOSLEEP} ${LD_RECVMSG} ${LD_OHASH} -lz"
+LDADD="${LDADD} ${LD_NANOSLEEP} ${LD_RECVMSG} ${LD_OHASH} -lbz2 -lz"
 echo "selected LDADD=\"${LDADD}\"" 1>&2
 echo "selected LDADD=\"${LDADD}\"" 1>&3
 echo 1>&3
diff -ru mandoc-1.14.6.orig/lib.in mandoc-1.14.6/lib.in
--- mandoc-1.14.6.orig/lib.in	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/lib.in	2022-06-25 01:32:29.163255485 -0700
@@ -33,6 +33,7 @@
 LINE("libbluetooth",	"Bluetooth Library (libbluetooth, \\-lbluetooth)")
 LINE("libbsdxml",	"eXpat XML parser library (libbsdxml, \\-lbsdxml)")
 LINE("libbsm",		"Basic Security Module Library (libbsm, \\-lbsm)")
+LINE("libbz2",		"Compression Library (libbz2, \\-lbz2)")
 LINE("libc",		"Standard C\\~Library (libc, \\-lc)")
 LINE("libc_r",		"Reentrant C\\~Library (libc_r, \\-lc_r)")
 LINE("libcalendar",	"Calendar Arithmetic Library (libcalendar, \\-lcalendar)")
diff -ru mandoc-1.14.6.orig/main.c mandoc-1.14.6/main.c
--- mandoc-1.14.6.orig/main.c	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/main.c	2022-06-25 02:16:14.881985507 -0700
@@ -538,7 +538,10 @@
 					prio += 10; /* Wrong dir name. */
 				if (search.sec != NULL) {
 					ep = strchr(sec, '\0');
-					if (ep - sec > 3 &&
+					if (ep - sec > 4 &&
+					    strncmp(ep - 4, ".bz2", 4) == 0)
+						ep -= 4;
+					else if (ep - sec > 3 &&
 					    strncmp(ep - 3, ".gz", 3) == 0)
 						ep -= 3;
 					if ((size_t)(ep - sec) < ssz + 3 ||
diff -ru mandoc-1.14.6.orig/mandocdb.c mandoc-1.14.6/mandocdb.c
--- mandoc-1.14.6.orig/mandocdb.c	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/mandocdb.c	2022-06-25 02:23:20.122402932 -0700
@@ -70,6 +70,12 @@
 	OP_TEST /* change no databases, report potential problems */
 };
 
+enum compressor {
+	compressor_none,
+	compressor_bzip2,
+	compressor_gzip
+};
+
 struct	str {
 	const struct mpage *mpage; /* if set, the owning parse */
 	uint64_t	 mask; /* bitmask in sequence */
@@ -102,7 +108,7 @@
 	char		*fsec;    /* section from file name suffix */
 	struct mlink	*next;    /* singly linked list */
 	struct mpage	*mpage;   /* parent */
-	int		 gzip;	  /* filename has a .gz suffix */
+	enum compressor	 compressor; /* filename has a compressor suffix */
 	enum form	 dform;   /* format from directory */
 	enum form	 fform;   /* format from file name suffix */
 };
@@ -580,7 +586,7 @@
 	FTS		*f;
 	FTSENT		*ff;
 	struct mlink	*mlink;
-	int		 gzip;
+	enum compressor	 compressor;
 	enum form	 dform;
 	char		*dsec, *arch, *fsec, *cp;
 	const char	*path;
@@ -645,13 +651,18 @@
 					say(path, "Extraneous file");
 				continue;
 			}
-			gzip = 0;
+			compressor = compressor_none;
 			fsec = NULL;
 			while (fsec == NULL) {
 				fsec = strrchr(ff->fts_name, '.');
-				if (fsec == NULL || strcmp(fsec+1, "gz"))
+				if (fsec == NULL)
+					break;
+				if (!strcmp(fsec+1, "bz2"))
+					compressor = compressor_bzip2;
+				else if (!strcmp(fsec+1, "gz"))
+					compressor = compressor_gzip;
+				else
 					break;
-				gzip = 1;
 				*fsec = '\0';
 				fsec = NULL;
 			}
@@ -697,7 +708,7 @@
 			mlink->arch = arch;
 			mlink->name = ff->fts_name;
 			mlink->fsec = fsec;
-			mlink->gzip = gzip;
+			mlink->compressor = compressor;
 			mlink_add(mlink, ff->fts_statp);
 			continue;
 
@@ -1209,6 +1220,12 @@
 			mlink_dest = ohash_find(&mlinks,
 			    ohash_qlookup(&mlinks, meta->sodest));
 			if (mlink_dest == NULL) {
+				mandoc_asprintf(&cp, "%s.bz2", meta->sodest);
+				mlink_dest = ohash_find(&mlinks,
+				    ohash_qlookup(&mlinks, cp));
+				free(cp);
+			}
+			if (mlink_dest == NULL) {
 				mandoc_asprintf(&cp, "%s.gz", meta->sodest);
 				mlink_dest = ohash_find(&mlinks,
 				    ohash_qlookup(&mlinks, cp));
diff -ru mandoc-1.14.6.orig/mandoc.h mandoc-1.14.6/mandoc.h
--- mandoc-1.14.6.orig/mandoc.h	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/mandoc.h	2022-06-25 02:06:03.674822854 -0700
@@ -261,6 +261,7 @@
 
 	MANDOCERR_SYSERR, /* ===== start of system errors ===== */
 
+	MANDOCERR_BZREADOPEN,
 	MANDOCERR_DUP,
 	MANDOCERR_EXEC,
 	MANDOCERR_FDOPEN,
diff -ru mandoc-1.14.6.orig/read.c mandoc-1.14.6/read.c
--- mandoc-1.14.6.orig/read.c	2021-09-23 11:03:23.000000000 -0700
+++ mandoc-1.14.6/read.c	2022-06-25 04:05:55.535970114 -0700
@@ -38,6 +38,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <zlib.h>
+#include <bzlib.h>
 
 #include "mandoc_aux.h"
 #include "mandoc.h"
@@ -51,6 +52,8 @@
 
 #define	REPARSE_LIMIT	1000
 
+enum compressor { compressor_none, compressor_bzip2, compressor_gzip };
+
 struct	mparse {
 	struct roff	 *roff; /* roff parser (!NULL) */
 	struct roff_man	 *man; /* man parser */
@@ -59,7 +62,7 @@
 	struct buf	 *loop; /* open .while request line */
 	const char	 *os_s; /* default operating system */
 	int		  options; /* parser options */
-	int		  gzip; /* current input file is gzipped */
+	enum compressor	  compressor; /* current input file is compressed */
 	int		  filenc; /* encoding of the current file */
 	int		  reparse_count; /* finite interp. stack */
 	int		  line; /* line number in the file */
@@ -435,10 +438,11 @@
 read_whole_file(struct mparse *curp, int fd, struct buf *fb, int *with_mmap)
 {
 	struct stat	 st;
+	BZFILE		*bz;
 	gzFile		 gz;
 	size_t		 off;
 	ssize_t		 ssz;
-	int		 gzerrnum, retval;
+	int		 bzerrnum = 0, gzerrnum = 0, retval;
 
 	if (fstat(fd, &st) == -1) {
 		mandoc_msg(MANDOCERR_FSTAT, 0, 0, "%s", strerror(errno));
@@ -452,7 +456,7 @@
 	 * concerned that this is going to tank any machines.
 	 */
 
-	if (curp->gzip == 0 && S_ISREG(st.st_mode)) {
+	if (curp->compressor == 0 && S_ISREG(st.st_mode)) {
 		if (st.st_size > 0x7fffffff) {
 			mandoc_msg(MANDOCERR_TOOLARGE, 0, 0, NULL);
 			return -1;
@@ -464,7 +468,7 @@
 			return 0;
 	}
 
-	if (curp->gzip) {
+	if (curp->compressor) {
 		/*
 		 * Duplicating the file descriptor is required
 		 * because we will have to call gzclose(3)
@@ -477,14 +481,41 @@
 			    "%s", strerror(errno));
 			return -1;
 		}
+	}
+
+	bz = NULL;
+	gz = NULL;
+	switch (curp->compressor) {
+	case compressor_none:
+		break;
+	case compressor_bzip2:
+		{
+			FILE *fp = fdopen(fd, "rb");
+			if (fp == NULL) {
+				mandoc_msg(MANDOCERR_FDOPEN, 0, 0,
+				    "%s", strerror(errno));
+				close(fd);
+				return -1;
+			}
+			bz = BZ2_bzReadOpen(&bzerrnum, fp, 0, 0, NULL, 0);
+			if (bz == NULL) {
+				mandoc_msg(MANDOCERR_BZREADOPEN, 0, 0,
+				    "%s", strerror(errno));
+				/* fclose(3) also closes the underlying fd. */
+				fclose(fp);
+				return -1;
+			}
+		}
+		break;
+	case compressor_gzip:
 		if ((gz = gzdopen(fd, "rb")) == NULL) {
 			mandoc_msg(MANDOCERR_GZDOPEN, 0, 0,
 			    "%s", strerror(errno));
 			close(fd);
 			return -1;
 		}
-	} else
-		gz = NULL;
+		break;
+	}
 
 	/*
 	 * If this isn't a regular file (like, say, stdin), then we must
@@ -504,8 +535,24 @@
 			}
 			resize_buf(fb, 65536);
 		}
-		ssz = curp->gzip ?
-		    gzread(gz, fb->buf + (int)off, fb->sz - off) :
-		    read(fd, fb->buf + (int)off, fb->sz - off);
+		if (bz != NULL) {
+			/*
+			 * BZ2_bzRead() never returns -1; it reports
+			 * failure only through bzerrnum.  Reading again
+			 * after BZ_STREAM_END is a sequence error, so
+			 * treat the end of the stream as end of file.
+			 */
+			if (bzerrnum == BZ_STREAM_END)
+				ssz = 0;
+			else {
+				ssz = BZ2_bzRead(&bzerrnum, bz,
+				    fb->buf + (int)off, fb->sz - off);
+				if (bzerrnum < 0)
+					ssz = -1;
+			}
+		} else if (gz != NULL)
+			ssz = gzread(gz, fb->buf + (int)off, fb->sz - off);
+		else
+			ssz = read(fd, fb->buf + (int)off, fb->sz - off);
 		if (ssz == 0) {
 			fb->sz = off;
@@ -513,17 +560,24 @@
 			break;
 		}
 		if (ssz == -1) {
-			if (curp->gzip)
+			if (gz)
 				(void)gzerror(gz, &gzerrnum);
+			/*
+			 * BZ2_bzerror() is the only way to get an error
+			 * string; it takes the BZFILE and rewrites bzerrnum.
+			 */
 			mandoc_msg(MANDOCERR_READ, 0, 0, "%s",
-			    curp->gzip && gzerrnum != Z_ERRNO ?
-			    zError(gzerrnum) : strerror(errno));
+			    bz && bzerrnum < 0 ? BZ2_bzerror(bz, &bzerrnum) :
+			    gz && gzerrnum != Z_ERRNO ? zError(gzerrnum) :
+			    strerror(errno));
 			break;
 		}
 		off += (size_t)ssz;
 	}
 
-	if (curp->gzip && (gzerrnum = gzclose(gz)) != Z_OK)
+	if (bz)
+		BZ2_bzclose(bz);
+	else if (gz && (gzerrnum = gzclose(gz)) != Z_OK)
 		mandoc_msg(MANDOCERR_GZCLOSE, 0, 0, "%s",
 		    gzerrnum == Z_ERRNO ? strerror(errno) :
 		    zError(gzerrnum));
@@ -627,7 +681,12 @@
 	int		  fd, save_errno;
 
 	cp = strrchr(file, '.');
-	curp->gzip = (cp != NULL && ! strcmp(cp + 1, "gz"));
+	if (cp != NULL && strcmp(cp + 1, "gz") == 0)
+		curp->compressor = compressor_gzip;
+	else if (cp != NULL && strcmp(cp + 1, "bz2") == 0)
+		curp->compressor = compressor_bzip2;
+	else
+		curp->compressor = compressor_none;
 
 	/* First try to use the filename as it is. */
 
@@ -635,23 +694,36 @@
 		return fd;
 
 	/*
-	 * If that doesn't work and the filename doesn't
-	 * already end in .gz, try appending .gz.
+	 * If that doesn't work and the filename doesn't already
+	 * end in a compressor extension, try them all.
 	 */
 
-	if ( ! curp->gzip) {
-		save_errno = errno;
-		mandoc_asprintf(&cp, "%s.gz", file);
-		fd = open(cp, O_RDONLY);
-		free(cp);
-		errno = save_errno;
-		if (fd != -1) {
-			curp->gzip = 1;
-			return fd;
-		}
+	if (curp->compressor) {
+		/* Already ends in a compressor extension. */
+		return -1;
+	}
+
+	save_errno = errno;
+	mandoc_asprintf(&cp, "%s.bz2", file);
+	fd = open(cp, O_RDONLY);
+	free(cp);
+	errno = save_errno;
+	if (fd != -1) {
+		curp->compressor = compressor_bzip2;
+		return fd;
+	}
+
+	save_errno = errno;
+	mandoc_asprintf(&cp, "%s.gz", file);
+	fd = open(cp, O_RDONLY);
+	free(cp);
+	errno = save_errno;
+	if (fd != -1) {
+		curp->compressor = compressor_gzip;
+		return fd;
 	}
 
-	/* Neither worked, give up. */
+	/* Nothing worked, give up. */
 
 	return -1;
 }
@@ -692,7 +764,7 @@
 	roff_man_reset(curp->man);
 	free_buf_list(curp->secondary);
 	curp->secondary = NULL;
-	curp->gzip = 0;
+	curp->compressor = compressor_none;
 	tag_alloc();
 }
 