diff options
Diffstat (limited to 'zlib/examples/gzjoin.c')
-rw-r--r-- | zlib/examples/gzjoin.c | 448 |
1 files changed, 448 insertions, 0 deletions
diff --git a/zlib/examples/gzjoin.c b/zlib/examples/gzjoin.c new file mode 100644 index 000000000..129347ce3 --- /dev/null +++ b/zlib/examples/gzjoin.c @@ -0,0 +1,448 @@ +/* gzjoin -- command to join gzip files into one gzip file + + Copyright (C) 2004 Mark Adler, all rights reserved + version 1.0, 11 Dec 2004 + + This software is provided 'as-is', without any express or implied + warranty. In no event will the author be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Mark Adler madler@alumni.caltech.edu + */ + +/* + * Change history: + * + * 1.0 11 Dec 2004 - First version + * 1.1 12 Jun 2005 - Changed ssize_t to long for portability + */ + +/* + gzjoin takes one or more gzip files on the command line and writes out a + single gzip file that will uncompress to the concatenation of the + uncompressed data from the individual gzip files. gzjoin does this without + having to recompress any of the data and without having to calculate a new + crc32 for the concatenated uncompressed data. gzjoin does however have to + decompress all of the input data in order to find the bits in the compressed + data that need to be modified to concatenate the streams. + + gzjoin does not do an integrity check on the input gzip files other than + checking the gzip header and decompressing the compressed data. They are + otherwise assumed to be complete and correct. + + Each joint between gzip files removes at least 18 bytes of previous trailer + and subsequent header, and inserts an average of about three bytes to the + compressed data in order to connect the streams. The output gzip file + has a minimal ten-byte gzip header with no file name or modification time. + + This program was written to illustrate the use of the Z_BLOCK option of + inflate() and the crc32_combine() function. gzjoin will not compile with + versions of zlib earlier than 1.2.3. + */ + +#include <stdio.h> /* fputs(), fprintf(), fwrite(), putc() */ +#include <stdlib.h> /* exit(), malloc(), free() */ +#include <fcntl.h> /* open() */ +#include <unistd.h> /* close(), read(), lseek() */ +#include "zlib.h" + /* crc32(), crc32_combine(), inflateInit2(), inflate(), inflateEnd() */ + +#define local static + +/* exit with an error (return a value to allow use in an expression) */ +local int bail(char *why1, char *why2) +{ + fprintf(stderr, "gzjoin error: %s%s, output incomplete\n", why1, why2); + exit(1); + return 0; +} + +/* -- simple buffered file input with access to the buffer -- */ + +#define CHUNK 32768 /* must be a power of two and fit in unsigned */ + +/* bin buffered input file type */ +typedef struct { + char *name; /* name of file for error messages */ + int fd; /* file descriptor */ + unsigned left; /* bytes remaining at next */ + unsigned char *next; /* next byte to read */ + unsigned char *buf; /* allocated buffer of length CHUNK */ +} bin; + +/* close a buffered file and free allocated memory */ +local void bclose(bin *in) +{ + if (in != NULL) { + if (in->fd != -1) + close(in->fd); + if (in->buf != NULL) + free(in->buf); + free(in); + } +} + +/* open a buffered file for input, return a pointer to type bin, or NULL on + failure */ +local bin *bopen(char *name) +{ + bin *in; + + in = malloc(sizeof(bin)); + if (in == NULL) + return NULL; + in->buf = malloc(CHUNK); + in->fd = open(name, O_RDONLY, 0); + if (in->buf == NULL || in->fd == -1) { + bclose(in); + return NULL; + } + in->left = 0; + in->next = in->buf; + in->name = name; + return in; +} + +/* load buffer from file, return -1 on read error, 0 or 1 on success, with + 1 indicating that end-of-file was reached */ +local int bload(bin *in) +{ + long len; + + if (in == NULL) + return -1; + if (in->left != 0) + return 0; + in->next = in->buf; + do { + len = (long)read(in->fd, in->buf + in->left, CHUNK - in->left); + if (len < 0) + return -1; + in->left += (unsigned)len; + } while (len != 0 && in->left < CHUNK); + return len == 0 ? 1 : 0; +} + +/* get a byte from the file, bail if end of file */ +#define bget(in) (in->left ? 0 : bload(in), \ + in->left ? (in->left--, *(in->next)++) : \ + bail("unexpected end of file on ", in->name)) + +/* get a four-byte little-endian unsigned integer from file */ +local unsigned long bget4(bin *in) +{ + unsigned long val; + + val = bget(in); + val += (unsigned long)(bget(in)) << 8; + val += (unsigned long)(bget(in)) << 16; + val += (unsigned long)(bget(in)) << 24; + return val; +} + +/* skip bytes in file */ +local void bskip(bin *in, unsigned skip) +{ + /* check pointer */ + if (in == NULL) + return; + + /* easy case -- skip bytes in buffer */ + if (skip <= in->left) { + in->left -= skip; + in->next += skip; + return; + } + + /* skip what's in buffer, discard buffer contents */ + skip -= in->left; + in->left = 0; + + /* seek past multiples of CHUNK bytes */ + if (skip > CHUNK) { + unsigned left; + + left = skip & (CHUNK - 1); + if (left == 0) { + /* exact number of chunks: seek all the way minus one byte to check + for end-of-file with a read */ + lseek(in->fd, skip - 1, SEEK_CUR); + if (read(in->fd, in->buf, 1) != 1) + bail("unexpected end of file on ", in->name); + return; + } + + /* skip the integral chunks, update skip with remainder */ + lseek(in->fd, skip - left, SEEK_CUR); + skip = left; + } + + /* read more input and skip remainder */ + bload(in); + if (skip > in->left) + bail("unexpected end of file on ", in->name); + in->left -= skip; + in->next += skip; +} + +/* -- end of buffered input functions -- */ + +/* skip the gzip header from file in */ +local void gzhead(bin *in) +{ + int flags; + + /* verify gzip magic header and compression method */ + if (bget(in) != 0x1f || bget(in) != 0x8b || bget(in) != 8) + bail(in->name, " is not a valid gzip file"); + + /* get and verify flags */ + flags = bget(in); + if ((flags & 0xe0) != 0) + bail("unknown reserved bits set in ", in->name); + + /* skip modification time, extra flags, and os */ + bskip(in, 6); + + /* skip extra field if present */ + if (flags & 4) { + unsigned len; + + len = bget(in); + len += (unsigned)(bget(in)) << 8; + bskip(in, len); + } + + /* skip file name if present */ + if (flags & 8) + while (bget(in) != 0) + ; + + /* skip comment if present */ + if (flags & 16) + while (bget(in) != 0) + ; + + /* skip header crc if present */ + if (flags & 2) + bskip(in, 2); +} + +/* write a four-byte little-endian unsigned integer to out */ +local void put4(unsigned long val, FILE *out) +{ + putc(val & 0xff, out); + putc((val >> 8) & 0xff, out); + putc((val >> 16) & 0xff, out); + putc((val >> 24) & 0xff, out); +} + +/* Load up zlib stream from buffered input, bail if end of file */ +local void zpull(z_streamp strm, bin *in) +{ + if (in->left == 0) + bload(in); + if (in->left == 0) + bail("unexpected end of file on ", in->name); + strm->avail_in = in->left; + strm->next_in = in->next; +} + +/* Write header for gzip file to out and initialize trailer. */ +local void gzinit(unsigned long *crc, unsigned long *tot, FILE *out) +{ + fwrite("\x1f\x8b\x08\0\0\0\0\0\0\xff", 1, 10, out); + *crc = crc32(0L, Z_NULL, 0); + *tot = 0; +} + +/* Copy the compressed data from name, zeroing the last block bit of the last + block if clr is true, and adding empty blocks as needed to get to a byte + boundary. If clr is false, then the last block becomes the last block of + the output, and the gzip trailer is written. crc and tot maintains the + crc and length (modulo 2^32) of the output for the trailer. The resulting + gzip file is written to out. gzinit() must be called before the first call + of gzcopy() to write the gzip header and to initialize crc and tot. */ +local void gzcopy(char *name, int clr, unsigned long *crc, unsigned long *tot, + FILE *out) +{ + int ret; /* return value from zlib functions */ + int pos; /* where the "last block" bit is in byte */ + int last; /* true if processing the last block */ + bin *in; /* buffered input file */ + unsigned char *start; /* start of compressed data in buffer */ + unsigned char *junk; /* buffer for uncompressed data -- discarded */ + z_off_t len; /* length of uncompressed data (support > 4 GB) */ + z_stream strm; /* zlib inflate stream */ + + /* open gzip file and skip header */ + in = bopen(name); + if (in == NULL) + bail("could not open ", name); + gzhead(in); + + /* allocate buffer for uncompressed data and initialize raw inflate + stream */ + junk = malloc(CHUNK); + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + strm.avail_in = 0; + strm.next_in = Z_NULL; + ret = inflateInit2(&strm, -15); + if (junk == NULL || ret != Z_OK) + bail("out of memory", ""); + + /* inflate and copy compressed data, clear last-block bit if requested */ + len = 0; + zpull(&strm, in); + start = strm.next_in; + last = start[0] & 1; + if (last && clr) + start[0] &= ~1; + strm.avail_out = 0; + for (;;) { + /* if input used and output done, write used input and get more */ + if (strm.avail_in == 0 && strm.avail_out != 0) { + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + + /* decompress -- return early when end-of-block reached */ + strm.avail_out = CHUNK; + strm.next_out = junk; + ret = inflate(&strm, Z_BLOCK); + switch (ret) { + case Z_MEM_ERROR: + bail("out of memory", ""); + case Z_DATA_ERROR: + bail("invalid compressed data in ", in->name); + } + + /* update length of uncompressed data */ + len += CHUNK - strm.avail_out; + + /* check for block boundary (only get this when block copied out) */ + if (strm.data_type & 128) { + /* if that was the last block, then done */ + if (last) + break; + + /* number of unused bits in last byte */ + pos = strm.data_type & 7; + + /* find the next last-block bit */ + if (pos != 0) { + /* next last-block bit is in last used byte */ + pos = 0x100 >> pos; + last = strm.next_in[-1] & pos; + if (last && clr) + strm.next_in[-1] &= ~pos; + } + else { + /* next last-block bit is in next unused byte */ + if (strm.avail_in == 0) { + /* don't have that byte yet -- get it */ + fwrite(start, 1, strm.next_in - start, out); + start = in->buf; + in->left = 0; + zpull(&strm, in); + } + last = strm.next_in[0] & 1; + if (last && clr) + strm.next_in[0] &= ~1; + } + } + } + + /* update buffer with unused input */ + in->left = strm.avail_in; + in->next = strm.next_in; + + /* copy used input, write empty blocks to get to byte boundary */ + pos = strm.data_type & 7; + fwrite(start, 1, in->next - start - 1, out); + last = in->next[-1]; + if (pos == 0 || !clr) + /* already at byte boundary, or last file: write last byte */ + putc(last, out); + else { + /* append empty blocks to last byte */ + last &= ((0x100 >> pos) - 1); /* assure unused bits are zero */ + if (pos & 1) { + /* odd -- append an empty stored block */ + putc(last, out); + if (pos == 1) + putc(0, out); /* two more bits in block header */ + fwrite("\0\0\xff\xff", 1, 4, out); + } + else { + /* even -- append 1, 2, or 3 empty fixed blocks */ + switch (pos) { + case 6: + putc(last | 8, out); + last = 0; + case 4: + putc(last | 0x20, out); + last = 0; + case 2: + putc(last | 0x80, out); + putc(0, out); + } + } + } + + /* update crc and tot */ + *crc = crc32_combine(*crc, bget4(in), len); + *tot += (unsigned long)len; + + /* clean up */ + inflateEnd(&strm); + free(junk); + bclose(in); + + /* write trailer if this is the last gzip file */ + if (!clr) { + put4(*crc, out); + put4(*tot, out); + } +} + +/* join the gzip files on the command line, write result to stdout */ +int main(int argc, char **argv) +{ + unsigned long crc, tot; /* running crc and total uncompressed length */ + + /* skip command name */ + argc--; + argv++; + + /* show usage if no arguments */ + if (argc == 0) { + fputs("gzjoin usage: gzjoin f1.gz [f2.gz [f3.gz ...]] > fjoin.gz\n", + stderr); + return 0; + } + + /* join gzip files on command line and write to stdout */ + gzinit(&crc, &tot, stdout); + while (argc--) + gzcopy(*argv++, argc, &crc, &tot, stdout); + + /* done */ + return 0; +} |