/*- * SPDX-License-Identifier: BSD-2-Clause * * Copyright (c) 2004-2016 Maxim Sobolev <sobomax@FreeBSD.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include <sys/cdefs.h> #include <sys/types.h> #include <sys/endian.h> #include <sys/param.h> #include <sys/sysctl.h> #include <sys/stat.h> #include <sys/uio.h> #include <netinet/in.h> #include <assert.h> #include <ctype.h> #include <err.h> #include <fcntl.h> #include <pthread.h> #include <signal.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "mkuzip.h" #include "mkuz_cloop.h" #include "mkuz_blockcache.h" #include "mkuz_lzma.h" #include "mkuz_zlib.h" #include "mkuz_zstd.h" #include "mkuz_blk.h" #include "mkuz_cfg.h" #include "mkuz_conveyor.h" #include "mkuz_format.h" #include "mkuz_fqueue.h" #include "mkuz_time.h" #include "mkuz_insize.h" #define DEFAULT_CLSTSIZE 16384 enum UZ_ALGORITHM { UZ_ZLIB = 0, UZ_LZMA, UZ_ZSTD, UZ_INVALID }; static const struct mkuz_format uzip_fmts[] = { [UZ_ZLIB] = { .option = "zlib", .magic = CLOOP_MAGIC_ZLIB, .default_sufx = DEFAULT_SUFX_ZLIB, .f_compress_bound = mkuz_zlib_cbound, .f_init = mkuz_zlib_init, .f_compress = mkuz_zlib_compress, }, [UZ_LZMA] = { .option = "lzma", .magic = CLOOP_MAGIC_LZMA, .default_sufx = DEFAULT_SUFX_LZMA, .f_compress_bound = mkuz_lzma_cbound, .f_init = mkuz_lzma_init, .f_compress = mkuz_lzma_compress, }, [UZ_ZSTD] = { .option = "zstd", .magic = CLOOP_MAGIC_ZSTD, .default_sufx = DEFAULT_SUFX_ZSTD, .f_compress_bound = mkuz_zstd_cbound, .f_init = mkuz_zstd_init, .f_compress = mkuz_zstd_compress, }, }; static struct mkuz_blk *readblock(int, u_int32_t); static void usage(void) __dead2; static void cleanup(void); static char *cleanfile = NULL; static int cmp_blkno(const struct mkuz_blk *bp, void *p) { uint32_t *ap; ap = (uint32_t *)p; return (bp->info.blkno == *ap); } int main(int argc, char **argv) { struct mkuz_cfg cfs; char *oname; uint64_t *toc; int i, io, opt, tmp; struct { int en; FILE *f; } summary; struct iovec iov[2]; uint64_t offset, last_offset; struct cloop_header hdr; struct mkuz_conveyor *cvp; struct mkuz_blk_info *chit; size_t ncpusz, ncpu, magiclen; double st, et; enum UZ_ALGORITHM comp_alg; int comp_level; st = getdtime(); ncpusz = sizeof(size_t); if (sysctlbyname("hw.ncpu", &ncpu, &ncpusz, NULL, 0) < 0) { ncpu = 1; } else if (ncpu > MAX_WORKERS_AUTO) { ncpu = MAX_WORKERS_AUTO; } memset(&hdr, 0, sizeof(hdr)); cfs.blksz = DEFAULT_CLSTSIZE; oname = NULL; cfs.verbose = 0; cfs.no_zcomp = 0; cfs.en_dedup = 0; summary.en = 0; summary.f = stderr; comp_alg = UZ_ZLIB; comp_level = USE_DEFAULT_LEVEL; cfs.nworkers = ncpu; struct mkuz_blk *iblk, *oblk; while((opt = getopt(argc, argv, "A:C:o:s:vZdLSj:")) != -1) { switch(opt) { case 'A': for (tmp = UZ_ZLIB; tmp < UZ_INVALID; tmp++) { if (strcmp(uzip_fmts[tmp].option, optarg) == 0) break; } if (tmp == UZ_INVALID) errx(1, "invalid algorithm specified: %s", optarg); /* Not reached */ comp_alg = tmp; break; case 'C': comp_level = atoi(optarg); break; case 'o': oname = optarg; break; case 's': tmp = atoi(optarg); if (tmp <= 0) { errx(1, "invalid cluster size specified: %s", optarg); /* Not reached */ } cfs.blksz = tmp; break; case 'v': cfs.verbose = 1; break; case 'Z': cfs.no_zcomp = 1; break; case 'd': cfs.en_dedup = 1; break; case 'L': comp_alg = UZ_LZMA; break; case 'S': summary.en = 1; summary.f = stdout; break; case 'j': tmp = atoi(optarg); if (tmp <= 0) { errx(1, "invalid number of compression threads" " specified: %s", optarg); /* Not reached */ } cfs.nworkers = tmp; break; default: usage(); /* Not reached */ } } argc -= optind; argv += optind; if (argc != 1) { usage(); /* Not reached */ } cfs.handler = &uzip_fmts[comp_alg]; magiclen = strlcpy(hdr.magic, cfs.handler->magic, sizeof(hdr.magic)); assert(magiclen < sizeof(hdr.magic)); if (cfs.en_dedup != 0) { /* * Dedupe requires a version 3 format. Don't downgrade newer * formats. */ if (hdr.magic[CLOOP_OFS_VERSN] == CLOOP_MAJVER_2) hdr.magic[CLOOP_OFS_VERSN] = CLOOP_MAJVER_3; hdr.magic[CLOOP_OFS_COMPR] = tolower(hdr.magic[CLOOP_OFS_COMPR]); } if (cfs.blksz % DEV_BSIZE != 0) errx(1, "cluster size should be multiple of %d", DEV_BSIZE); cfs.cbound_blksz = cfs.handler->f_compress_bound(cfs.blksz); if (cfs.cbound_blksz > MAXPHYS) errx(1, "maximal compressed cluster size %zu greater than MAXPHYS %zu", cfs.cbound_blksz, (size_t)MAXPHYS); cfs.handler->f_init(&comp_level); cfs.comp_level = comp_level; cfs.iname = argv[0]; if (oname == NULL) { asprintf(&oname, "%s%s", cfs.iname, cfs.handler->default_sufx); if (oname == NULL) { err(1, "can't allocate memory"); /* Not reached */ } } signal(SIGHUP, exit); signal(SIGINT, exit); signal(SIGTERM, exit); signal(SIGXCPU, exit); signal(SIGXFSZ, exit); atexit(cleanup); cfs.fdr = open(cfs.iname, O_RDONLY); if (cfs.fdr < 0) { err(1, "open(%s)", cfs.iname); /* Not reached */ } cfs.isize = mkuz_get_insize(&cfs); if (cfs.isize < 0) { errx(1, "can't determine input image size"); /* Not reached */ } hdr.nblocks = cfs.isize / cfs.blksz; if ((cfs.isize % cfs.blksz) != 0) { if (cfs.verbose != 0) fprintf(stderr, "file size is not multiple " "of %d, padding data\n", cfs.blksz); hdr.nblocks++; } toc = mkuz_safe_malloc((hdr.nblocks + 1) * sizeof(*toc)); /* * Initialize last+1 entry with non-heap trash. If final padding is * added later, it may or may not be overwritten with an offset * representing the length of the final compressed block. If not, * initialize to a defined value. */ toc[hdr.nblocks] = 0; cfs.fdw = open(oname, (cfs.en_dedup ? O_RDWR : O_WRONLY) | O_TRUNC | O_CREAT, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH); if (cfs.fdw < 0) { err(1, "open(%s)", oname); /* Not reached */ } cleanfile = oname; /* Prepare header that we will write later when we have index ready. */ iov[0].iov_base = (char *)&hdr; iov[0].iov_len = sizeof(hdr); iov[1].iov_base = (char *)toc; iov[1].iov_len = (hdr.nblocks + 1) * sizeof(*toc); offset = iov[0].iov_len + iov[1].iov_len; /* Reserve space for header */ lseek(cfs.fdw, offset, SEEK_SET); if (cfs.verbose != 0) { fprintf(stderr, "data size %ju bytes, number of clusters " "%u, index length %zu bytes\n", cfs.isize, hdr.nblocks, iov[1].iov_len); } cvp = mkuz_conveyor_ctor(&cfs); last_offset = 0; iblk = oblk = NULL; for(i = io = 0; iblk != MKUZ_BLK_EOF; i++) { iblk = readblock(cfs.fdr, cfs.blksz); mkuz_fqueue_enq(cvp->wrk_queue, iblk); if (iblk != MKUZ_BLK_EOF && (i < (cfs.nworkers * ITEMS_PER_WORKER))) { continue; } drain: oblk = mkuz_fqueue_deq_when(cvp->results, cmp_blkno, &io); assert(oblk->info.blkno == (unsigned)io); oblk->info.offset = offset; chit = NULL; if (cfs.en_dedup != 0 && oblk->info.len > 0) { chit = mkuz_blkcache_regblock(cfs.fdw, oblk); /* * There should be at least one non-empty block * between us and the backref'ed offset, otherwise * we won't be able to parse that sequence correctly * as it would be indistinguishible from another * empty block. */ if (chit != NULL && chit->offset == last_offset) { chit = NULL; } } if (chit != NULL) { toc[io] = htobe64(chit->offset); oblk->info.len = 0; } else { if (oblk->info.len > 0 && write(cfs.fdw, oblk->data, oblk->info.len) < 0) { err(1, "write(%s)", oname); /* Not reached */ } toc[io] = htobe64(offset); last_offset = offset; offset += oblk->info.len; } if (cfs.verbose != 0) { fprintf(stderr, "cluster #%d, in %u bytes, " "out len=%lu offset=%lu", io, cfs.blksz, (u_long)oblk->info.len, (u_long)be64toh(toc[io])); if (chit != NULL) { fprintf(stderr, " (backref'ed to #%d)", chit->blkno); } fprintf(stderr, "\n"); } free(oblk); io += 1; if (iblk == MKUZ_BLK_EOF) { if (io < i) goto drain; /* Last block, see if we need to add some padding */ if ((offset % DEV_BSIZE) == 0) continue; oblk = mkuz_blk_ctor(DEV_BSIZE - (offset % DEV_BSIZE)); oblk->info.blkno = io; oblk->info.len = oblk->alen; if (cfs.verbose != 0) { fprintf(stderr, "padding data with %lu bytes " "so that file size is multiple of %d\n", (u_long)oblk->alen, DEV_BSIZE); } mkuz_fqueue_enq(cvp->results, oblk); goto drain; } } close(cfs.fdr); if (cfs.verbose != 0 || summary.en != 0) { et = getdtime(); fprintf(summary.f, "compressed data to %ju bytes, saved %lld " "bytes, %.2f%% decrease, %.2f bytes/sec.\n", offset, (long long)(cfs.isize - offset), 100.0 * (long long)(cfs.isize - offset) / (float)cfs.isize, (float)cfs.isize / (et - st)); } /* Convert to big endian */ hdr.blksz = htonl(cfs.blksz); hdr.nblocks = htonl(hdr.nblocks); /* Write headers into pre-allocated space */ lseek(cfs.fdw, 0, SEEK_SET); if (writev(cfs.fdw, iov, 2) < 0) { err(1, "writev(%s)", oname); /* Not reached */ } cleanfile = NULL; close(cfs.fdw); exit(0); } static struct mkuz_blk * readblock(int fd, u_int32_t clstsize) { int numread; struct mkuz_blk *rval; static int blockcnt; off_t cpos; rval = mkuz_blk_ctor(clstsize); rval->info.blkno = blockcnt; blockcnt += 1; cpos = lseek(fd, 0, SEEK_CUR); if (cpos < 0) { err(1, "readblock: lseek() failed"); /* Not reached */ } rval->info.offset = cpos; numread = read(fd, rval->data, clstsize); if (numread < 0) { err(1, "readblock: read() failed"); /* Not reached */ } if (numread == 0) { free(rval); return MKUZ_BLK_EOF; } rval->info.len = numread; return rval; } static void usage(void) { fprintf(stderr, "usage: mkuzip [-vZdLS] [-o outfile] [-s cluster_size] " "[-j ncompr] infile\n"); exit(1); } void * mkuz_safe_malloc(size_t size) { void *retval; retval = malloc(size); if (retval == NULL) { err(1, "can't allocate memory"); /* Not reached */ } return retval; } void * mkuz_safe_zmalloc(size_t size) { void *retval; retval = mkuz_safe_malloc(size); bzero(retval, size); return retval; } static void cleanup(void) { if (cleanfile != NULL) unlink(cleanfile); } int mkuz_memvcmp(const void *memory, unsigned char val, size_t size) { const u_char *mm; mm = (const u_char *)memory; return (*mm == val) && memcmp(mm, mm + 1, size - 1) == 0; }