tar.c (14756B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <sys/stat.h> 3 #include <sys/time.h> 4 #include <sys/types.h> 5 #ifndef major 6 #include <sys/sysmacros.h> 7 #endif 8 9 #include <assert.h> 10 #include <errno.h> 11 #include <fcntl.h> 12 #include <grp.h> 13 #include <libgen.h> 14 #include <pwd.h> 15 #include <stdio.h> 16 #include <stdlib.h> 17 #include <string.h> 18 #include <unistd.h> 19 20 #include "fs.h" 21 #include "util.h" 22 23 #define BLKSIZ (sizeof (struct header)) /* must equal 512 bytes */ 24 25 enum Type { 26 REG = '0', 27 AREG = '\0', 28 HARDLINK = '1', 29 SYMLINK = '2', 30 CHARDEV = '3', 31 BLOCKDEV = '4', 32 DIRECTORY = '5', 33 FIFO = '6', 34 RESERVED = '7' 35 }; 36 37 struct header { 38 char name[100]; 39 char mode[8]; 40 char uid[8]; 41 char gid[8]; 42 char size[12]; 43 char mtime[12]; 44 char chksum[8]; 45 char type; 46 char linkname[100]; 47 char magic[6]; 48 char version[2]; 49 char uname[32]; 50 char gname[32]; 51 char major[8]; 52 char minor[8]; 53 char prefix[155]; 54 char padding[12]; 55 }; 56 57 static struct dirtime { 58 char *name; 59 time_t mtime; 60 } *dirtimes; 61 62 static size_t dirtimeslen; 63 64 static int tarfd; 65 static ino_t tarinode; 66 static dev_t tardev; 67 68 static int mflag, vflag; 69 static int filtermode; 70 static const char *filtertool; 71 72 static const char *filtertools[] = { 73 ['J'] = "xz", 74 ['Z'] = "compress", 75 ['a'] = "lzma", 76 ['j'] = "bzip2", 77 ['z'] = "gzip", 78 }; 79 80 static void 81 pushdirtime(char *name, time_t mtime) 82 { 83 dirtimes = ereallocarray(dirtimes, dirtimeslen + 1, sizeof(*dirtimes)); 84 dirtimes[dirtimeslen].name = estrdup(name); 85 dirtimes[dirtimeslen].mtime = mtime; 86 dirtimeslen++; 87 } 88 89 static struct dirtime * 90 popdirtime(void) 91 { 92 if (dirtimeslen) { 93 dirtimeslen--; 94 return &dirtimes[dirtimeslen]; 95 } 96 return NULL; 97 } 98 99 static int 100 comp(int fd, const char *tool, const char *flags) 101 { 102 int fds[2]; 103 104 if (pipe(fds) < 0) 105 eprintf("pipe:"); 106 107 switch (fork()) { 108 case -1: 109 eprintf("fork:"); 110 case 0: 111 dup2(fd, 1); 112 dup2(fds[0], 0); 113 close(fds[0]); 114 close(fds[1]); 115 116 execlp(tool, tool, flags, NULL); 117 weprintf("execlp %s:", tool); 118 _exit(1); 119 } 120 close(fds[0]); 121 return fds[1]; 122 } 123 124 static int 125 decomp(int fd, const char *tool, const char *flags) 126 { 127 int fds[2]; 128 129 if (pipe(fds) < 0) 130 eprintf("pipe:"); 131 132 switch (fork()) { 133 case -1: 134 eprintf("fork:"); 135 case 0: 136 dup2(fd, 0); 137 dup2(fds[1], 1); 138 close(fds[0]); 139 close(fds[1]); 140 141 execlp(tool, tool, flags, NULL); 142 weprintf("execlp %s:", tool); 143 _exit(1); 144 } 145 close(fds[1]); 146 return fds[0]; 147 } 148 149 static ssize_t 150 eread(int fd, void *buf, size_t n) 151 { 152 ssize_t r; 153 154 again: 155 r = read(fd, buf, n); 156 if (r < 0) { 157 if (errno == EINTR) 158 goto again; 159 eprintf("read:"); 160 } 161 return r; 162 } 163 164 static ssize_t 165 ewrite(int fd, const void *buf, size_t n) 166 { 167 ssize_t r; 168 169 if ((r = write(fd, buf, n)) != n) 170 eprintf("write:"); 171 return r; 172 } 173 174 static unsigned 175 chksum(struct header *h) 176 { 177 unsigned sum, i; 178 179 memset(h->chksum, ' ', sizeof(h->chksum)); 180 for (i = 0, sum = 0, assert(BLKSIZ == 512); i < BLKSIZ; i++) 181 sum += *((unsigned char *)h + i); 182 return sum; 183 } 184 185 static void 186 putoctal(char *dst, unsigned num, int size) 187 { 188 if (snprintf(dst, size, "%.*o", size - 1, num) >= size) 189 eprintf("snprintf: input number too large\n"); 190 } 191 192 static int 193 archive(const char *path) 194 { 195 static const struct header blank = { 196 "././@LongLink", "0000000" , "0000000", "0000000", "00000000000", 197 "00000000000" , " ", AREG , "" , "ustar", "00", 198 }; 199 char b[BLKSIZ + BLKSIZ], *p; 200 struct header *h = (struct header *)b; 201 struct group *gr; 202 struct passwd *pw; 203 struct stat st; 204 ssize_t l, n, r; 205 int fd = -1; 206 207 if (lstat(path, &st) < 0) { 208 weprintf("lstat %s:", path); 209 return 0; 210 } else if (st.st_ino == tarinode && st.st_dev == tardev) { 211 weprintf("ignoring %s\n", path); 212 return 0; 213 } 214 pw = getpwuid(st.st_uid); 215 gr = getgrgid(st.st_gid); 216 217 *h = blank; 218 n = strlcpy(h->name, path, sizeof(h->name)); 219 if (n >= sizeof(h->name)) { 220 *++h = blank; 221 h->type = 'L'; 222 putoctal(h->size, n, sizeof(h->size)); 223 putoctal(h->chksum, chksum(h), sizeof(h->chksum)); 224 225 ewrite(tarfd, (char *)h, BLKSIZ); 226 for (p = (char *)path; n > 0; n -= BLKSIZ, p += BLKSIZ) { 227 if (n < BLKSIZ) { 228 p = memcpy(h--, p, n); 229 memset(p + n, 0, BLKSIZ - n); 230 } 231 ewrite(tarfd, p, BLKSIZ); 232 } 233 } 234 235 putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode)); 236 putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid)); 237 putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid)); 238 putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime)); 239 estrlcpy(h->uname, pw ? pw->pw_name : "", sizeof(h->uname)); 240 estrlcpy(h->gname, gr ? gr->gr_name : "", sizeof(h->gname)); 241 242 if (S_ISREG(st.st_mode)) { 243 h->type = REG; 244 putoctal(h->size, st.st_size, sizeof(h->size)); 245 fd = open(path, O_RDONLY); 246 if (fd < 0) 247 eprintf("open %s:", path); 248 } else if (S_ISDIR(st.st_mode)) { 249 h->type = DIRECTORY; 250 } else if (S_ISLNK(st.st_mode)) { 251 h->type = SYMLINK; 252 if ((r = readlink(path, h->linkname, sizeof(h->linkname) - 1)) < 0) 253 eprintf("readlink %s:", path); 254 h->linkname[r] = '\0'; 255 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { 256 h->type = S_ISCHR(st.st_mode) ? CHARDEV : BLOCKDEV; 257 putoctal(h->major, (unsigned)major(st.st_dev), sizeof(h->major)); 258 putoctal(h->minor, (unsigned)minor(st.st_dev), sizeof(h->minor)); 259 } else if (S_ISFIFO(st.st_mode)) { 260 h->type = FIFO; 261 } 262 263 putoctal(h->chksum, chksum(h), sizeof(h->chksum)); 264 ewrite(tarfd, b, BLKSIZ); 265 266 if (fd != -1) { 267 while ((l = eread(fd, b, BLKSIZ)) > 0) { 268 if (l < BLKSIZ) 269 memset(b + l, 0, BLKSIZ - l); 270 ewrite(tarfd, b, BLKSIZ); 271 } 272 close(fd); 273 } 274 275 return 0; 276 } 277 278 static int 279 unarchive(char *fname, ssize_t l, char b[BLKSIZ]) 280 { 281 struct header *h = (struct header *)b; 282 struct timespec times[2]; 283 char lname[101], *tmp, *p; 284 long mode, major, minor, type, mtime, uid, gid; 285 int fd = -1, lnk = h->type == SYMLINK; 286 287 if (!mflag && ((mtime = strtol(h->mtime, &p, 8)) < 0 || *p != '\0')) 288 eprintf("strtol %s: invalid number\n", h->mtime); 289 if (strcmp(fname, ".") && strcmp(fname, "./") && remove(fname) < 0) 290 if (errno != ENOENT) weprintf("remove %s:", fname); 291 292 tmp = estrdup(fname); 293 mkdirp(dirname(tmp), 0777, 0777); 294 free(tmp); 295 296 switch (h->type) { 297 case REG: 298 case AREG: 299 case RESERVED: 300 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 301 eprintf("strtol %s: invalid number\n", h->mode); 302 fd = open(fname, O_WRONLY | O_TRUNC | O_CREAT, 0600); 303 if (fd < 0) 304 eprintf("open %s:", fname); 305 break; 306 case HARDLINK: 307 case SYMLINK: 308 snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname), 309 h->linkname); 310 if ((lnk ? symlink:link)(lname, fname) < 0) 311 eprintf("%s %s -> %s:", lnk ? "symlink":"link", fname, lname); 312 lnk++; 313 break; 314 case DIRECTORY: 315 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 316 eprintf("strtol %s: invalid number\n", h->mode); 317 if (mkdir(fname, (mode_t)mode) < 0 && errno != EEXIST) 318 eprintf("mkdir %s:", fname); 319 pushdirtime(fname, mtime); 320 break; 321 case CHARDEV: 322 case BLOCKDEV: 323 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 324 eprintf("strtol %s: invalid number\n", h->mode); 325 if ((major = strtol(h->major, &p, 8)) < 0 || *p != '\0') 326 eprintf("strtol %s: invalid number\n", h->major); 327 if ((minor = strtol(h->minor, &p, 8)) < 0 || *p != '\0') 328 eprintf("strtol %s: invalid number\n", h->minor); 329 type = (h->type == CHARDEV) ? S_IFCHR : S_IFBLK; 330 if (mknod(fname, type | mode, makedev(major, minor)) < 0) 331 eprintf("mknod %s:", fname); 332 break; 333 case FIFO: 334 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 335 eprintf("strtol %s: invalid number\n", h->mode); 336 if (mknod(fname, S_IFIFO | mode, 0) < 0) 337 eprintf("mknod %s:", fname); 338 break; 339 default: 340 eprintf("unsupported tar-filetype %c\n", h->type); 341 } 342 343 if ((uid = strtol(h->uid, &p, 8)) < 0 || *p != '\0') 344 eprintf("strtol %s: invalid number\n", h->uid); 345 if ((gid = strtol(h->gid, &p, 8)) < 0 || *p != '\0') 346 eprintf("strtol %s: invalid number\n", h->gid); 347 348 if (fd != -1) { 349 for (; l > 0; l -= BLKSIZ) 350 if (eread(tarfd, b, BLKSIZ) > 0) 351 ewrite(fd, b, MIN(l, BLKSIZ)); 352 close(fd); 353 } 354 355 if (lnk == 1) 356 return 0; 357 358 times[0].tv_sec = times[1].tv_sec = mtime; 359 times[0].tv_nsec = times[1].tv_nsec = 0; 360 if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 0) 361 weprintf("utimensat %s:", fname); 362 if (lnk) { 363 if (!getuid() && lchown(fname, uid, gid)) 364 weprintf("lchown %s:", fname); 365 } else { 366 if (!getuid() && chown(fname, uid, gid)) 367 weprintf("chown %s:", fname); 368 if (chmod(fname, mode) < 0) 369 eprintf("fchmod %s:", fname); 370 } 371 372 return 0; 373 } 374 375 static void 376 skipblk(ssize_t l) 377 { 378 char b[BLKSIZ]; 379 380 for (; l > 0; l -= BLKSIZ) 381 if (!eread(tarfd, b, BLKSIZ)) 382 break; 383 } 384 385 static int 386 print(char *fname, ssize_t l, char b[BLKSIZ]) 387 { 388 puts(fname); 389 skipblk(l); 390 return 0; 391 } 392 393 static void 394 c(int dirfd, const char *name, struct stat *st, void *data, struct recursor *r) 395 { 396 archive(r->path); 397 if (vflag) 398 puts(r->path); 399 400 if (S_ISDIR(st->st_mode)) 401 recurse(dirfd, name, NULL, r); 402 } 403 404 static void 405 sanitize(struct header *h) 406 { 407 size_t i, j; 408 struct { 409 char *f; 410 size_t l; 411 } fields[] = { 412 { h->mode, sizeof(h->mode) }, 413 { h->uid, sizeof(h->uid) }, 414 { h->gid, sizeof(h->gid) }, 415 { h->size, sizeof(h->size) }, 416 { h->mtime, sizeof(h->mtime) }, 417 { h->chksum, sizeof(h->chksum) }, 418 { h->major, sizeof(h->major) }, 419 { h->minor, sizeof(h->minor) } 420 }; 421 422 /* Numeric fields can be terminated with spaces instead of 423 * NULs as per the ustar specification. Patch all of them to 424 * use NULs so we can perform string operations on them. */ 425 for (i = 0; i < LEN(fields); i++){ 426 for (j = 0; j < fields[i].l && fields[i].f[j] == ' '; j++); 427 for (; j < fields[i].l; j++) 428 if (fields[i].f[j] == ' ') 429 fields[i].f[j] = '\0'; 430 } 431 } 432 433 static void 434 chktar(struct header *h) 435 { 436 const char *reason; 437 char tmp[sizeof h->chksum], *err = ""; 438 long sum, i; 439 440 if (h->prefix[0] == '\0' && h->name[0] == '\0') { 441 reason = "empty filename"; 442 goto bad; 443 } 444 if (h->magic[0] && strncmp("ustar", h->magic, 5)) { 445 reason = "not ustar format"; 446 goto bad; 447 } 448 memcpy(tmp, h->chksum, sizeof(tmp)); 449 for (i = sizeof(tmp)-1; i > 0 && tmp[i] == ' '; i--) { 450 tmp[i] = '\0'; 451 } 452 sum = strtol(tmp, &err, 8); 453 if (sum < 0 || sum >= BLKSIZ*256 || *err != '\0') { 454 reason = "invalid checksum"; 455 goto bad; 456 } 457 if (sum != chksum(h)) { 458 reason = "incorrect checksum"; 459 goto bad; 460 } 461 memcpy(h->chksum, tmp, sizeof(tmp)); 462 return; 463 bad: 464 eprintf("malformed tar archive: %s\n", reason); 465 } 466 467 static void 468 xt(int argc, char *argv[], int mode) 469 { 470 long size, l; 471 char b[BLKSIZ], fname[l = PATH_MAX + 1], *p, *q = NULL; 472 int i, m, n; 473 int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print; 474 struct timespec times[2]; 475 struct header *h = (struct header *)b; 476 struct dirtime *dirtime; 477 478 while (eread(tarfd, b, BLKSIZ) > 0 && (h->name[0] || h->prefix[0])) { 479 chktar(h); 480 sanitize(h); 481 482 if ((size = strtol(h->size, &p, 8)) < 0 || *p != '\0') 483 eprintf("strtol %s: invalid size\n", h->size); 484 485 /* Long file path is read direcly into fname*/ 486 if (h->type == 'L' || h->type == 'x' || h->type == 'g') { 487 488 /* Read header only up to size of fname buffer */ 489 for (q = fname; q < fname+size; q += BLKSIZ) { 490 if (q + BLKSIZ >= fname + l) 491 eprintf("name exceeds buffer: %s\n", fname); 492 eread(tarfd, q, BLKSIZ); 493 } 494 495 /* Convert pax x header with 'path=' field into L header */ 496 if (h->type == 'x') for (q = fname; q < fname+size-16; q += n) { 497 if ((n = strtol(q, &p, 10)) < 0 || *p != ' ') 498 eprintf("strtol %.*s: invalid number\n", p+1-q, q); 499 if (n && strncmp(p+1, "path=", 5) == 0) { 500 memmove(fname, p+6, size = q+n - p-6 - 1); 501 h->type = 'L'; 502 break; 503 } 504 } 505 fname[size] = '\0'; 506 507 /* Non L-like header (eg. pax 'g') is skipped by setting q=null */ 508 if (h->type != 'L') 509 q = NULL; 510 continue; 511 } 512 513 /* Ustar path is copied into fname if no L header (ie: q is NULL) */ 514 if (!q) { 515 m = sizeof h->prefix, n = sizeof h->name; 516 p = "/" + !h->prefix[0]; 517 snprintf(fname, l, "%.*s%s%.*s", m, h->prefix, p, n, h->name); 518 } 519 q = NULL; 520 521 /* If argc > 0 then only extract the given files/dirs */ 522 if (argc) { 523 for (i = 0; i < argc; i++) { 524 if (strncmp(argv[i], fname, n = strlen(argv[i])) == 0) 525 if (strchr("/", fname[n]) || argv[i][n-1] == '/') 526 break; 527 } 528 if (i == argc) { 529 skipblk(size); 530 continue; 531 } 532 } 533 534 fn(fname, size, b); 535 if (vflag && mode != 't') 536 puts(fname); 537 } 538 539 if (mode == 'x' && !mflag) { 540 while ((dirtime = popdirtime())) { 541 times[0].tv_sec = times[1].tv_sec = dirtime->mtime; 542 times[0].tv_nsec = times[1].tv_nsec = 0; 543 if (utimensat(AT_FDCWD, dirtime->name, times, 0) < 0) 544 eprintf("utimensat %s:", fname); 545 free(dirtime->name); 546 } 547 free(dirtimes); 548 dirtimes = NULL; 549 } 550 } 551 552 char **args; 553 int argn; 554 555 static void 556 usage(void) 557 { 558 eprintf("usage: %s [x | t | -x | -t] [-C dir] [-J | -Z | -a | -j | -z] [-m] [-p] " 559 "[-f file] [file ...]\n" 560 " %s [c | -c] [-C dir] [-J | -Z | -a | -j | -z] [-h] path ... " 561 "[-f file]\n", argv0, argv0); 562 } 563 564 int 565 main(int argc, char *argv[]) 566 { 567 struct recursor r = { .fn = c, .follow = 'P', .flags = DIRFIRST }; 568 struct stat st; 569 char *file = NULL, *dir = ".", mode = '\0'; 570 int fd; 571 572 argv0 = argv[0]; 573 if (argc > 1 && strchr("cxt", mode = *argv[1])) 574 *(argv[1]+1) ? *argv[1] = '-' : (*++argv = argv0, --argc); 575 576 ARGBEGIN { 577 case 'x': 578 case 'c': 579 case 't': 580 mode = ARGC(); 581 break; 582 case 'C': 583 dir = EARGF(usage()); 584 break; 585 case 'f': 586 file = EARGF(usage()); 587 break; 588 case 'm': 589 mflag = 1; 590 break; 591 case 'J': 592 case 'Z': 593 case 'a': 594 case 'j': 595 case 'z': 596 filtermode = ARGC(); 597 filtertool = filtertools[filtermode]; 598 break; 599 case 'h': 600 r.follow = 'L'; 601 break; 602 case 'v': 603 vflag = 1; 604 break; 605 case 'p': 606 break; /* Do nothing as already default behaviour */ 607 default: 608 usage(); 609 } ARGEND 610 611 switch (mode) { 612 case 'c': 613 if (!argc) 614 usage(); 615 tarfd = 1; 616 if (file && *file != '-') { 617 tarfd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0644); 618 if (tarfd < 0) 619 eprintf("open %s:", file); 620 if (lstat(file, &st) < 0) 621 eprintf("lstat %s:", file); 622 tarinode = st.st_ino; 623 tardev = st.st_dev; 624 } 625 626 if (filtertool) 627 tarfd = comp(tarfd, filtertool, "-cf"); 628 629 if (chdir(dir) < 0) 630 eprintf("chdir %s:", dir); 631 for (; *argv; argc--, argv++) 632 recurse(AT_FDCWD, *argv, NULL, &r); 633 break; 634 case 't': 635 case 'x': 636 tarfd = 0; 637 if (file && *file != '-') { 638 tarfd = open(file, O_RDONLY); 639 if (tarfd < 0) 640 eprintf("open %s:", file); 641 } 642 643 if (filtertool) { 644 fd = tarfd; 645 tarfd = decomp(tarfd, filtertool, "-cdf"); 646 close(fd); 647 } 648 649 if (chdir(dir) < 0) 650 eprintf("chdir %s:", dir); 651 xt(argc, argv, mode); 652 break; 653 default: 654 usage(); 655 } 656 657 return recurse_status; 658 }