tar.c (13960B)
1 /* See LICENSE file for copyright and license details. */ 2 #include <sys/stat.h> 3 #include <sys/time.h> 4 #include <sys/types.h> 5 #ifndef major 6 #include <sys/sysmacros.h> 7 #endif 8 9 #include <errno.h> 10 #include <fcntl.h> 11 #include <grp.h> 12 #include <libgen.h> 13 #include <pwd.h> 14 #include <stdio.h> 15 #include <stdlib.h> 16 #include <string.h> 17 #include <unistd.h> 18 19 #include "fs.h" 20 #include "util.h" 21 22 #define BLKSIZ 512 23 24 enum Type { 25 REG = '0', 26 AREG = '\0', 27 HARDLINK = '1', 28 SYMLINK = '2', 29 CHARDEV = '3', 30 BLOCKDEV = '4', 31 DIRECTORY = '5', 32 FIFO = '6', 33 RESERVED = '7' 34 }; 35 36 struct header { 37 char name[100]; 38 char mode[8]; 39 char uid[8]; 40 char gid[8]; 41 char size[12]; 42 char mtime[12]; 43 char chksum[8]; 44 char type; 45 char linkname[100]; 46 char magic[6]; 47 char version[2]; 48 char uname[32]; 49 char gname[32]; 50 char major[8]; 51 char minor[8]; 52 char prefix[155]; 53 }; 54 55 static struct dirtime { 56 char *name; 57 time_t mtime; 58 } *dirtimes; 59 60 static size_t dirtimeslen; 61 62 static int tarfd; 63 static ino_t tarinode; 64 static dev_t tardev; 65 66 static int mflag, vflag; 67 static int filtermode; 68 static const char *filtertool; 69 70 static const char *filtertools[] = { 71 ['J'] = "xz", 72 ['Z'] = "compress", 73 ['a'] = "lzma", 74 ['j'] = "bzip2", 75 ['z'] = "gzip", 76 }; 77 78 static void 79 pushdirtime(char *name, time_t mtime) 80 { 81 dirtimes = ereallocarray(dirtimes, dirtimeslen + 1, sizeof(*dirtimes)); 82 dirtimes[dirtimeslen].name = estrdup(name); 83 dirtimes[dirtimeslen].mtime = mtime; 84 dirtimeslen++; 85 } 86 87 static struct dirtime * 88 popdirtime(void) 89 { 90 if (dirtimeslen) { 91 dirtimeslen--; 92 return &dirtimes[dirtimeslen]; 93 } 94 return NULL; 95 } 96 97 static int 98 comp(int fd, const char *tool, const char *flags) 99 { 100 int fds[2]; 101 102 if (pipe(fds) < 0) 103 eprintf("pipe:"); 104 105 switch (fork()) { 106 case -1: 107 eprintf("fork:"); 108 case 0: 109 dup2(fd, 1); 110 dup2(fds[0], 0); 111 close(fds[0]); 112 close(fds[1]); 113 114 execlp(tool, tool, flags, NULL); 115 weprintf("execlp %s:", tool); 116 _exit(1); 117 } 118 close(fds[0]); 119 return fds[1]; 120 } 121 122 static int 123 decomp(int fd, const char *tool, const char *flags) 124 { 125 int fds[2]; 126 127 if (pipe(fds) < 0) 128 eprintf("pipe:"); 129 130 switch (fork()) { 131 case -1: 132 eprintf("fork:"); 133 case 0: 134 dup2(fd, 0); 135 dup2(fds[1], 1); 136 close(fds[0]); 137 close(fds[1]); 138 139 execlp(tool, tool, flags, NULL); 140 weprintf("execlp %s:", tool); 141 _exit(1); 142 } 143 close(fds[1]); 144 return fds[0]; 145 } 146 147 static ssize_t 148 eread(int fd, void *buf, size_t n) 149 { 150 ssize_t r; 151 152 again: 153 r = read(fd, buf, n); 154 if (r < 0) { 155 if (errno == EINTR) 156 goto again; 157 eprintf("read:"); 158 } 159 return r; 160 } 161 162 static ssize_t 163 ewrite(int fd, const void *buf, size_t n) 164 { 165 ssize_t r; 166 167 if ((r = write(fd, buf, n)) != n) 168 eprintf("write:"); 169 return r; 170 } 171 172 static void 173 putoctal(char *dst, unsigned num, int size) 174 { 175 if (snprintf(dst, size, "%.*o", size - 1, num) >= size) 176 eprintf("snprintf: input number too large\n"); 177 } 178 179 static int 180 archive(const char *path) 181 { 182 char b[BLKSIZ]; 183 struct group *gr; 184 struct header *h; 185 struct passwd *pw; 186 struct stat st; 187 size_t chksum, i; 188 ssize_t l, r; 189 int fd = -1; 190 191 if (lstat(path, &st) < 0) { 192 weprintf("lstat %s:", path); 193 return 0; 194 } else if (st.st_ino == tarinode && st.st_dev == tardev) { 195 weprintf("ignoring %s\n", path); 196 return 0; 197 } 198 199 pw = getpwuid(st.st_uid); 200 gr = getgrgid(st.st_gid); 201 202 h = (struct header *)b; 203 memset(b, 0, sizeof(b)); 204 205 if (strlen(path) > 255) { 206 const char *reason = "path exceeds 255 character limit"; 207 eprintf("malformed tar archive: %s\n", reason); 208 } else if (strlen(path) >= 100) { 209 size_t prefix_len = 155; 210 const char *last_slash = strrchr(path, '/'); 211 212 if (last_slash && last_slash < path + prefix_len) { 213 prefix_len = last_slash - path + 1; 214 } 215 216 /* strlcpy is fine here - for path ONLY -, 217 * since we're splitting the path. 218 * It's not an issue if the prefix can't hold 219 * the full path — name will take the rest. */ 220 strlcpy(h->prefix, path, prefix_len); 221 estrlcpy(h->name, path + prefix_len, sizeof(h->name)); 222 } else { 223 estrlcpy(h->name, path, sizeof(h->name)); 224 } 225 226 putoctal(h->mode, (unsigned)st.st_mode & 0777, sizeof(h->mode)); 227 putoctal(h->uid, (unsigned)st.st_uid, sizeof(h->uid)); 228 putoctal(h->gid, (unsigned)st.st_gid, sizeof(h->gid)); 229 putoctal(h->size, 0, sizeof(h->size)); 230 putoctal(h->mtime, (unsigned)st.st_mtime, sizeof(h->mtime)); 231 memcpy( h->magic, "ustar", sizeof(h->magic)); 232 memcpy( h->version, "00", sizeof(h->version)); 233 estrlcpy(h->uname, pw ? pw->pw_name : "", sizeof(h->uname)); 234 estrlcpy(h->gname, gr ? gr->gr_name : "", sizeof(h->gname)); 235 236 if (S_ISREG(st.st_mode)) { 237 h->type = REG; 238 putoctal(h->size, (unsigned)st.st_size, sizeof(h->size)); 239 fd = open(path, O_RDONLY); 240 if (fd < 0) 241 eprintf("open %s:", path); 242 } else if (S_ISDIR(st.st_mode)) { 243 h->type = DIRECTORY; 244 } else if (S_ISLNK(st.st_mode)) { 245 h->type = SYMLINK; 246 if ((r = readlink(path, h->linkname, sizeof(h->linkname) - 1)) < 0) 247 eprintf("readlink %s:", path); 248 h->linkname[r] = '\0'; 249 } else if (S_ISCHR(st.st_mode) || S_ISBLK(st.st_mode)) { 250 h->type = S_ISCHR(st.st_mode) ? CHARDEV : BLOCKDEV; 251 putoctal(h->major, (unsigned)major(st.st_dev), sizeof(h->major)); 252 putoctal(h->minor, (unsigned)minor(st.st_dev), sizeof(h->minor)); 253 } else if (S_ISFIFO(st.st_mode)) { 254 h->type = FIFO; 255 } 256 257 memset(h->chksum, ' ', sizeof(h->chksum)); 258 for (i = 0, chksum = 0; i < sizeof(*h); i++) 259 chksum += (unsigned char)b[i]; 260 putoctal(h->chksum, chksum, sizeof(h->chksum)); 261 ewrite(tarfd, b, BLKSIZ); 262 263 if (fd != -1) { 264 while ((l = eread(fd, b, BLKSIZ)) > 0) { 265 if (l < BLKSIZ) 266 memset(b + l, 0, BLKSIZ - l); 267 ewrite(tarfd, b, BLKSIZ); 268 } 269 close(fd); 270 } 271 272 return 0; 273 } 274 275 static int 276 unarchive(char *fname, ssize_t l, char b[BLKSIZ]) 277 { 278 char lname[101], *tmp, *p; 279 long mode, major, minor, type, mtime, uid, gid; 280 struct header *h = (struct header *)b; 281 int fd = -1; 282 struct timespec times[2]; 283 284 if (!mflag && ((mtime = strtol(h->mtime, &p, 8)) < 0 || *p != '\0')) 285 eprintf("strtol %s: invalid number\n", h->mtime); 286 if (remove(fname) < 0 && errno != ENOENT) 287 weprintf("remove %s:", fname); 288 289 tmp = estrdup(fname); 290 mkdirp(dirname(tmp), 0777, 0777); 291 free(tmp); 292 293 switch (h->type) { 294 case REG: 295 case AREG: 296 case RESERVED: 297 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 298 eprintf("strtol %s: invalid number\n", h->mode); 299 fd = open(fname, O_WRONLY | O_TRUNC | O_CREAT, 0600); 300 if (fd < 0) 301 eprintf("open %s:", fname); 302 break; 303 case HARDLINK: 304 case SYMLINK: 305 snprintf(lname, sizeof(lname), "%.*s", (int)sizeof(h->linkname), 306 h->linkname); 307 if (((h->type == HARDLINK) ? link : symlink)(lname, fname) < 0) 308 eprintf("%s %s -> %s:", 309 (h->type == HARDLINK) ? "link" : "symlink", 310 fname, lname); 311 break; 312 case DIRECTORY: 313 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 314 eprintf("strtol %s: invalid number\n", h->mode); 315 if (mkdir(fname, (mode_t)mode) < 0 && errno != EEXIST) 316 eprintf("mkdir %s:", fname); 317 pushdirtime(fname, mtime); 318 break; 319 case CHARDEV: 320 case BLOCKDEV: 321 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 322 eprintf("strtol %s: invalid number\n", h->mode); 323 if ((major = strtol(h->major, &p, 8)) < 0 || *p != '\0') 324 eprintf("strtol %s: invalid number\n", h->major); 325 if ((minor = strtol(h->minor, &p, 8)) < 0 || *p != '\0') 326 eprintf("strtol %s: invalid number\n", h->minor); 327 type = (h->type == CHARDEV) ? S_IFCHR : S_IFBLK; 328 if (mknod(fname, type | mode, makedev(major, minor)) < 0) 329 eprintf("mknod %s:", fname); 330 break; 331 case FIFO: 332 if ((mode = strtol(h->mode, &p, 8)) < 0 || *p != '\0') 333 eprintf("strtol %s: invalid number\n", h->mode); 334 if (mknod(fname, S_IFIFO | mode, 0) < 0) 335 eprintf("mknod %s:", fname); 336 break; 337 default: 338 eprintf("unsupported tar-filetype %c\n", h->type); 339 } 340 341 if ((uid = strtol(h->uid, &p, 8)) < 0 || *p != '\0') 342 eprintf("strtol %s: invalid number\n", h->uid); 343 if ((gid = strtol(h->gid, &p, 8)) < 0 || *p != '\0') 344 eprintf("strtol %s: invalid number\n", h->gid); 345 346 if (fd != -1) { 347 for (; l > 0; l -= BLKSIZ) 348 if (eread(tarfd, b, BLKSIZ) > 0) 349 ewrite(fd, b, MIN(l, BLKSIZ)); 350 close(fd); 351 } 352 353 if (h->type == HARDLINK) 354 return 0; 355 356 times[0].tv_sec = times[1].tv_sec = mtime; 357 times[0].tv_nsec = times[1].tv_nsec = 0; 358 if (!mflag && utimensat(AT_FDCWD, fname, times, AT_SYMLINK_NOFOLLOW) < 0) 359 weprintf("utimensat %s:", fname); 360 if (h->type == SYMLINK) { 361 if (!getuid() && lchown(fname, uid, gid)) 362 weprintf("lchown %s:", fname); 363 } else { 364 if (!getuid() && chown(fname, uid, gid)) 365 weprintf("chown %s:", fname); 366 if (chmod(fname, mode) < 0) 367 eprintf("fchmod %s:", fname); 368 } 369 370 return 0; 371 } 372 373 static void 374 skipblk(ssize_t l) 375 { 376 char b[BLKSIZ]; 377 378 for (; l > 0; l -= BLKSIZ) 379 if (!eread(tarfd, b, BLKSIZ)) 380 break; 381 } 382 383 static int 384 print(char *fname, ssize_t l, char b[BLKSIZ]) 385 { 386 puts(fname); 387 skipblk(l); 388 return 0; 389 } 390 391 static void 392 c(int dirfd, const char *name, struct stat *st, void *data, struct recursor *r) 393 { 394 archive(r->path); 395 if (vflag) 396 puts(r->path); 397 398 if (S_ISDIR(st->st_mode)) 399 recurse(dirfd, name, NULL, r); 400 } 401 402 static void 403 sanitize(struct header *h) 404 { 405 size_t i, j; 406 struct { 407 char *f; 408 size_t l; 409 } fields[] = { 410 { h->mode, sizeof(h->mode) }, 411 { h->uid, sizeof(h->uid) }, 412 { h->gid, sizeof(h->gid) }, 413 { h->size, sizeof(h->size) }, 414 { h->mtime, sizeof(h->mtime) }, 415 { h->chksum, sizeof(h->chksum) }, 416 { h->major, sizeof(h->major) }, 417 { h->minor, sizeof(h->minor) } 418 }; 419 420 /* Numeric fields can be terminated with spaces instead of 421 * NULs as per the ustar specification. Patch all of them to 422 * use NULs so we can perform string operations on them. */ 423 for (i = 0; i < LEN(fields); i++){ 424 for (j = 0; j < fields[i].l && fields[i].f[j] == ' '; j++); 425 for (; j < fields[i].l; j++) 426 if (fields[i].f[j] == ' ') 427 fields[i].f[j] = '\0'; 428 } 429 } 430 431 static void 432 chktar(struct header *h) 433 { 434 char tmp[8], *err, *p = (char *)h; 435 const char *reason; 436 long s1, s2, i; 437 438 if (h->prefix[0] == '\0' && h->name[0] == '\0') { 439 reason = "empty filename"; 440 goto bad; 441 } 442 if (h->magic[0] && strncmp("ustar", h->magic, 5)) { 443 reason = "not ustar format"; 444 goto bad; 445 } 446 memcpy(tmp, h->chksum, sizeof(tmp)); 447 for (i = 0; i < sizeof(tmp) && tmp[i] == ' '; i++); 448 for (; i < sizeof(tmp); i++) 449 if (tmp[i] == ' ') 450 tmp[i] = '\0'; 451 s1 = strtol(tmp, &err, 8); 452 if (s1 < 0 || *err != '\0') { 453 reason = "invalid checksum"; 454 goto bad; 455 } 456 memset(h->chksum, ' ', sizeof(h->chksum)); 457 for (i = 0, s2 = 0; i < sizeof(*h); i++) 458 s2 += (unsigned char)p[i]; 459 if (s1 != s2) { 460 reason = "incorrect checksum"; 461 goto bad; 462 } 463 memcpy(h->chksum, tmp, sizeof(h->chksum)); 464 return; 465 bad: 466 eprintf("malformed tar archive: %s\n", reason); 467 } 468 469 static void 470 xt(int argc, char *argv[], int mode) 471 { 472 char b[BLKSIZ], fname[256 + 1], *p; 473 struct timespec times[2]; 474 struct header *h = (struct header *)b; 475 struct dirtime *dirtime; 476 long size; 477 int i, n; 478 int (*fn)(char *, ssize_t, char[BLKSIZ]) = (mode == 'x') ? unarchive : print; 479 480 while (eread(tarfd, b, BLKSIZ) > 0 && (h->name[0] || h->prefix[0])) { 481 chktar(h); 482 sanitize(h), n = 0; 483 484 /* small dance around non-null terminated fields */ 485 if (h->prefix[0]) 486 n = snprintf(fname, sizeof(fname), "%.*s/", 487 (int)sizeof(h->prefix), h->prefix); 488 snprintf(fname + n, sizeof(fname) - n, "%.*s", 489 (int)sizeof(h->name), h->name); 490 491 if ((size = strtol(h->size, &p, 8)) < 0 || *p != '\0') 492 eprintf("strtol %s: invalid number\n", h->size); 493 494 if (argc) { 495 /* only extract the given files */ 496 for (i = 0; i < argc; i++) 497 if (!strcmp(argv[i], fname)) 498 break; 499 if (i == argc) { 500 skipblk(size); 501 continue; 502 } 503 } 504 505 /* ignore global pax header craziness */ 506 if (h->type == 'g' || h->type == 'x') { 507 skipblk(size); 508 continue; 509 } 510 511 fn(fname, size, b); 512 if (vflag && mode != 't') 513 puts(fname); 514 } 515 516 if (mode == 'x' && !mflag) { 517 while ((dirtime = popdirtime())) { 518 times[0].tv_sec = times[1].tv_sec = dirtime->mtime; 519 times[0].tv_nsec = times[1].tv_nsec = 0; 520 if (utimensat(AT_FDCWD, dirtime->name, times, 0) < 0) 521 eprintf("utimensat %s:", fname); 522 free(dirtime->name); 523 } 524 free(dirtimes); 525 dirtimes = NULL; 526 } 527 } 528 529 static void 530 usage(void) 531 { 532 eprintf("usage: %s [-C dir] [-J | -Z | -a | -j | -z] -x [-m | -t] " 533 "[-f file] [file ...]\n" 534 " %s [-C dir] [-J | -Z | -a | -j | -z] [-h] -c path ... " 535 "[-f file]\n", argv0, argv0); 536 } 537 538 int 539 main(int argc, char *argv[]) 540 { 541 struct recursor r = { .fn = c, .follow = 'P', .flags = DIRFIRST }; 542 struct stat st; 543 char *file = NULL, *dir = ".", mode = '\0'; 544 int fd; 545 546 ARGBEGIN { 547 case 'x': 548 case 'c': 549 case 't': 550 mode = ARGC(); 551 break; 552 case 'C': 553 dir = EARGF(usage()); 554 break; 555 case 'f': 556 file = EARGF(usage()); 557 break; 558 case 'm': 559 mflag = 1; 560 break; 561 case 'J': 562 case 'Z': 563 case 'a': 564 case 'j': 565 case 'z': 566 filtermode = ARGC(); 567 filtertool = filtertools[filtermode]; 568 break; 569 case 'h': 570 r.follow = 'L'; 571 break; 572 case 'v': 573 vflag = 1; 574 break; 575 default: 576 usage(); 577 } ARGEND 578 579 if (!mode) 580 usage(); 581 if (mode == 'c') 582 if (!argc) 583 usage(); 584 585 switch (mode) { 586 case 'c': 587 tarfd = 1; 588 if (file && *file != '-') { 589 tarfd = open(file, O_WRONLY | O_TRUNC | O_CREAT, 0644); 590 if (tarfd < 0) 591 eprintf("open %s:", file); 592 if (lstat(file, &st) < 0) 593 eprintf("lstat %s:", file); 594 tarinode = st.st_ino; 595 tardev = st.st_dev; 596 } 597 598 if (filtertool) 599 tarfd = comp(tarfd, filtertool, "-cf"); 600 601 if (chdir(dir) < 0) 602 eprintf("chdir %s:", dir); 603 for (; *argv; argc--, argv++) 604 recurse(AT_FDCWD, *argv, NULL, &r); 605 break; 606 case 't': 607 case 'x': 608 tarfd = 0; 609 if (file && *file != '-') { 610 tarfd = open(file, O_RDONLY); 611 if (tarfd < 0) 612 eprintf("open %s:", file); 613 } 614 615 if (filtertool) { 616 fd = tarfd; 617 tarfd = decomp(tarfd, filtertool, "-cdf"); 618 close(fd); 619 } 620 621 if (chdir(dir) < 0) 622 eprintf("chdir %s:", dir); 623 xt(argc, argv, mode); 624 break; 625 } 626 627 return recurse_status; 628 }