1: /* 2: * Copyright (c) 1980 Regents of the University of California. 3: * All rights reserved. The Berkeley software License Agreement 4: * specifies the terms and conditions for redistribution. 5: */ 6: 7: #if !defined(lint) && defined(DOSCCS) 8: static char sccsid[] = "@(#)dumptape.c 5.5.1 (2.11BSD) 1996/2/7"; 9: #endif 10: 11: #include <sys/file.h> 12: #include "dump.h" 13: 14: char (*tblock)[DEV_BSIZE]; /* Pointer to malloc()ed buffer for tape */ 15: #define writesize (NTREC * DEV_BSIZE) /* Size of malloc()ed buffer for tape */ 16: int trecno = 0; 17: #ifdef RDUMP 18: extern char *host; 19: int rmtopen(), rmtwrite(); 20: void rmtclose(); 21: #endif RDUMP 22: extern int read(), write(); 23: 24: /* 25: * Concurrent dump mods (Caltech) - disk block reading and tape writing 26: * are exported to several slave processes. While one slave writes the 27: * tape, the others read disk blocks; they pass control of the tape in 28: * a ring via flock(). The parent process traverses the filesystem and 29: * sends spclrec()'s and lists of daddr's to the slaves via pipes. 30: */ 31: struct req { /* instruction packets sent to slaves */ 32: daddr_t dblk; 33: int count; 34: } *req; 35: #define reqsiz (NTREC * sizeof (struct req)) 36: 37: /* 38: * this is allocated here becaue malloc can all too easily fail later on. 39: * IF anymore functionality is added to 'dump' then the bitmaps will have 40: * to be moved to a file and paged. 41: */ 42: char ___xxx[reqsiz + writesize]; 43: 44: #define SLAVES 3 /* 1 slave writing, 1 reading, 1 for slack */ 45: int slavefd[SLAVES]; /* pipes from master to each slave */ 46: int slavepid[SLAVES]; /* used by killall() */ 47: int rotor; /* next slave to be instructed */ 48: int master; /* pid of master, for sending error signals */ 49: u_int tenths; /* length of tape used per block written */ 50: 51: alloctape() 52: { 53: 54: /* 55: * CDC 92181's and 92185's make 0.8" gaps in 1600-bpi start/stop mode 56: * (see DEC TU80 User's Guide). The shorter gaps of 6250-bpi require 57: * repositioning after stopping, i.e, streaming mode, where the gap is 58: * variable, 0.30" to 0.45". The gap is maximal when the tape stops. 59: */ 60: tenths = writesize/density + (density == 625 ? 5 : 8); 61: /* 62: * Allocate tape buffer contiguous with the array of instruction 63: * packets, so flusht() can write them together with one write(). 64: */ 65: req = (struct req *)___xxx; 66: if (req == NULL) 67: return(0); 68: tblock = (char (*)[DEV_BSIZE]) &req[NTREC]; 69: req = (struct req *)tblock - NTREC; 70: return(1); 71: } 72: 73: 74: taprec(dp) 75: char *dp; 76: { 77: req[trecno].dblk = (daddr_t)0; 78: req[trecno].count = 1; 79: bcopy(dp, tblock, DEV_BSIZE); tblock++; 80: /* *(union u_spcl *)(*tblock++) = *(union u_spcl *)dp; /* movc3 */ 81: trecno++; 82: spcl.c_tapea++; 83: if(trecno >= NTREC) 84: flusht(); 85: } 86: 87: tapsrec(blkno) 88: daddr_t blkno; 89: { 90: 91: if (blkno == 0) 92: return; 93: req[trecno].dblk = blkno; 94: req[trecno].count = 1; 95: trecno++; 96: spcl.c_tapea++; 97: if (trecno >= NTREC) 98: flusht(); 99: } 100: 101: int nogripe = 0; 102: 103: tperror() { 104: if (pipeout) { 105: msg("Tape write error on %s\n", tape); 106: msg("Cannot recover\n"); 107: dumpabort(); 108: /* NOTREACHED */ 109: } 110: msg("Tape write error %ld feet into tape %d\n", asize/120L, tapeno); 111: broadcast("TAPE ERROR!\n"); 112: if (!query("Do you want to restart?")) 113: dumpabort(); 114: msg("This tape will rewind. After it is rewound,\n"); 115: msg("replace the faulty tape with a new one;\n"); 116: msg("this dump volume will be rewritten.\n"); 117: killall(); 118: nogripe = 1; 119: close_rewind(); 120: Exit(X_REWRITE); 121: } 122: 123: sigpipe() 124: { 125: 126: msg("Broken pipe\n"); 127: dumpabort(); 128: } 129: 130: flusht() 131: { 132: int siz = (char *)tblock - (char *)req; 133: 134: if (atomic(write, slavefd[rotor], req, siz) != siz) { 135: perror(" DUMP: error writing command pipe"); 136: dumpabort(); 137: } 138: if (++rotor >= SLAVES) rotor = 0; 139: tblock = (char (*)[DEV_BSIZE]) &req[NTREC]; 140: trecno = 0; 141: asize += tenths; 142: blockswritten += NTREC; 143: if (!pipeout && asize > tsize) { 144: close_rewind(); 145: otape(); 146: } 147: timeest(); 148: } 149: 150: tape_rewind() 151: { 152: int f; 153: 154: if (pipeout) 155: return; 156: for (f = 0; f < SLAVES; f++) 157: close(slavefd[f]); 158: while (wait(NULL) >= 0) ; /* wait for any signals from slaves */ 159: msg("Closing %s\n", tape); 160: #ifdef RDUMP 161: if (host) { 162: rmtclose(); 163: while (rmtopen(tape, 0) < 0) 164: sleep(10); 165: rmtclose(); 166: return; 167: } 168: #endif RDUMP 169: close(to); 170: while ((f = open(tape, 0)) < 0) 171: sleep (10); 172: close(f); 173: } 174: 175: close_rewind() 176: { 177: tape_rewind(); 178: if (!nogripe) { 179: msg("Change Tapes: Mount tape #%d\n", tapeno+1); 180: broadcast("CHANGE TAPES!\7\7\n"); 181: } 182: while (!query("Is the new tape mounted and ready to go?")) 183: if (query("Do you want to abort?")) { 184: dumpabort(); 185: /*NOTREACHED*/ 186: } 187: } 188: 189: /* 190: * We implement taking and restoring checkpoints on the tape level. 191: * When each tape is opened, a new process is created by forking; this 192: * saves all of the necessary context in the parent. The child 193: * continues the dump; the parent waits around, saving the context. 194: * If the child returns X_REWRITE, then it had problems writing that tape; 195: * this causes the parent to fork again, duplicating the context, and 196: * everything continues as if nothing had happened. 197: */ 198: 199: otape() 200: { 201: int parentpid; 202: int childpid; 203: int status; 204: int waitpid; 205: int (*interrupt)() = signal(SIGINT, SIG_IGN); 206: 207: parentpid = getpid(); 208: 209: restore_check_point: 210: signal(SIGINT, interrupt); 211: /* 212: * All signals are inherited... 213: */ 214: childpid = fork(); 215: if (childpid < 0) { 216: msg("Context save fork fails in parent %d\n", parentpid); 217: Exit(X_ABORT); 218: } 219: if (childpid != 0) { 220: /* 221: * PARENT: 222: * save the context by waiting 223: * until the child doing all of the work returns. 224: * don't catch the interrupt 225: */ 226: signal(SIGINT, SIG_IGN); 227: #ifdef TDEBUG 228: msg("Tape: %d; parent process: %d child process %d\n", 229: tapeno+1, parentpid, childpid); 230: #endif TDEBUG 231: while ((waitpid = wait(&status)) != childpid) 232: msg("Parent %d waiting for child %d has another child %d return\n", 233: parentpid, childpid, waitpid); 234: if (status & 0xFF) { 235: msg("Child %d returns LOB status %o\n", 236: childpid, status&0xFF); 237: } 238: status = (status >> 8) & 0xFF; 239: #ifdef TDEBUG 240: switch(status) { 241: case X_FINOK: 242: msg("Child %d finishes X_FINOK\n", childpid); 243: break; 244: case X_ABORT: 245: msg("Child %d finishes X_ABORT\n", childpid); 246: break; 247: case X_REWRITE: 248: msg("Child %d finishes X_REWRITE\n", childpid); 249: break; 250: default: 251: msg("Child %d finishes unknown %d\n", 252: childpid, status); 253: break; 254: } 255: #endif TDEBUG 256: switch(status) { 257: case X_FINOK: 258: Exit(X_FINOK); 259: case X_ABORT: 260: Exit(X_ABORT); 261: case X_REWRITE: 262: goto restore_check_point; 263: default: 264: msg("Bad return code from dump: %d\n", status); 265: Exit(X_ABORT); 266: } 267: /*NOTREACHED*/ 268: } else { /* we are the child; just continue */ 269: #ifdef TDEBUG 270: sleep(4); /* allow time for parent's message to get out */ 271: msg("Child on Tape %d has parent %d, my pid = %d\n", 272: tapeno+1, parentpid, getpid()); 273: #endif TDEBUG 274: #ifdef RDUMP 275: while ((to = (host ? rmtopen(tape, 2) : 276: pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666))) < 0) 277: #else RDUMP 278: while ((to = 279: pipeout ? 1 : open(tape, O_WRONLY|O_CREAT, 0666)) < 0) 280: #endif RDUMP 281: { 282: msg("Cannot open output \"%s\".\n", tape); 283: if (!query("Do you want to retry the open?")) 284: dumpabort(); 285: } 286: 287: enslave(); /* Share open tape file descriptor with slaves */ 288: 289: asize = 0; 290: tapeno++; /* current tape sequence */ 291: newtape++; /* new tape signal */ 292: spcl.c_volume++; 293: spcl.c_type = TS_TAPE; 294: spclrec(); 295: if (tapeno > 1) 296: msg("Tape %d begins with blocks from ino %d\n", 297: tapeno, ino); 298: } 299: } 300: 301: dumpabort() 302: { 303: if (master != 0 && master != getpid()) 304: kill(master, SIGTERM); /* Signals master to call dumpabort */ 305: else { 306: killall(); 307: msg("The ENTIRE dump is aborted.\n"); 308: } 309: Exit(X_ABORT); 310: } 311: 312: Exit(status) 313: { 314: #ifdef TDEBUG 315: msg("pid = %d exits with status %d\n", getpid(), status); 316: #endif TDEBUG 317: exit(status); 318: } 319: 320: /* 321: * could use pipe() for this if flock() worked on pipes 322: */ 323: lockfile(fd) 324: int fd[2]; 325: { 326: char tmpname[20]; 327: 328: strcpy(tmpname, "/tmp/dumplockXXXXXX"); 329: mktemp(tmpname); 330: if ((fd[1] = creat(tmpname, 0400)) < 0) { 331: msg("Could not create lockfile "); 332: perror(tmpname); 333: dumpabort(); 334: } 335: if ((fd[0] = open(tmpname, 0)) < 0) { 336: msg("Could not reopen lockfile "); 337: perror(tmpname); 338: dumpabort(); 339: } 340: unlink(tmpname); 341: } 342: 343: enslave() 344: { 345: int first[2], prev[2], next[2], cmd[2]; /* file descriptors */ 346: register int i, j; 347: 348: master = getpid(); 349: signal(SIGTERM, dumpabort); /* Slave sends SIGTERM on dumpabort() */ 350: signal(SIGPIPE, sigpipe); 351: signal(SIGUSR1, tperror); /* Slave sends SIGUSR1 on tape errors */ 352: lockfile(first); 353: for (i = 0; i < SLAVES; i++) { 354: if (i == 0) { 355: prev[0] = first[1]; 356: prev[1] = first[0]; 357: } else { 358: prev[0] = next[0]; 359: prev[1] = next[1]; 360: flock(prev[1], LOCK_EX); 361: } 362: if (i < SLAVES - 1) { 363: lockfile(next); 364: } else { 365: next[0] = first[0]; 366: next[1] = first[1]; /* Last slave loops back */ 367: } 368: if (pipe(cmd) < 0 || (slavepid[i] = fork()) < 0) { 369: msg("too many slaves, %d (recompile smaller) ", i); 370: perror(""); 371: dumpabort(); 372: } 373: slavefd[i] = cmd[1]; 374: if (slavepid[i] == 0) { /* Slave starts up here */ 375: for (j = 0; j <= i; j++) 376: close(slavefd[j]); 377: signal(SIGINT, SIG_IGN); /* Master handles this */ 378: doslave(cmd[0], prev, next); 379: Exit(X_FINOK); 380: } 381: close(cmd[0]); 382: if (i > 0) { 383: close(prev[0]); 384: close(prev[1]); 385: } 386: } 387: close(first[0]); 388: close(first[1]); 389: master = 0; rotor = 0; 390: } 391: 392: killall() 393: { 394: register int i; 395: 396: for (i = 0; i < SLAVES; i++) 397: if (slavepid[i] > 0) 398: kill(slavepid[i], SIGKILL); 399: } 400: 401: /* 402: * Synchronization - each process has a lockfile, and shares file 403: * descriptors to the following process's lockfile. When our write 404: * completes, we release our lock on the following process's lock- 405: * file, allowing the following process to lock it and proceed. We 406: * get the lock back for the next cycle by swapping descriptors. 407: */ 408: doslave(cmd, prev, next) 409: register int cmd, prev[2], next[2]; 410: { 411: register int nread, toggle = 0; 412: int nwrite; 413: 414: close(fi); 415: if ((fi = open(disk, 0)) < 0) { /* Need our own seek pointer */ 416: perror(" DUMP: slave couldn't reopen disk"); 417: dumpabort(); 418: } 419: /* 420: * Get list of blocks to dump, read the blocks into tape buffer 421: */ 422: while ((nread = atomic(read, cmd, req, reqsiz)) == reqsiz) { 423: register struct req *p = req; 424: for (trecno = 0; trecno < NTREC; trecno += p->count, p += p->count) { 425: if (p->dblk) { 426: bread(p->dblk, tblock[trecno], 427: p->count * DEV_BSIZE); 428: } else { 429: if (p->count != 1 || atomic(read, cmd, 430: tblock[trecno], DEV_BSIZE) != DEV_BSIZE) { 431: msg("Master/slave protocol botched.\n"); 432: dumpabort(); 433: } 434: } 435: } 436: flock(prev[toggle], LOCK_EX); /* Wait our turn */ 437: 438: #ifdef RDUMP 439: if ((nwrite = (host ? rmtwrite(tblock[0], writesize) 440: : write(to, tblock[0], writesize))) != writesize) { 441: #else RDUMP 442: if ((nwrite = write(to, tblock[0], writesize)) 443: != writesize) { 444: #endif RDUMP 445: if (nwrite == -1) 446: perror("write"); 447: else 448: msg("short write: got %d instead of %d\n", 449: nwrite, writesize); 450: kill(master, SIGUSR1); 451: for (;;) 452: sigpause(0L); 453: } 454: toggle ^= 1; 455: flock(next[toggle], LOCK_UN); /* Next slave's turn */ 456: } /* Also jolts him awake */ 457: if (nread != 0) { 458: perror(" DUMP: error reading command pipe"); 459: dumpabort(); 460: } 461: } 462: 463: /* 464: * Since a read from a pipe may not return all we asked for, 465: * or a write may not write all we ask if we get a signal, 466: * loop until the count is satisfied (or error). 467: */ 468: atomic(func, fd, buf, count) 469: int (*func)(), fd, count; 470: char *buf; 471: { 472: int got, need = count; 473: 474: while ((got = (*func)(fd, buf, need)) > 0 && (need -= got) > 0) 475: buf += got; 476: return (got < 0 ? got : count - need); 477: }