#if !defined(lint) && defined(DOSCCS)
static char *sccsid = "@(#)ptx.c 4.2.1 (2.11BSD) 1996/10/23";
#endif

/*	permuted title index
	ptx [-t] [-r] [-f] [-w num] [-g num] [-i ignore] [-o only]
	    [-b breaks] [input] [output]
	Ptx reads the input file and permutes on words in it.
	It excludes all words in the ignore file.
	Alternately it includes words in the only file.
	if neither is given it excludes the words in /usr/share/misc/eign.

	The width of the output line can be changed to num
	characters.  If omitted 72 is default unless troff then 100.
	the -f flag tells the program to fold the output
	the -t flag says the output is for troff and the
	output is then wider.
	the -r flag treats the first word of every input line as a
	reference: it is not permuted and is emitted as an extra
	trailing field of each output line.
	the -g flag sets both the gap and the gutter to num.
	the -b flag names a file whose characters are added to the
	default word break characters (blank, tab, newline).

	make: cc ptx.c -lS
	*/

#include <stdio.h>
#include <ctype.h>
#include <signal.h>
#define DEFLTX "/usr/share/misc/eign"
#define TILDE 0177
#define SORT "/usr/bin/sort"
#define	N 30
#define	MAX	(N*BUFSIZ)
#define LMAX	200
/* Size of line[].  getline() stores at most LMAX characters plus a
   newline; a line read back from the sort file carries one extra
   blank, and getsort() doubles every '"', so twice LMAX plus a
   little slack covers everything this program writes itself. */
#define SMAX	(2*LMAX+4)
#define MAXT	2048
#define MASK	03777
#define SET	1
#define NBREAK	256

/* Mask the index so that neither a negative char nor a byte above
   0177 can reach outside btable[]. */
#define isabreak(c) (btable[(c) & 0377])

extern char *calloc(), *mktemp();
/* NOTE(review): POSIX.1-2008 <stdio.h> declares an unrelated
   getline(); this function needs renaming if the file is ported. */
extern char *getline();
int status;


char *hasht[MAXT];		/* open-addressed table of ignore/only words */
char line[SMAX];		/* current input line / current sorted line */
char btable[NBREAK];		/* non-zero for word break characters */
int ignore;
int only;
int llen = 72;
int gap = 3;
int gutter = 3;
int mlen = LMAX;
int wlen;
int rflag;
int halflen;
char *strtbufp, *endbufp;
char *empty = "";

char *infile;
FILE *inptr;			/* set to stdin in main() */

char *outfile;
FILE *outptr;			/* set to stdout in main() */

char *sortfile;			/* output of sort program */
/* mktemp() rewrites its argument, so the template must be a
   writable array, not a string literal. */
char sortname[] = "/tmp/ptxsXXXXXX";
char nofold[] = {'-', 'd', 't', TILDE, 0};
char fold[] = {'-', 'd', 'f', 't', TILDE, 0};
char *sortopt = nofold;
FILE *sortptr;

char *bfile;			/*contains user supplied break chars */
FILE *bptr;

/*
 * Decode the arguments, load the break table and the ignore/only
 * word list, write one rotated copy of each input line per selected
 * keyword to a temporary file, sort that file with SORT and format
 * the result through getsort().
 */
main(argc,argv)
int argc;
char **argv;
{
	register int c;
	register char *bufp;
	int pid, wpid;
	char *pend;
	extern onintr();

	char *xfile;
	FILE *xptr;

	/* stdin/stdout are not constant expressions on every libc,
	   so the streams are assigned here rather than initialized. */
	inptr = stdin;
	outptr = stdout;

	if(signal(SIGHUP,onintr)==SIG_IGN)
		signal(SIGHUP,SIG_IGN);
	if(signal(SIGINT,onintr)==SIG_IGN)
		signal(SIGINT,SIG_IGN);
	signal(SIGPIPE,onintr);
	signal(SIGTERM,onintr);

/*	argument decoding	*/

	xfile = DEFLTX;
	argv++;
	while(argc>1 && **argv == '-') {
		switch (*++*argv){

		case 'r':
			rflag++;
			break;
		case 'f':
			sortopt = fold;
			break;

		case 'w':
			/* argc counts this option too, so an operand
			   exists only when at least 3 remain. */
			if(argc < 3)
				diag("Missing argument for option:",*argv);
			argc--;
			wlen++;
			llen = atoi(*++argv);
			if(llen <= 0)
				diag("Wrong width:",*argv);
			if(llen > LMAX) {
				llen = LMAX;
				msg("Lines truncated to 200 chars.",empty);
			}
			break;

		case 't':
			if(wlen == 0)
				llen = 100;
			break;
		case 'g':
			if(argc < 3)
				diag("Missing argument for option:",*argv);
			argc--;
			gap = gutter = atoi(*++argv);
			break;

		case 'i':
			if(only)
				diag("Only file already given.",empty);
			if(argc < 3)
				diag("Missing argument for option:",*argv);
			argc--;
			ignore++;
			xfile = *++argv;
			break;

		case 'o':
			if(ignore)
				diag("Ignore file already given",empty);
			if(argc < 3)
				diag("Missing argument for option:",*argv);
			only++;
			argc--;
			xfile = *++argv;
			break;

		case 'b':
			if(argc < 3)
				diag("Missing argument for option:",*argv);
			argc--;
			bfile = *++argv;
			break;

		default:
			msg("Illegal argument:",*argv);
		}
		argc--;
		argv++;
	}

	if(argc>3)
		diag("Too many filenames",empty);
	else if(argc==3){
		infile = *argv++;
		outfile = *argv;
		if((outptr = fopen(outfile,"w")) == NULL)
			diag("Cannot open output file:",outfile);
	} else if(argc==2) {
		infile = *argv;
		outfile = 0;
	}


	/* Default breaks of blank, tab and newline */
	btable[' '] = SET;
	btable['\t'] = SET;
	btable['\n'] = SET;
	if(bfile) {
		if((bptr = fopen(bfile,"r")) == NULL)
			diag("Cannot open break char file",bfile);

		while((c = getc(bptr)) != EOF)
			btable[c & 0377] = SET;
		fclose(bptr);
	}

/*	Allocate space for a buffer.  If only or ignore file present
	read it into buffer.  Else read in default ignore file
	and put resulting words in buffer.
	*/


	if((strtbufp = calloc(N,BUFSIZ)) == NULL)
		diag("Out of memory space",empty);
	bufp = strtbufp;
	endbufp = strtbufp+MAX;

	if((xptr = fopen(xfile,"r")) == NULL)
		diag("Cannot open file",xfile);

	/* strtbufp always marks the start of the word being read */
	while(bufp < endbufp && (c = getc(xptr)) != EOF) {
		if(isabreak(c)) {
			/* consecutive breaks delimit no word */
			if(bufp == strtbufp)
				continue;
			if(storeh(hash(strtbufp,bufp),strtbufp))
				diag("Too many words",xfile);
			*bufp++ = '\0';
			strtbufp = bufp;
		}
		else {
			*bufp++ = (isupper(c)?tolower(c):c);
		}
	}
	if (bufp >= endbufp)
		diag("Too many words in file",xfile);
	/* keep a final word that is not followed by a break char */
	if(bufp > strtbufp) {
		if(storeh(hash(strtbufp,bufp),strtbufp))
			diag("Too many words",xfile);
		*bufp++ = '\0';
		strtbufp = bufp;
	}
	fclose(xptr);
	endbufp = --bufp;

	/* open output file for sorting */

	sortfile = mktemp(sortname);
	if((sortptr = fopen(sortfile, "w")) == NULL)
		diag("Cannot open output for sorting:",sortfile);

/*	get a line of data and compare each word for
	inclusion or exclusion in the sort phase
*/

	if (infile!=0 && (inptr = fopen(infile,"r")) == NULL)
		diag("Cannot open data: ",infile);
	while(pend=getline())
		cmpline(pend);
	fclose(sortptr);

	switch (pid = fork()){

	case -1:	/* cannot fork */
		diag("Cannot fork",empty);

	case 0:		/* child */
		execl(SORT, SORT, sortopt, "+0", "-1", "+1",
			sortfile, "-o", sortfile, (char *)0);
		/* only reached if the exec failed; leave at once so the
		   child never runs the parent's code below */
		msg("Cannot exec",SORT);
		_exit(1);

	default:	/* parent */
		/* stop on -1 as well, so a failing wait cannot spin */
		while((wpid = wait(&status)) != pid && wpid != -1)
			;
	}


	getsort();
	if(sortfile && *sortfile)
		unlink(sortfile);
	exit(0);
}

/*
 * Print the message s followed by arg on the standard error output.
 */
msg(s,arg)
char *s;
char *arg;
{
	fprintf(stderr,"%s %s\n",s,arg);
	return;
}

/*
 * Print a message like msg(), remove the temporary sort file if one
 * has been named, and exit with status 1.  Does not return.
 */
diag(s,arg)
char *s, *arg;
{

	msg(s,arg);
	if(sortfile && *sortfile)
		unlink(sortfile);
	exit(1);
}


/*
 * Read the next non-blank line from inptr into line[].
 * Leading white space is skipped, tabs become blanks, characters
 * beyond mlen are dropped and trailing white space is removed.
 * A '\n' is stored after the last character and a pointer to that
 * '\n' is returned; 0 is returned when no more data is available.
 * A final line that ends at EOF without a newline is returned too.
 */
char *getline()
{

	register int c;
	register char *linep;
	char *endlinep;


	endlinep= line + mlen;
	linep = line;
			/* Throw away leading white space */

	while(isspace(c=getc(inptr)))
		;
	if(c==EOF)
		return(0);
	ungetc(c,inptr);
	/* the character pushed back is not white space, so at least
	   one character is stored before the loop can end */
	while(( c=getc(inptr)) != EOF && c != '\n') {
		if(c == '\t')
			c = ' ';
		if(linep < endlinep)
			*linep++ = c;
	}
	/* strip trailing white space; line[0] stops the scan */
	while(isspace(*--linep));
	*++linep = '\n';
	return(linep);
}

/*
 * Split line[] (terminated by the '\n' at pend) into words at break
 * characters and look every word up in the hash table.  Depending on
 * the ignore/only flags the line is handed to putline() once for
 * each word that is selected as a keyword.  With rflag set the first
 * word of the line is skipped.
 */
cmpline(pend)
char *pend;
{

	char *pstrt, *pchar, *cp;
	char **hp;
	int found;
	int probes;

	pchar = line;
	if(rflag)
		while(pchar<pend&&!isspace(*pchar))
			pchar++;
	while(pchar<pend){
	/* eliminate white space */
		if(isabreak(*pchar)) {
			pchar++;
			continue;
		}
		pstrt = pchar;
		/* the '\n' at pend is always a break character */
		while(!isabreak(*pchar))
			pchar++;

		/* the word is [pstrt, pchar); probe from its hash slot,
		   wrapping at the end of the table.  The probe count
		   keeps a completely full table from looping forever. */
		found = 0;
		hp = &hasht[hash(pstrt,pchar)];
		for(probes = 0; probes < MAXT && (cp = *hp) != 0; probes++) {
			if(cmpword(pstrt,pchar-1,cp)) {
				found = 1;
				break;
			}
			if(++hp == &hasht[MAXT])
				hp = hasht;
		}
		if(found) {
			/* exact match */
			if(!ignore && only)
				putline(pstrt,pend);
		} else {
			/* no match */
			if(ignore || !only)
				putline(pstrt,pend);
		}
	}
}

/*
 * Compare the text word starting at cpp, whose last character is at
 * pend, with the NUL terminated table word hpp, folding upper case
 * in the text to lower case.  Returns 1 on an exact match, else 0.
 */
cmpword(cpp,pend,hpp)
char *cpp, *pend, *hpp;
{
	char c;

	while(*hpp != '\0'){
		c = *cpp++;
		if((isupper(c)?tolower(c):c) != *hpp++)
			return(0);
	}
	if(--cpp == pend) return(1);
	return(0);
}

/*
 * Write one record to the sort file: the text from strt up to end,
 * a blank, a TILDE, then the part of line[] that precedes strt.
 */
putline(strt, end)
char *strt, *end;
{
	char *cp;

	for(cp=strt; cp<end; cp++)
		putc(*cp, sortptr);
	/* Add extra blank before TILDE to sort correctly
	   with -fd option */
	putc(' ',sortptr);
	putc(TILDE,sortptr);
	for (cp=line; cp<strt; cp++)
		putc(*cp,sortptr);
	putc('\n',sortptr);
}

/*
 * Read the sorted file back and write one ".xx" request per record
 * to outptr.  Each record is split at the TILDE and trimmed into
 * four quoted fields that fit the line length; a "/" marks a field
 * that was truncated.  With rflag set the reference word is written
 * as a fifth, unquoted field.
 */
getsort()
{
	register int c;
	register char *tilde, *linep, *ref;
	char *p1a,*p1b,*p2a,*p2b,*p3a,*p3b,*p4a,*p4b;
	char *limit;
	int w;
	char *rtrim(), *ltrim();

	if((sortptr = fopen(sortfile,"r")) == NULL)
		diag("Cannot open sorted data:",sortfile);

	halflen = (llen-gutter)/2;
	/* last usable slot of line[]; one byte is kept in reserve for
	   the NUL that the rflag code stores at or before linep */
	limit = line + SMAX - 1;
	linep = tilde = line;
	while((c = getc(sortptr)) != EOF) {
		switch(c) {

		case TILDE:
			tilde = linep;
			break;

		case '\n':
			while(linep>line && isspace(linep[-1]))
				linep--;
			ref = tilde;
			if(rflag) {
				while(ref<linep&&!isspace(*ref))
					ref++;
				*ref++ = 0;
			}
		/* the -1 is an overly conservative test to leave
		   space for the / that signifies truncation*/
			p3b = rtrim(p3a=line,tilde,halflen-1);
			if(p3b-p3a>halflen-1)
				p3b = p3a+halflen-1;
			p2a = ltrim(ref,p2b=linep,halflen-1);
			if(p2b-p2a>halflen-1)
				p2a = p2b-halflen-1;
			p1b = rtrim(p1a=p3b+(isspace(p3b[0])!=0),tilde,
				w=halflen-(p2b-p2a)-gap);
			if(p1b-p1a>w)
				p1b = p1a;
			p4a = ltrim(ref,p4b=p2a-(isspace(p2a[-1])!=0),
				w=halflen-(p3b-p3a)-gap);
			if(p4b-p4a>w)
				p4a = p4b;
			fprintf(outptr,".xx \"");
			putout(p1a,p1b);
	/* tilde-1 to account for extra space before TILDE */
			if(p1b!=(tilde-1) && p1a!=p1b)
				fprintf(outptr,"/");
			fprintf(outptr,"\" \"");
			if(p4a==p4b && p2a!=ref && p2a!=p2b)
				fprintf(outptr,"/");
			putout(p2a,p2b);
			fprintf(outptr,"\" \"");
			putout(p3a,p3b);
	/* ++p3b to account for extra space before TILDE */
			if(p1a==p1b && ++p3b!=tilde)
				fprintf(outptr,"/");
			fprintf(outptr,"\" \"");
			if(p1a==p1b && p4a!=ref && p4a!=p4b)
				fprintf(outptr,"/");
			putout(p4a,p4b);
			if(rflag)
				fprintf(outptr,"\" %s\n",tilde);
			else
				fprintf(outptr,"\"\n");
			linep = tilde = line;
			break;

		case '"':
	/* put double " for "  */
			if(linep < limit)
				*linep++ = c;
			/* fall through */
		default:
			/* characters that do not fit are dropped */
			if(linep < limit)
				*linep++ = c;
		}
	}
	fclose(sortptr);
}

/*
 * Return the right end of the text starting at a: the last word end
 * found within d characters of a, not going beyond c.  If no word
 * end is found c itself is returned.
 */
char *rtrim(a,c,d)
char *a,*c;
int d;
{
	char *b,*x;
	b = c;
	for(x=a+1; x<=c&&x-a<=d; x++)
		if((x==c||isspace(x[0]))&&!isspace(x[-1]))
			b = x;
	if(b<c&&!isspace(b[0]))
		b++;
	return(b);
}

/*
 * Mirror image of rtrim(): return the left end of the text ending at
 * b, the earliest word start found within d characters of b and not
 * before c.  If no word start is found c itself is returned.
 */
char *ltrim(c,b,d)
char *c,*b;
int d;
{
	char *a,*x;
	a = c;
	for(x=b-1; x>=c&&b-x<=d; x--)
		if(!isspace(x[0])&&(x==c||isspace(x[-1])))
			a = x;
	if(a>c&&!isspace(a[-1]))
		a--;
	return(a);
}

/*
 * Copy the characters from strt up to, not including, end to outptr.
 */
putout(strt,end)
char *strt, *end;
{
	char *cp;

	for(cp=strt; cp<end; cp++) {
		putc(*cp,outptr);
	}
}

/*
 * Signal handler: remove the temporary sort file, if one has been
 * named yet, and exit with status 1.
 */
onintr()
{

	/* a signal may arrive before main() has set sortfile */
	if(sortfile && *sortfile)
		unlink(sortfile);
	exit(1);
}

/*
 * Hash the word [strtp, endp) to a table index in 0..MASK from its
 * first two and last two characters, folding upper case to lower.
 * Words shorter than two characters hash to 0.
 */
hash(strtp,endp)
char *strtp, *endp;
{
	char *cp, c;
	int i, j, k;

	/* Return zero hash number for single letter words;
	   the same test keeps an empty range from being read */
	if((endp - strtp) <= 1)
		return(0);

	cp = strtp;
	c = *cp++;
	i = (isupper(c)?tolower(c):c);
	c = *cp;
	j = (isupper(c)?tolower(c):c);
	i = i*j;
	cp = --endp;
	c = *cp--;
	k = (isupper(c)?tolower(c):c);
	c = *cp;
	j = (isupper(c)?tolower(c):c);
	j = k*j;

	k = (i ^ (j>>2)) & MASK;
	return(k);
}

/*
 * Enter strtp into the hash table at the first free slot at or after
 * num, wrapping around to the start of the table.
 * Returns 0 on success and 1 when the table is full.
 */
storeh(num,strtp)
int num;
char *strtp;
{
	int i;

	for(i=num; i<MAXT; i++) {
		if(hasht[i] == 0) {
			hasht[i] = strtp;
			return(0);
		}
	}
	for(i=0; i<num; i++) {
		if(hasht[i] == 0) {
			hasht[i] = strtp;
			return(0);
		}
	}
	return(1);
}