#ifndef lint
static char sccsid[] = "@(#)diction.c 4.2 (Berkeley) 82/11/06";
#endif /* not lint */

/*
 * diction -- print all sentences containing one of default phrases
 *
 *	status returns:
 *		0 - ok, and some matches
 *		1 - ok, but no matches
 *		2 - some error
 *
 * The phrase list is compiled into a trie with failure links
 * (cgotofn() builds the trie, cfail() adds the failure links) and
 * execute() runs every input file through that automaton.
 */

#include <ctype.h>
#include <fcntl.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/*
 * DICT names the default phrase file and is expected to come from the
 * build (-DDICT=...).
 * NOTE(review): the fallback below is an assumed traditional location;
 * confirm it against the build configuration.
 */
#ifndef DICT
#define DICT "/usr/lib/dict.d"
#endif

#define	MAXSIZ 6500		/* number of automaton states available */
#define QSIZE 650		/* capacity of the work queue in cfail() */

int	main(int argc, char *argv[]);
void	execute(char *file);
int	getargc(void);
void	cgotofn(void);
void	overflo(void);
void	cfail(void);
void	outc(char *addr, char *file);

int linemsg;			/* nonzero: outc() owes a "beginning line" header */
long olcount;			/* line number reported by that header */
long lcount;			/* running line counter for the current file */

/* One automaton state. */
struct words {
	char	inp;		/* character that leads to nst */
	char	out;		/* nonzero at the end of a phrase: length-1, or 0377 for a '~' phrase */
	struct	words *nst;	/* state entered when inp matches */
	struct	words *link;	/* next alternative to try at this position */
	struct	words *fail;	/* failure link, filled in by cfail() */
} w[MAXSIZ], *smax, *q;

/*
 * Input folding table for 7-bit characters: letters map to lower case,
 * digits map to themselves, '!', '.' and '?' map to '.', newline and
 * the remaining printable characters map to ' ', everything else to 0.
 */
char table[128] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, ' ', 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	' ', '.', ' ', ' ', ' ', ' ', ' ', ' ',
	' ', ' ', ' ', ' ', ' ', ' ', '.', ' ',
	'0', '1', '2', '3', '4', '5', '6', '7',
	'8', '9', ' ', ' ', ' ', ' ', ' ', '.',
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' ',
	' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
	'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
	'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
	'x', 'y', 'z', ' ', ' ', ' ', ' ', ' '
};

int	caps = 0;		/* -c given */
int	lineno = 0;		/* -l given */
int fflag;			/* -f given */
int nflag	= 1;		/* use default file */
char *filename;			/* argument of -f */
int	mflg = 0;		/* don't catch output */
int	nfile;
int	nsucc;			/* set once any phrase has been reported */
long nsent = 0;
long nhits = 0;
char *nlp;			/* next input byte outc() has not yet handled */
char *begp, *endp;		/* first and last byte of the current hit */
int beg, last;
char *myst;
int myct = 0;
int oct = 0;			/* current output column */
FILE	*wordf;			/* phrase file being read by getargc() */
FILE *mine;			/* catch file, only opened when CATCH is defined */
char	*argptr;
long tl = 0;			/* total lines over all files (-l) */
long th = 0;			/* total hits over all files (-l) */

/*
 * Fold one input byte through table[].  The byte is taken as unsigned
 * and anything outside the table yields 0, so bytes >= 0x80 can no
 * longer index outside the array on signed-char platforms.
 */
static int
fold_char(int ch)
{
	unsigned char uc = (unsigned char)ch;

	return uc < sizeof table ? table[uc] : 0;
}

/*
 * Parse the flags, open the phrase file, build the automaton and run
 * it over every file argument (or over standard input if none).
 *
 *	-f file	remember file as an additional phrase file
 *	-n	do not read the default dictionary (requires -f)
 *	-d	clear mflg
 *	-c	set caps
 *	-l	set lineno: per-file line and hit counts are printed
 *
 * Exits with 0 if any phrase was reported, 1 if none, 2 on error.
 */
int
main(int argc, char *argv[])
{
	long sv = 0;		/* line count of the most recent file */

	while (--argc > 0 && (++argv)[0][0] == '-') {
		switch (argv[0][1]) {
		case 'f':
			/* argc still counts "-f" itself, so a value needs >= 2 */
			if (argc < 2) {
				fprintf(stderr, "diction: -f requires a file name\n");
				exit(2);
			}
			fflag++;
			filename = (++argv)[0];
			argc--;
			break;
		case 'n':
			nflag = 0;
			break;
		case 'd':
			mflg = 0;
			break;
		case 'c':
			caps++;
			break;
		case 'l':
			lineno++;
			break;
		default:
			fprintf(stderr, "diction: unknown flag\n");
			break;
		}
	}

	if (nflag) {
		wordf = fopen(DICT, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open default dictionary\n");
			exit(2);
		}
	} else {
		if (filename == NULL) {
			/* -n without -f leaves nothing to read phrases from */
			fprintf(stderr, "diction: -n requires -f file\n");
			exit(2);
		}
		wordf = fopen(filename, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open %s\n", filename);
			exit(2);
		}
	}

#ifdef CATCH
	{
		/* Only append to the catch file when it already exists. */
		FILE *probe = fopen(CATCH, "r");

		if (probe != NULL) {
			fclose(probe);
			if ((mine = fopen(CATCH, "a")) != NULL)
				mflg = 1;
		}
	}
#endif
#ifdef MACS
	if (caps)
		printf(".so %s\n", MACS);
#endif
	cgotofn();
	cfail();
	nfile = argc;
	if (argc <= 0) {
		execute((char *)NULL);
	} else {
		while (--argc >= 0) {
			execute(*argv);
			if (lineno) {
				printf("file %s: number of lines %ld number of phrases found %ld\n",
				    *argv, lcount - 1, nhits);
				tl += lcount - 1;
				th += nhits;
				sv = lcount - 1;
				lcount = nhits = 0;
			}
			argv++;
		}
	}
	if (mflg)
		fprintf(mine, "number of sentences %ld %ld number of hits %ld %ld\n",
		    nsent, tl, nhits, th);
	if (!caps && !lineno) {
		printf("number of sentences %ld number of phrases found %ld\n",
		    nsent, nhits);
	} else if (tl != sv) {
		/* totals differ from the last file only if there were several */
		if (!caps)
			printf("totals: number of lines %ld number of phrases found %ld\n",
			    tl, th);
	}
	exit(nsucc == 0);
}

/*
 * Run the automaton over one input file (standard input when file is
 * NULL) and hand every sentence that contains a hit to outc().
 *
 * Input is read into a 1024-byte circular buffer, at most half a
 * buffer at a time.  When a phrase ends but the next input character
 * could extend the match, the position is saved (savp/savc/savct) and
 * scanning continues; if the longer match fails, the saved state is
 * restored and the shorter hit is reported.
 */
void
execute(char *file)
{
	enum { BUFSZ = 1024, HALFBUF = BUFSZ / 2 };
	/*
	 * One zeroed guard byte past the logical end keeps the one
	 * character lookahead p[1] inside the array.
	 */
	char buf[BUFSZ + 1] = {0};
	char *const bufend = &buf[BUFSZ];
	char *p;
	struct words *c;
	int ccount;		/* bytes left from the last read */
	int count1;		/* size of the chunk not yet echoed (caps mode) */
	char *beg1;		/* start of that chunk */
	struct words *savc;
	char *savp;
	int savct;
	int f;
	int hit;		/* current sentence contains a hit */
	ptrdiff_t off;

	last = 0;
	if (file != NULL) {
		if ((f = open(file, O_RDONLY)) < 0) {
			fprintf(stderr, "diction: can't open %s\n", file);
			exit(2);
		}
	} else {
		f = 0;
	}
	lcount = olcount = 1;
	linemsg = 1;
	ccount = 0;
	count1 = -1;
	beg1 = buf;
	p = buf;
	nlp = p;
	c = w;
	oct = hit = 0;
	savc = NULL;
	savp = NULL;
	savct = 0;

	for (;;) {
		if (--ccount <= 0) {
			/* refill: wrap first, then read at most up to the buffer end */
			if (p == bufend)
				p = buf;
			if (p > &buf[HALFBUF]) {
				if ((ccount = (int)read(f, p, (size_t)(bufend - p))) <= 0)
					break;
			} else if ((ccount = (int)read(f, p, HALFBUF)) <= 0) {
				break;
			}
			if (caps && count1 > 0)
				fwrite(beg1, sizeof(*beg1), (size_t)count1, stdout);
			count1 = ccount;
			beg1 = p;
		}
		if (p == bufend)
			p = buf;
nstate:
		if (c->inp == fold_char(*p)) {
			c = c->nst;
		} else if (c->link != NULL) {
			c = c->link;
			goto nstate;
		} else {
			if (savp != NULL) {
				/* longer match failed: fall back to the saved hit */
				c = savc;
				p = savp;
				if (ccount > savct)
					ccount += savct;
				else
					ccount = savct;
				savc = NULL;
				savp = NULL;
				goto hadone;
			}
			c = c->fail;
			if (c == NULL) {
				c = w;
istate:
				if (c->inp == fold_char(*p)) {
					c = c->nst;
				} else if (c->link != NULL) {
					c = c->link;
					goto istate;
				}
			} else {
				goto nstate;
			}
		}
		if (c->out) {
			/* A phrase ends here; see whether the next byte extends it. */
			if (c->inp == fold_char(p[1]) && c->nst != NULL) {
				savp = p;
				savc = c;
				savct = ccount;
				goto cont;
			} else if (c->link != NULL) {
				savc = c;
				while ((savc = savc->link) != NULL) {
					if (savc->inp == fold_char(p[1])) {
						savp = p;
						savc = c;
						savct = ccount;
						goto cont;
					}
				}
			}
hadone:
			savc = NULL;
			savp = NULL;
			if (c->out == (char)(0377)) {
				/* '~' phrase: matched, but deliberately not reported */
				c = w;
				goto nstate;
			}
			/* start of the hit, wrapped around the circular buffer */
			off = (p - buf) - c->out;
			if (off < 0)
				off += BUFSZ;
			begp = buf + off;
			endp = p;
			if (mflg) {
				if (begp - buf < 20) {
					myst = bufend - 20;
					if (nlp < &buf[HALFBUF])
						myst = nlp;
				} else {
					myst = begp - 20;
				}
				if (myst < nlp)
					myst = nlp;
				beg = 0;
			}
			hit = 1;
			nhits++;
			if (*p == '\n')
				lcount++;
			if (fold_char(*p++) == '.') {
				linemsg = 1;
				if (--ccount <= 0) {
					if (p == bufend)
						p = buf;
					if (p > &buf[HALFBUF]) {
						if ((ccount = (int)read(f, p, (size_t)(bufend - p))) <= 0)
							break;
					} else if ((ccount = (int)read(f, p, HALFBUF)) <= 0) {
						break;
					}
					if (caps && count1 > 0)
						fwrite(beg1, sizeof(*beg1), (size_t)count1, stdout);
					count1 = ccount;
					beg1 = p;
				}
			}
			nsucc = 1;
			if (p <= nlp) {
				/* pending text wraps: flush the tail of the buffer first */
				outc(bufend, file);
				nlp = buf;
			}
			outc(p, file);
			if (mflg)
				last = 1;
			nlp = p;
			c = w;
			begp = endp = NULL;
			continue;
		}
cont:
		if (*p == '\n')
			lcount++;
		if (fold_char(*p++) == '.') {
			/* end of sentence */
			if (hit) {
				if (p <= nlp) {
					outc(bufend, file);
					nlp = buf;
				}
				outc(p, file);
				if (!caps)
					printf("\n\n");
				if (mflg && last) {
					putc('\n', mine);
					myct = 0;
				}
			}
			linemsg = 1;
			if (*p == '\n')
				olcount = lcount + 1;
			else
				olcount = lcount;
			last = 0;
			hit = 0;
			oct = 0;
			nlp = p;
			c = w;
			begp = endp = NULL;
			nsent++;
		}
	}
	if (caps && count1 > 0)
		fwrite(beg1, sizeof(*beg1), (size_t)count1, stdout);
	close(f);
}

/*
 * Return the next character of the phrase list, or EOF.
 *
 * Characters come from wordf while it is open.  When the default
 * dictionary is exhausted and -f was given, reading continues with the
 * -f file.  With no open file the string at argptr is read instead.
 */
int
getargc(void)
{
	int c;

	if (wordf != NULL) {
		if ((c = getc(wordf)) != EOF)
			return c;
		fclose(wordf);
		wordf = NULL;	/* never touch the closed stream again */
		if (!(nflag && fflag))
			return EOF;
		nflag = 0;
		wordf = fopen(filename, "r");
		if (wordf == NULL) {
			fprintf(stderr, "diction: can't open %s\n", filename);
			exit(2);
		}
		return getc(wordf);
	}
	if (argptr == NULL || (c = *argptr++) == '\0')
		return EOF;
	return c;
}

/*
 * Build the trie of phrases in w[] from the characters delivered by
 * getargc(), one phrase per line.  The final state of a phrase gets
 * out = length-1, or 0377 when a '~' was seen while walking the line.
 */
void
cgotofn(void)
{
	int c;
	struct words *s;
	int ct;			/* characters consumed on the current line */
	int neg;		/* '~' seen on the current line */

	s = smax = w;
	neg = ct = 0;
	for (;;) {
		c = getargc();
		if (c == '~') {
			neg++;
			c = getargc();
		}
		if (c == EOF)
			return;
		if (c == '\n') {
			/* the line was a prefix of phrases already entered */
			s->out = neg ? (char)0377 : (char)(ct - 1);
			neg = ct = 0;
			s = w;
			continue;
		}

		/* look for c among the alternatives at this position */
		while (s->inp != c && s->inp != 0 && s->link != NULL)
			s = s->link;
		if (s->inp == c) {
			s = s->nst;
			ct++;
			continue;
		}
		if (s->inp != 0) {
			/* end of the chain: append a fresh alternative */
			if (smax >= &w[MAXSIZ - 1])
				overflo();
			s->link = ++smax;
			s = smax;
		}

		/* enter the remainder of the line as a new chain of states */
		do {
			s->inp = (char)c;
			ct++;
			if (smax >= &w[MAXSIZ - 1])
				overflo();
			s->nst = ++smax;
			s = smax;
		} while ((c = getargc()) != '\n' && c != EOF);
		smax->out = neg ? (char)0377 : (char)(ct - 1);
		neg = ct = 0;
		s = w;
		if (c == EOF)
			return;
	}
}

/* Report that a fixed-size table is full and exit with status 2. */
void
overflo(void)
{
	fprintf(stderr, "wordlist too large\n");
	exit(2);
}

/*
 * Compute the failure links of the trie built by cgotofn(), visiting
 * the states breadth first through a circular queue of QSIZE entries.
 * A state whose failure target ends a phrase inherits that target's
 * out value if it has none of its own.
 */
void
cfail(void)
{
	struct words *queue[QSIZE];
	struct words **front, **rear;
	struct words *state;
	int bstart;		/* already restarted from the root */
	char c;
	struct words *s;

	s = w;
	front = rear = queue;
	/* seed the queue with the successors of every root alternative */
	do {
		if (s->inp != 0) {
			*rear++ = s->nst;
			if (rear >= &queue[QSIZE - 1])
				overflo();
		}
	} while ((s = s->link) != NULL);

	while (rear != front) {
		s = *front;
		if (front == &queue[QSIZE - 1])
			front = queue;
		else
			front++;
		do {
			if ((c = s->inp) != 0) {
				bstart = 0;
				*rear = (q = s->nst);
				/*
				 * Advance rear around the ring.  "<=" (not "<")
				 * so that an empty queue sitting on the last
				 * slot wraps instead of stepping past the array.
				 */
				if (front <= rear) {
					if (rear >= &queue[QSIZE - 1]) {
						if (front == queue)
							overflo();
						else
							rear = queue;
					} else {
						rear++;
					}
				} else if (++rear == front) {
					overflo();
				}
				state = s->fail;
floop:
				if (state == NULL) {
					state = w;
					bstart = 1;
				}
				if (state->inp == c) {
					do {
						q->fail = state->nst;
						if ((state->nst)->out != 0 && q->out == 0)
							q->out = (state->nst)->out;
					} while ((q = q->link) != NULL);
				} else if ((state = state->link) != NULL) {
					goto floop;
				} else if (bstart == 0) {
					/* nothing along the failure chain: retry from the root */
					state = NULL;
					goto floop;
				}
			}
		} while ((s = s->link) != NULL);
	}
}

/*
 * Emit the pending input from nlp up to (not including) addr.
 *
 * Without -c the text goes to stdout with newlines turned into blanks,
 * lines broken at a blank once the column passes 60, and the current
 * hit (begp..endp) bracketed as *[ ... ]*.  With -c nothing is printed
 * here; the hit is upper-cased in place in the buffer instead.  When
 * mflg is set, text around the hit is also copied to the catch file.
 * With -l a "beginning line" header is printed first when one is due.
 */
void
outc(char *addr, char *file)
{
	int inside;		/* caps mode: currently within the hit */

	inside = 0;
	if (!caps && lineno && linemsg) {
		printf("beginning line %ld", olcount);
		if (file != (char *)NULL)
			printf(" %s\n", file);
		else
			printf("\n");
		linemsg = 0;
	}
	while (nlp < addr) {
		if (!caps && oct > 60 && fold_char(*nlp) == ' ' &&
		    nlp != begp && nlp != endp) {
			oct = 0;
			putchar('\n');
		}
		if (nlp == begp) {
			if (caps) {
				inside++;
			} else {
				if (oct > 45) {
					putchar('\n');
					oct = 0;
				}
				if (oct == 0 || fold_char(*nlp) != ' ') {
					printf("*[");
					oct += 2;
				} else {
					printf(" *[");
					oct += 3;
				}
			}
			if (mflg)
				putc('[', mine);
		}
		if (inside) {
			if (islower((unsigned char)*nlp))
				*nlp = (char)toupper((unsigned char)*nlp);
		} else {
			if (!caps && *nlp == '\n')
				*nlp = ' ';
			/* swallow blanks at the start of an output line */
			if (!(*nlp == ' ' && oct == 0) && !caps) {
				putchar(*nlp);
				oct++;
			}
		}
		if (nlp == endp) {
			if (caps) {
				inside = 0;
			} else {
				if (*nlp != ' ') {
					printf("]*");
					oct += 2;
				} else {
					printf("]* ");
					oct += 3;
				}
				if (oct > 60) {
					putchar('\n');
					oct = 0;
				}
			}
			if (mflg)
				putc(']', mine);
			beg = 0;
		}
		if (mflg) {
			if (nlp == myst)
				beg = 1;
			if (beg || last) {
				putc(*nlp, mine);
				if (myct++ >= 72 || last == 20) {
					putc('\n', mine);
					if (last == 20)
						last = myct = 0;
					else
						myct = 0;
				}
				if (last)
					last++;
			}
		}
		nlp++;
	}
}