1: #ifndef lint 2: static char sccsid[] = "@(#)spell.c 4.2 6/3/86"; 3: #endif 4: 5: #include "spell.h" 6: #define DLEV 2 7: 8: char *strcat(); 9: int strip(); 10: char *skipv(); 11: int an(); 12: int s(); 13: int es(); 14: int ily(); 15: int ncy(); 16: int CCe(); 17: int VCe(); 18: int bility(); 19: int tion(); 20: int ize(); 21: int y_to_e(); 22: int i_to_y(); 23: int nop(); 24: int metry(); 25: 26: struct suftab { 27: char *suf; 28: int (*p1)(); 29: int n1; 30: char *d1; 31: char *a1; 32: int (*p2)(); 33: int n2; 34: char *d2; 35: char *a2; 36: } suftab[] = { 37: {"ssen",ily,4,"-y+iness","+ness" }, 38: {"ssel",ily,4,"-y+i+less","+less" }, 39: {"se",s,1,"","+s", es,2,"-y+ies","+es" }, 40: {"s'",s,2,"","+'s"}, 41: {"s",s,1,"","+s"}, 42: {"ecn",ncy,1,"","-t+ce"}, 43: {"ycn",ncy,1,"","-cy+t"}, 44: {"ytilb",nop,0,"",""}, 45: {"ytilib",bility,5,"-le+ility",""}, 46: {"elbaif",i_to_y,4,"-y+iable",""}, 47: {"elba",CCe,4,"-e+able","+able"}, 48: {"yti",CCe,3,"-e+ity","+ity"}, 49: {"ylb",y_to_e,1,"-e+y",""}, 50: {"yl",ily,2,"-y+ily","+ly"}, 51: {"laci",strip,2,"","+al"}, 52: {"latnem",strip,2,"","+al"}, 53: {"lanoi",strip,2,"","+al"}, 54: {"tnem",strip,4,"","+ment"}, 55: {"gni",CCe,3,"-e+ing","+ing"}, 56: {"reta",nop,0,"",""}, 57: {"re",strip,1,"","+r", i_to_y,2,"-y+ier","+er"}, 58: {"de",strip,1,"","+d", i_to_y,2,"-y+ied","+ed"}, 59: {"citsi",strip,2,"","+ic"}, 60: {"cihparg",i_to_y,1,"-y+ic",""}, 61: {"tse",strip,2,"","+st", i_to_y,3,"-y+iest","+est"}, 62: {"cirtem",i_to_y,1,"-y+ic",""}, 63: {"yrtem",metry,0,"-ry+er",""}, 64: {"cigol",i_to_y,1,"-y+ic",""}, 65: {"tsigol",i_to_y,2,"-y+ist",""}, 66: {"tsi",VCe,3,"-e+ist","+ist"}, 67: {"msi",VCe,3,"-e+ism","+ist"}, 68: {"noitacif",i_to_y,6,"-y+ication",""}, 69: {"noitazi",ize,5,"-e+ation",""}, 70: {"rota",tion,2,"-e+or",""}, 71: {"noit",tion,3,"-e+ion","+ion"}, 72: {"naino",an,3,"","+ian"}, 73: {"na",an,1,"","+n"}, 74: {"evit",tion,3,"-e+ive","+ive"}, 75: {"ezi",CCe,3,"-e+ize","+ize"}, 76: {"pihs",strip,4,"","+ship"}, 77: {"dooh",ily,4,"-y+hood","+hood"}, 78: {"ekil",strip,4,"","+like"}, 79: 0 80: }; 81: 82: char *preftab[] = { 83: "anti", 84: "bio", 85: "dis", 86: "electro", 87: "en", 88: "fore", 89: "hyper", 90: "intra", 91: "inter", 92: "iso", 93: "kilo", 94: "magneto", 95: "meta", 96: "micro", 97: "milli", 98: "mis", 99: "mono", 100: "multi", 101: "non", 102: "out", 103: "over", 104: "photo", 105: "poly", 106: "pre", 107: "pseudo", 108: "re", 109: "semi", 110: "stereo", 111: "sub", 112: "super", 113: "thermo", 114: "ultra", 115: "under", /*must precede un*/ 116: "un", 117: 0 118: }; 119: 120: int vflag; 121: int xflag; 122: char word[100]; 123: char original[100]; 124: char *deriv[40]; 125: char affix[40]; 126: 127: main(argc,argv) 128: char **argv; 129: { 130: register char *ep, *cp; 131: register char *dp; 132: int fold; 133: int j; 134: FILE *file, *found; 135: if(!prime(argc,argv)) { 136: fprintf(stderr, 137: "spell: cannot initialize hash table\n"); 138: exit(1); 139: } 140: found = fopen(argv[2],"w"); 141: for(argc-=3,argv+=3; argc>0 && argv[0][0]=='-'; argc--,argv++) 142: switch(argv[0][1]) { 143: case 'b': 144: ise(); 145: break; 146: case 'v': 147: vflag++; 148: break; 149: case 'x': 150: xflag++; 151: break; 152: } 153: for(;; fprintf(file,"%s%s\n",affix,original)) { 154: affix[0] = 0; 155: file = found; 156: for(ep=word;(*ep=j=getchar())!='\n';ep++) 157: if(j == EOF) { 158: fclose(found); 159: exit(0); 160: } 161: for(cp=word,dp=original; cp<ep; ) 162: *dp++ = *cp++; 163: *dp = 0; 164: fold = 0; 165: for(cp=word;cp<ep;cp++) 166: if(islower(*cp)) 167: goto lcase; 168: if(putsuf(ep,".",0)) 169: continue; 170: ++fold; 171: for(cp=original+1,dp=word+1;dp<ep;dp++,cp++) 172: *dp = Tolower(*cp); 173: lcase: 174: if(putsuf(ep,".",0)||suffix(ep,0)) 175: continue; 176: if(isupper(word[0])) { 177: for(cp=original,dp=word; *dp = *cp++; dp++) 178: if (fold) *dp = Tolower(*dp); 179: word[0] = Tolower(word[0]); 180: goto lcase; 181: } 182: file = stdout; 183: } 184: } 185: 186: suffix(ep,lev) 187: char *ep; 188: { 189: register struct suftab *t; 190: register char *cp, *sp; 191: lev += DLEV; 192: deriv[lev] = deriv[lev-1] = 0; 193: for(t= &suftab[0];sp=t->suf;t++) { 194: cp = ep; 195: while(*sp) 196: if(*--cp!=*sp++) 197: goto next; 198: for(sp=cp; --sp>=word&&!vowel(*sp); ) ; 199: if(sp<word) 200: return(0); 201: if((*t->p1)(ep-t->n1,t->d1,t->a1,lev+1)) 202: return(1); 203: if(t->p2!=0) { 204: deriv[lev] = deriv[lev+1] = 0; 205: return((*t->p2)(ep-t->n2,t->d2,t->a2,lev)); 206: } 207: return(0); 208: next: ; 209: } 210: return(0); 211: } 212: 213: nop() 214: { 215: return(0); 216: } 217: 218: strip(ep,d,a,lev) 219: char *ep,*d,*a; 220: { 221: return(putsuf(ep,a,lev)||suffix(ep,lev)); 222: } 223: 224: s(ep,d,a,lev) 225: char *ep,*d,*a; 226: { 227: if(lev>DLEV+1) 228: return(0); 229: if(*ep=='s'&&ep[-1]=='s') 230: return(0); 231: return(strip(ep,d,a,lev)); 232: } 233: 234: an(ep,d,a,lev) 235: char *ep,*d,*a; 236: { 237: if(!isupper(*word)) /*must be proper name*/ 238: return(0); 239: return(putsuf(ep,a,lev)); 240: } 241: 242: ize(ep,d,a,lev) 243: char *ep,*d,*a; 244: { 245: *ep++ = 'e'; 246: return(strip(ep,"",d,lev)); 247: } 248: 249: y_to_e(ep,d,a,lev) 250: char *ep,*d,*a; 251: { 252: *ep++ = 'e'; 253: return(strip(ep,"",d,lev)); 254: } 255: 256: ily(ep,d,a,lev) 257: char *ep,*d,*a; 258: { 259: if(ep[-1]=='i') 260: return(i_to_y(ep,d,a,lev)); 261: else 262: return(strip(ep,d,a,lev)); 263: } 264: 265: ncy(ep,d,a,lev) 266: char *ep, *d, *a; 267: { 268: if(skipv(skipv(ep-1))<word) 269: return(0); 270: ep[-1] = 't'; 271: return(strip(ep,d,a,lev)); 272: } 273: 274: bility(ep,d,a,lev) 275: char *ep,*d,*a; 276: { 277: *ep++ = 'l'; 278: return(y_to_e(ep,d,a,lev)); 279: } 280: 281: i_to_y(ep,d,a,lev) 282: char *ep,*d,*a; 283: { 284: if(ep[-1]=='i') { 285: ep[-1] = 'y'; 286: a = d; 287: } 288: return(strip(ep,"",a,lev)); 289: } 290: 291: es(ep,d,a,lev) 292: char *ep,*d,*a; 293: { 294: if(lev>DLEV) 295: return(0); 296: switch(ep[-1]) { 297: default: 298: return(0); 299: case 'i': 300: return(i_to_y(ep,d,a,lev)); 301: case 's': 302: case 'h': 303: case 'z': 304: case 'x': 305: return(strip(ep,d,a,lev)); 306: } 307: } 308: 309: metry(ep,d,a,lev) 310: char *ep, *d,*a; 311: { 312: ep[-2] = 'e'; 313: ep[-1] = 'r'; 314: return(strip(ep,d,a,lev)); 315: } 316: 317: tion(ep,d,a,lev) 318: char *ep,*d,*a; 319: { 320: switch(ep[-2]) { 321: case 'c': 322: case 'r': 323: return(putsuf(ep,a,lev)); 324: case 'a': 325: return(y_to_e(ep,d,a,lev)); 326: } 327: return(0); 328: } 329: 330: /* possible consonant-consonant-e ending*/ 331: CCe(ep,d,a,lev) 332: char *ep,*d,*a; 333: { 334: switch(ep[-1]) { 335: case 'l': 336: if(vowel(ep[-2])) 337: break; 338: switch(ep[-2]) { 339: case 'l': 340: case 'r': 341: case 'w': 342: break; 343: default: 344: return(y_to_e(ep,d,a,lev)); 345: } 346: break; 347: case 's': 348: if(ep[-2]=='s') 349: break; 350: case 'c': 351: case 'g': 352: if(*ep=='a') 353: return(0); 354: case 'v': 355: case 'z': 356: if(vowel(ep[-2])) 357: break; 358: case 'u': 359: if(y_to_e(ep,d,a,lev)) 360: return(1); 361: if(!(ep[-2]=='n'&&ep[-1]=='g')) 362: return(0); 363: } 364: return(VCe(ep,d,a,lev)); 365: } 366: 367: /* possible consonant-vowel-consonant-e ending*/ 368: VCe(ep,d,a,lev) 369: char *ep,*d,*a; 370: { 371: char c; 372: c = ep[-1]; 373: if(c=='e') 374: return(0); 375: if(!vowel(c) && vowel(ep[-2])) { 376: c = *ep; 377: *ep++ = 'e'; 378: if(putsuf(ep,d,lev)||suffix(ep,lev)) 379: return(1); 380: ep--; 381: *ep = c; 382: } 383: return(strip(ep,d,a,lev)); 384: } 385: 386: char *lookuppref(wp,ep) 387: char **wp; 388: char *ep; 389: { 390: register char **sp; 391: register char *bp,*cp; 392: for(sp=preftab;*sp;sp++) { 393: bp = *wp; 394: for(cp= *sp;*cp;cp++,bp++) 395: if(Tolower(*bp)!=*cp) 396: goto next; 397: for(cp=bp;cp<ep;cp++) 398: if(vowel(*cp)) { 399: *wp = bp; 400: return(*sp); 401: } 402: next: ; 403: } 404: return(0); 405: } 406: 407: putsuf(ep,a,lev) 408: char *ep,*a; 409: { 410: register char *cp; 411: char *bp; 412: register char *pp; 413: int val = 0; 414: char space[20]; 415: deriv[lev] = a; 416: if(putw(word,ep,lev)) 417: return(1); 418: bp = word; 419: pp = space; 420: deriv[lev+1] = pp; 421: while(cp=lookuppref(&bp,ep)) { 422: *pp++ = '+'; 423: while(*pp = *cp++) 424: pp++; 425: if(putw(bp,ep,lev+1)) { 426: val = 1; 427: break; 428: } 429: } 430: deriv[lev+1] = deriv[lev+2] = 0; 431: return(val); 432: } 433: 434: putw(bp,ep,lev) 435: char *bp,*ep; 436: { 437: register i, j; 438: char duple[3]; 439: if(ep-bp<=1) 440: return(0); 441: if(vowel(*ep)) { 442: if(monosyl(bp,ep)) 443: return(0); 444: } 445: i = dict(bp,ep); 446: if(i==0&&vowel(*ep)&&ep[-1]==ep[-2]&&monosyl(bp,ep-1)) { 447: ep--; 448: deriv[++lev] = duple; 449: duple[0] = '+'; 450: duple[1] = *ep; 451: duple[2] = 0; 452: i = dict(bp,ep); 453: } 454: if(vflag==0||i==0) 455: return(i); 456: j = lev; 457: do { 458: if(deriv[j]) 459: strcat(affix,deriv[j]); 460: } while(--j>0); 461: strcat(affix,"\t"); 462: return(i); 463: } 464: 465: 466: monosyl(bp,ep) 467: char *bp, *ep; 468: { 469: if(ep<bp+2) 470: return(0); 471: if(vowel(*--ep)||!vowel(*--ep) 472: ||ep[1]=='x'||ep[1]=='w') 473: return(0); 474: while(--ep>=bp) 475: if(vowel(*ep)) 476: return(0); 477: return(1); 478: } 479: 480: char * 481: skipv(s) 482: char *s; 483: { 484: if(s>=word&&vowel(*s)) 485: s--; 486: while(s>=word&&!vowel(*s)) 487: s--; 488: return(s); 489: } 490: 491: vowel(c) 492: { 493: switch(Tolower(c)) { 494: case 'a': 495: case 'e': 496: case 'i': 497: case 'o': 498: case 'u': 499: case 'y': 500: return(1); 501: } 502: return(0); 503: } 504: 505: /* crummy way to Britishise */ 506: ise() 507: { 508: register struct suftab *p; 509: for(p = suftab;p->suf;p++) { 510: ztos(p->suf); 511: ztos(p->d1); 512: ztos(p->a1); 513: } 514: } 515: ztos(s) 516: char *s; 517: { 518: for(;*s;s++) 519: if(*s=='z') 520: *s = 's'; 521: } 522: 523: dict(bp,ep) 524: char *bp, *ep; 525: { 526: register char *wp; 527: long h; 528: register long *lp; 529: register i; 530: if(xflag) 531: printf("=%.*s\n",ep-bp,bp); 532: for(i=0; i<NP; i++) { 533: for (wp = bp, h = 0, lp = pow2[i]; wp < ep; ++wp, ++lp) 534: h += *wp * *lp; 535: h += '\n' * *lp; 536: h %= p[i]; 537: if(get(h)==0) 538: return(0); 539: } 540: return(1); 541: }