#ifndef lint
static char *sccsid = "@(#)checknr.c 4.4 (Berkeley) 5/13/81";
#endif
/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP resp.  Maybe more sophistication
 * later but for now think of these restrictions as contributions to
 * structured typesetting.
 */
#include <sys/types.h>
#include <ctype.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 * stktop is the index of the top entry, or -1 when the stack is empty.
 */
struct stkstr {
	int opno;	/* number of opening bracket (index into br[]) */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/*
 * The kinds of opening and closing brackets.
 * The table ends at the first entry whose opbr is a null pointer; the
 * -a option appends to it, so that final all-zero entry must always exist.
 */
#define SZ	0	/* br[] index used for \s (and .sz) */
#define FT	1	/* br[] index used for \f (and .ft) */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{"sz", "sz"},	/* also \s */
	{"ft", "ft"},	/* also \f */
	/* the -mm package */
	{"AL", "LE"},
	{"AS", "AE"},
	{"BL", "LE"},
	{"BS", "BE"},
	{"DF", "DE"},
	{"DL", "LE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ML", "LE"},
	{"NS", "NE"},
	{"RL", "LE"},
	{"VL", "LE"},
	/* the -ms package */
	{"AB", "AE"},
	{"CD", "DE"},
	{"DS", "DE"},
	{"FS", "FE"},
	{"ID", "DE"},
	{"KF", "KE"},
	{"KS", "KE"},
	{"LD", "DE"},
	{"LG", "NL"},
	{"QS", "QE"},
	{"RS", "RE"},
	{"SM", "NL"},
	/* The -me package */
	{"(b", ")b"},
	{"(c", ")c"},
	{"(d", ")d"},
	{"(f", ")f"},
	{"(l", ")l"},
	{"(q", ")q"},
	{"(x", ")x"},
	{"(z", ")z"},
	/* Things needed by preprocessors */
	{"EQ", "EN"},
	{"TS", "TE"},
	/* Refer */
	{"[", "]"},
	{0, 0}
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() relies on this table being sorted on the first two characters.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AS", "AT", "AU", "AX", "B", "B1", "B2", "BD",
"BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT", "D",
"DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM", "EN", "EQ",
"EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO", "FQ", "FS",
"FV", "FX", "H", "HC", "HM", "HO", "HU", "I", "ID", "IE", "IH", "IM",
"IP", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB", "LC", "LD", "LE", "LG",
"LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR", "MT", "ND", "NE", "NH",
"NL", "NP", "NS", "OF", "OH", "OK", "OP", "P", "PF", "PH", "PP", "PT",
"PY", "QE", "QP", "QS", "R", "RA", "RC", "RE", "RL", "RP", "RQ", "RS",
"RT", "S", "S0", "S2", "S3", "SA", "SG", "SH", "SK", "SM", "SP", "SY",
"TA", "TB", "TC", "TD", "TE", "TH", "TL", "TM", "TP", "TQ", "TR", "TS",
"TX", "UL", "US", "UX", "VL", "WC", "WH", "XD", "XF", "XK", "XP", "[", "[-",
"[0", "[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp", "yr",
0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(const char *mac);
void	nomatch(const char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int linenum);
void	checkknown(const char *mac);
void	addcmd(char *linebuf);
void	addmac(const char *mac);
int	binsrch(const char *mac);

/*
 * Return a freshly allocated, NUL-terminated copy of the first (at most)
 * two characters of s.  Exits with status 1 if memory cannot be obtained.
 */
static char *
savemac(const char *s)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL) {
		fprintf(stderr, "checknr: out of memory\n");
		exit(1);
	}
	copy[0] = s[0];
	copy[1] = s[0] ? s[1] : '\0';
	copy[2] = '\0';
	return copy;
}

/*
 * Push one entry on the bracket stack.  Exits with status 1 instead of
 * writing past the end of stk[] when the input nests deeper than MAXSTK.
 */
static void
stkpush(int opno, int pl, int parm, int lno)
{
	if (stktop >= MAXSTK - 1) {
		pe(lno);
		printf("Nesting deeper than %d levels\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lno;
}

/*
 * Parse the option arguments, then check every file named on the command
 * line, or standard input when options were given but no files.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	if (argc <= 1)
		usage();
	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay {0, 0} to end the table */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				b1[0] = cp[0];
				b1[1] = cp[1];
				b1[2] = '\0';	/* terminate right after the two-letter name */
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--;
		argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL) {
				perror(cfilename);
			} else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	exit(0);
}

/* Print the usage message on standard output and exit with status 1. */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Check one input stream.  For every line starting with '.', the command
 * name is isolated, looked up in knowncmds, registered if the line is a
 * ".de" definition, and matched against the bracket table.  Every line is
 * then scanned for embedded \s and \f escapes (unless disabled with -s and
 * -f).  Whatever is still on the stack at end of input is reported as
 * unmatched.
 */
void
process(FILE *f)
{
	int i, n;
	char mac[5];	/* The current macro or nroff command */
	int pl;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 * strncpy does not terminate when it copies all four
			 * bytes, so terminate explicitly.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';
			if (isspace((unsigned char)mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (isspace((unsigned char)mac[1])) {
				mac[1] = 0;
			} else if (isspace((unsigned char)mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			/* only a backslash not preceded by a backslash counts */
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;
			if (line[i + 1] == '\0')
				break;	/* lone backslash at end of buffer */
			/*
			 * Always step onto the escape letter, so that \f is
			 * still recognized when -s is given.
			 */
			i++;
			if (!sflag && line[i] == 's') {
				if (line[i + 1] == '\0')
					break;	/* escape cut off by end of buffer */
				pl = (unsigned char)line[++i];
				if (isdigit(pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				/* i is left on the last character consumed */
				while (isdigit((unsigned char)line[i + 1])) {
					i++;
					if (n <= (INT_MAX - 9) / 10)
						n = 10 * n + (line[i] - '0');
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					stkpush(SZ, pl, n, lineno);
				}
			} else if (!fflag && line[i] == 'f') {
				if (line[i + 1] == '\0')
					break;	/* escape cut off by end of buffer */
				n = (unsigned char)line[++i];
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					stkpush(FT, 1, n, lineno);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--)
		complain(i);
}

/* Report stack entry i as unmatched. */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro, "\s..." for a size change, "\fx" for a font change.
 */
void
prop(int i)
{
	if (stk[i].pl == 0) {
		printf(".%s", br[stk[i].opno].opbr);
		return;
	}
	switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
		    i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket table: pop the stack if it closes
 * the entry on top, push it if it is an opener, and hand it to nomatch()
 * if it is a closer that does not match the top of the stack.
 */
void
chkcmd(const char *mac)
{
	int i;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr)) {
		stktop--;	/* OK. Pop & forget */
		return;
	}
	/* No.  Maybe it's an opener */
	for (i = 0; br[i].opbr; i++) {
		if (eq(mac, br[i].opbr)) {
			/* Found.  Push it. */
			stkpush(i, 0, 0, lineno);
			break;
		}
		/*
		 * Maybe it's an unmatched closer.
		 * NOTE: this depends on the fact
		 * that none of the closers can be
		 * openers too.
		 */
		if (eq(mac, br[i].clbr)) {
			nomatch(mac);
			break;
		}
	}
}

/*
 * mac is a closer that does not match the top of the stack.  Diagnose it
 * and resynchronize the stack.
 */
void
nomatch(const char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--) {
		if (!eq(mac, br[stk[j].opno].clbr))
			continue;
		/* Found.  Make a good diagnostic. */
		if (j == stktop - 2) {
			/*
			 * Check for special case \fx..\fR and don't
			 * complain.
			 */
			if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
			    && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
				stktop = j - 1;
				return;
			}
			/*
			 * We have two unmatched frobs.  Chances are
			 * they were intended to match, so we mention
			 * them together.
			 */
			pe(stk[j+1].lno);
			prop(j + 1);
			printf(" does not match %d: ", stk[j+2].lno);
			prop(j + 2);
			printf("\n");
		} else {
			for (i = j + 1; i <= stktop; i++)
				complain(i);
		}
		stktop = j - 1;
		return;
	}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * print the first part of an error message, given the line number.
 * The file name is included only when more than one file is processed.
 */
void
pe(int linenum)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", linenum);
}

/*
 * Complain about mac unless it is ".", is found in knowncmds, or starts
 * a comment (\").
 */
void
checkknown(const char *mac)
{
	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * linebuf must start with ".de".  The name is terminated in place, so
 * linebuf is cut short right after the macro name.
 */
void
addcmd(char *linebuf)
{
	char *mac;

	/*
	 * grab the macro being defined.  Start right after ".de" so that a
	 * line holding nothing but ".de" is never read past its terminator.
	 */
	mac = linebuf + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", linebuf);
		return;
	}
	if (mac[1] != 0)	/* never write beyond the terminator */
		mac[2] = 0;
	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	addmac(mac);	/* enforces the MAXCMDS limit */
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the shifting below is pretty fast.
 *
 * A name that is already in the table is left alone: binsrch() reports
 * an insertion point only for names it did not find, so inserting a
 * known name would add a duplicate and break the sort order.
 * Exits with status 1 when the table is full.
 */
void
addmac(const char *mac)
{
	char **loc;

	if (binsrch(mac) >= 0)	/* it's OK to redefine something */
		return;
	/* binsrch set slot to the insertion point as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	loc = &knowncmds[slot];
	/* open a gap at slot by moving the tail up one place */
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof *loc);
	*loc = savemac(mac);
	ncmds++;
#ifdef DEBUG
	printf("after: %s at slot %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac, comparing the first two
 * characters only.
 * If found, return the index.  If not, return -1.
 * In both cases slot is set: to the index found, or to the place the
 * name would have gone.
 */
int
binsrch(const char *mac)
{
	const char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = bot + (top - bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0) {
			slot = mid;
			return mid;
		}
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}