/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#ifndef lint
char copyright[] =
"@(#) Copyright (c) 1980 Regents of the University of California.\n"
"All rights reserved.\n";
#endif /* not lint */

#ifndef lint
static char sccsid[] = "@(#)checknr.c 5.2 (Berkeley) 12/5/85";
#endif /* not lint */

/*
 * checknr: check an nroff/troff input file for matching macro calls.
 * We also attempt to match size and font changes, but only the embedded
 * kind.  These must end in \s0 and \fP respectively.  Maybe more
 * sophistication later, but for now think of these restrictions as
 * contributions to structured typesetting.
 */
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#define MAXSTK	100	/* Stack size */
#define MAXBR	100	/* Max number of bracket pairs known */
#define MAXCMDS	500	/* Max number of commands known */

/*
 * The stack on which we remember what we've seen so far.
 */
struct stkstr {
	int opno;	/* number of opening bracket */
	int pl;		/* '+', '-', ' ' for \s, 1 for \f, 0 for .ft */
	int parm;	/* parm to size, font, etc */
	int lno;	/* line number the thing came in in */
} stk[MAXSTK];
int stktop;

/* Indices of the two entries of br[] that double as \s and \f. */
#define SZ	0
#define FT	1

/*
 * The kinds of opening and closing brackets.
 * The table is terminated by an entry whose opbr is a null pointer;
 * -a appends to it at run time.
 */
struct brstr {
	char *opbr;
	char *clbr;
} br[MAXBR] = {
	/* A few bare bones troff commands */
	{ "sz", "sz" },	/* also \s */
	{ "ft", "ft" },	/* also \f */
	/* the -mm package */
	{ "AL", "LE" },
	{ "AS", "AE" },
	{ "BL", "LE" },
	{ "BS", "BE" },
	{ "DF", "DE" },
	{ "DL", "LE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ML", "LE" },
	{ "NS", "NE" },
	{ "RL", "LE" },
	{ "VL", "LE" },
	/* the -ms package */
	{ "AB", "AE" },
	{ "BD", "DE" },
	{ "CD", "DE" },
	{ "DS", "DE" },
	{ "FS", "FE" },
	{ "ID", "DE" },
	{ "KF", "KE" },
	{ "KS", "KE" },
	{ "LD", "DE" },
	{ "LG", "NL" },
	{ "QS", "QE" },
	{ "RS", "RE" },
	{ "SM", "NL" },
	{ "XA", "XE" },
	{ "XS", "XE" },
	/* The -me package */
	{ "(b", ")b" },
	{ "(c", ")c" },
	{ "(d", ")d" },
	{ "(f", ")f" },
	{ "(l", ")l" },
	{ "(q", ")q" },
	{ "(x", ")x" },
	{ "(z", ")z" },
	/* Things needed by preprocessors */
	{ "EQ", "EN" },
	{ "TS", "TE" },
	/* Refer */
	{ "[", "]" },
	{ 0, 0 }
};

/*
 * All commands known to nroff, plus macro packages.
 * Used so we can complain about unrecognized commands.
 * binsrch() searches this table, so it must stay sorted by the first
 * two characters of each name; addmac() inserts in order.
 */
char *knowncmds[MAXCMDS] = {
"$c", "$f", "$h", "$p", "$s", "(b", "(c", "(d", "(f", "(l", "(q", "(t",
"(x", "(z", ")b", ")c", ")d", ")f", ")l", ")q", ")t", ")x", ")z", "++",
"+c", "1C", "1c", "2C", "2c", "@(", "@)", "@C", "@D", "@F", "@I", "@M",
"@c", "@e", "@f", "@h", "@m", "@n", "@o", "@p", "@r", "@t", "@z", "AB",
"AE", "AF", "AI", "AL", "AM", "AS", "AT", "AU", "AX", "B", "B1", "B2",
"BD", "BE", "BG", "BL", "BS", "BT", "BX", "C1", "C2", "CD", "CM", "CT",
"D", "DA", "DE", "DF", "DL", "DS", "DT", "EC", "EF", "EG", "EH", "EM",
"EN", "EQ", "EX", "FA", "FD", "FE", "FG", "FJ", "FK", "FL", "FN", "FO",
"FQ", "FS", "FV", "FX", "H", "HC", "HD", "HM", "HO", "HU", "I", "ID",
"IE", "IH", "IM", "IP", "IX", "IZ", "KD", "KE", "KF", "KQ", "KS", "LB",
"LC", "LD", "LE", "LG", "LI", "LP", "MC", "ME", "MF", "MH", "ML", "MR",
"MT", "ND", "NE", "NH", "NL", "NP", "NS", "OF", "OH", "OK", "OP", "P",
"P1", "PF", "PH", "PP", "PT", "PX", "PY", "QE", "QP", "QS", "R", "RA",
"RC", "RE", "RL", "RP", "RQ", "RS", "RT", "S", "S0", "S2", "S3", "SA",
"SG", "SH", "SK", "SM", "SP", "SY", "T&", "TA", "TB", "TC", "TD", "TE",
"TH", "TL", "TM", "TP", "TQ", "TR", "TS", "TX", "UL", "US", "UX", "VL",
"WC", "WH", "XA", "XD", "XE", "XF", "XK", "XP", "XS", "[", "[-", "[0",
"[1", "[2", "[3", "[4", "[5", "[<", "[>", "[]", "]", "]-", "]<", "]>",
"][", "ab", "ac", "ad", "af", "am", "ar", "as", "b", "ba", "bc", "bd",
"bi", "bl", "bp", "br", "bx", "c.", "c2", "cc", "ce", "cf", "ch", "cs",
"ct", "cu", "da", "de", "di", "dl", "dn", "ds", "dt", "dw", "dy", "ec",
"ef", "eh", "el", "em", "eo", "ep", "ev", "ex", "fc", "fi", "fl", "fo",
"fp", "ft", "fz", "hc", "he", "hl", "hp", "ht", "hw", "hx", "hy", "i",
"ie", "if", "ig", "in", "ip", "it", "ix", "lc", "lg", "li", "ll", "ln",
"lo", "lp", "ls", "lt", "m1", "m2", "m3", "m4", "mc", "mk", "mo", "n1",
"n2", "na", "ne", "nf", "nh", "nl", "nm", "nn", "np", "nr", "ns", "nx",
"of", "oh", "os", "pa", "pc", "pi", "pl", "pm", "pn", "po", "pp", "ps",
"q", "r", "rb", "rd", "re", "rm", "rn", "ro", "rr", "rs", "rt", "sb",
"sc", "sh", "sk", "so", "sp", "ss", "st", "sv", "sz", "ta", "tc", "th",
"ti", "tl", "tm", "tp", "tr", "u", "uf", "uh", "ul", "vs", "wh", "xp",
"yr", 0
};

int	lineno;		/* current line number in input file */
char	line[256];	/* the current line */
char	*cfilename;	/* name of current file */
int	nfiles;		/* number of files to process */
int	fflag;		/* -f: ignore \f */
int	sflag;		/* -s: ignore \s */
int	ncmds;		/* size of knowncmds */
int	slot;		/* slot in knowncmds found by binsrch */

void	usage(void);
void	process(FILE *f);
void	complain(int i);
void	prop(int i);
void	chkcmd(char *line, char *mac);
void	nomatch(char *mac);
int	eq(const char *s1, const char *s2);
void	pe(int lineno);
void	checkknown(char *mac);
void	addcmd(char *line);
void	addmac(const char *mac);
int	binsrch(const char *mac);

static char	*savemac(const char *name);
static void	push(int opno, int pl, int parm);
static int	endofname(int c);

/*
 * Parse the option arguments, then check each named file (or the
 * standard input when no file is named).  A file that cannot be opened
 * is reported with perror() and skipped.
 */
int
main(int argc, char **argv)
{
	FILE *f;
	int i;
	char *cp;
	char b1[4];

	/* Figure out how many known commands there are */
	while (knowncmds[ncmds])
		ncmds++;
	while (argc > 1 && argv[1][0] == '-') {
		switch (argv[1][1]) {

		/* -a: add pairs of macros, written as -a.xx.yy.xx.yy... */
		case 'a':
			i = (int)strlen(argv[1]) - 2;
			if (i % 6 != 0)
				usage();
			/* look for empty macro slots */
			for (i = 0; br[i].opbr; i++)
				;
			for (cp = argv[1] + 3; cp[-1]; cp += 6) {
				/* the last slot must stay empty: it ends the table */
				if (i >= MAXBR - 1) {
					printf("Only %d bracket pairs allowed\n",
					    MAXBR - 1);
					exit(1);
				}
				br[i].opbr = savemac(cp);
				br[i].clbr = savemac(cp + 3);
				addmac(br[i].opbr);	/* knows pairs are also known cmds */
				addmac(br[i].clbr);
				i++;
			}
			break;

		/* -c: add known commands, written as -c.xx.xx.xx... */
		case 'c':
			i = (int)strlen(argv[1]) - 2;
			if (i % 3 != 0)
				usage();
			for (cp = argv[1] + 3; cp[-1]; cp += 3) {
				if (cp[2] && cp[2] != '.')
					usage();
				strncpy(b1, cp, 2);
				b1[2] = '\0';	/* strncpy does not terminate */
				addmac(b1);
			}
			break;

		/* -f: ignore font changes */
		case 'f':
			fflag = 1;
			break;

		/* -s: ignore size changes */
		case 's':
			sflag = 1;
			break;
		default:
			usage();
		}
		argc--; argv++;
	}

	nfiles = argc - 1;

	if (nfiles > 0) {
		for (i = 1; i < argc; i++) {
			cfilename = argv[i];
			f = fopen(cfilename, "r");
			if (f == NULL)
				perror(cfilename);
			else {
				process(f);
				fclose(f);
			}
		}
	} else {
		cfilename = "stdin";
		process(stdin);
	}
	return 0;
}

/*
 * Print the usage message and exit with status 1.
 */
void
usage(void)
{
	printf("Usage: checknr -s -f -a.xx.yy.xx.yy... -c.xx.xx.xx...\n");
	exit(1);
}

/*
 * Return a freshly allocated copy of the first two characters of name
 * (fewer if name is shorter), always NUL-terminated.  Exits on
 * allocation failure.
 */
static char *
savemac(const char *name)
{
	char *copy;

	copy = malloc(3);
	if (copy == NULL) {
		fprintf(stderr, "checknr: out of memory\n");
		exit(1);
	}
	strncpy(copy, name, 2);
	copy[2] = '\0';
	return copy;
}

/*
 * Push an entry for the current input line onto stk[].  Exits with a
 * diagnostic instead of writing beyond the end of the stack.
 */
static void
push(int opno, int pl, int parm)
{
	if (stktop >= MAXSTK - 1) {
		pe(lineno);
		printf("Nesting deeper than %d levels\n", MAXSTK);
		exit(1);
	}
	stktop++;
	stk[stktop].opno = opno;
	stk[stktop].pl = pl;
	stk[stktop].parm = parm;
	stk[stktop].lno = lineno;
}

/*
 * True if c ends a command name: white space, or the end of the line
 * buffer (a last line with no trailing newline).
 */
static int
endofname(int c)
{
	return c == '\0' || isspace((unsigned char)c);
}

/*
 * Check one input stream.  Each line starting with '.' is treated as a
 * command: its name is isolated, looked up, and matched against the
 * bracket stack.  Every line is then scanned for embedded \s and \f
 * escapes.  Anything still on the stack at end of input is reported as
 * unmatched.
 */
void
process(FILE *f)
{
	int i, n;
	char mac[5];	/* The current macro or nroff command */
	int pl;

	stktop = -1;
	for (lineno = 1; fgets(line, sizeof line, f); lineno++) {
		if (line[0] == '.') {
			/*
			 * find and isolate the macro/command name.
			 */
			strncpy(mac, line + 1, 4);
			mac[4] = '\0';	/* strncpy leaves 4-char names unterminated */
			if (endofname(mac[0])) {
				pe(lineno);
				printf("Empty command\n");
			} else if (endofname(mac[1])) {
				mac[1] = 0;
			} else if (endofname(mac[2])) {
				mac[2] = 0;
			} else if (mac[0] != '\\' || mac[1] != '\"') {
				pe(lineno);
				printf("Command too long\n");
			}

			/*
			 * Is it a known command?
			 */
			checkknown(mac);

			/*
			 * Should we add it?
			 */
			if (eq(mac, "de"))
				addcmd(line);

			chkcmd(line, mac);
		}

		/*
		 * At this point we process the line looking
		 * for \s and \f.
		 */
		for (i = 0; line[i]; i++) {
			/* only a backslash that is not itself escaped */
			if (line[i] != '\\' || (i > 0 && line[i - 1] == '\\'))
				continue;
			/*
			 * Step onto the escape's name unconditionally, so
			 * that \f is still recognised when -s is given.
			 */
			i++;
			if (line[i] == '\0')
				break;
			if (!sflag && line[i] == 's') {
				pl = line[i + 1];
				if (pl == '\0')
					break;	/* bare \s at end of input */
				i++;
				if (isdigit((unsigned char)pl)) {
					n = pl - '0';
					pl = ' ';
				} else
					n = 0;
				while (isdigit((unsigned char)line[i + 1])) {
					i++;
					/* stop accumulating before int overflows */
					if (n < 10000)
						n = 10 * n + (line[i] - '0');
				}
				if (n == 0) {
					if (stktop >= 0 && stk[stktop].opno == SZ) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\s0\n");
					}
				} else {
					push(SZ, pl, n);
				}
			} else if (!fflag && line[i] == 'f') {
				n = line[i + 1];
				if (n == '\0')
					break;	/* bare \f at end of input */
				i++;
				if (n == 'P') {
					if (stktop >= 0 && stk[stktop].opno == FT) {
						stktop--;
					} else {
						pe(lineno);
						printf("unmatched \\fP\n");
					}
				} else {
					push(FT, 1, n);
				}
			}
		}
	}
	/*
	 * We've hit the end and look at all this stuff that hasn't been
	 * matched yet!  Complain, complain.
	 */
	for (i = stktop; i >= 0; i--) {
		complain(i);
	}
}

/*
 * Report stack entry i as unmatched, citing the line it came from.
 */
void
complain(int i)
{
	pe(stk[i].lno);
	printf("Unmatched ");
	prop(i);
	printf("\n");
}

/*
 * Print stack entry i the way it appeared in the input: ".xx" for a
 * macro (pl == 0), otherwise the \s or \f escape.
 */
void
prop(int i)
{
	if (stk[i].pl == 0)
		printf(".%s", br[stk[i].opno].opbr);
	else switch (stk[i].opno) {
	case SZ:
		printf("\\s%c%d", stk[i].pl, stk[i].parm);
		break;
	case FT:
		printf("\\f%c", stk[i].parm);
		break;
	default:
		printf("Bug: stk[%d].opno = %d = .%s, .%s",
			i, stk[i].opno, br[stk[i].opno].opbr, br[stk[i].opno].clbr);
	}
}

/*
 * Match command mac against the bracket stack: pop if it closes the
 * top entry, push if it is an opener, and hand it to nomatch() if it is
 * a closer that does not match the top.  The line argument is not used.
 */
void
chkcmd(char *line, char *mac)
{
	int i;

	(void)line;

	/*
	 * Check to see if it matches top of stack.
	 */
	if (stktop >= 0 && eq(mac, br[stk[stktop].opno].clbr))
		stktop--;	/* OK. Pop & forget */
	else {
		/* No. Maybe it's an opener */
		for (i = 0; br[i].opbr; i++) {
			if (eq(mac, br[i].opbr)) {
				/* Found. Push it. */
				push(i, 0, 0);
				break;
			}
			/*
			 * Maybe it's an unmatched closer.
			 * NOTE: this depends on the fact
			 * that none of the closers can be
			 * openers too.
			 */
			if (eq(mac, br[i].clbr)) {
				nomatch(mac);
				break;
			}
		}
	}
}

/*
 * Closer mac did not match the top of the stack.  Search downward for
 * the entry it does close; report what lies above that entry and pop
 * through it.  If nothing matches, report mac itself.
 */
void
nomatch(char *mac)
{
	int i, j;

	/*
	 * Look for a match further down on stack
	 * If we find one, it suggests that the stuff in
	 * between is supposed to match itself.
	 */
	for (j = stktop; j >= 0; j--)
		if (eq(mac, br[stk[j].opno].clbr)) {
			/* Found.  Make a good diagnostic. */
			if (j == stktop - 2) {
				/*
				 * Check for special case \fx..\fR and don't
				 * complain.
				 */
				if (stk[j+1].opno == FT && stk[j+1].parm != 'R'
				 && stk[j+2].opno == FT && stk[j+2].parm == 'R') {
					stktop = j - 1;
					return;
				}
				/*
				 * We have two unmatched frobs.  Chances are
				 * they were intended to match, so we mention
				 * them together.
				 */
				pe(stk[j+1].lno);
				prop(j + 1);
				printf(" does not match %d: ", stk[j+2].lno);
				prop(j + 2);
				printf("\n");
			} else for (i = j + 1; i <= stktop; i++) {
				complain(i);
			}
			stktop = j - 1;
			return;
		}
	/* Didn't find one.  Throw this away. */
	pe(lineno);
	printf("Unmatched .%s\n", mac);
}

/* eq: are two strings equal? */
int
eq(const char *s1, const char *s2)
{
	return (strcmp(s1, s2) == 0);
}

/*
 * Print the first part of an error message, given the line number.
 * The file name is included only when more than one file was named.
 */
void
pe(int lineno)
{
	if (nfiles > 1)
		printf("%s: ", cfilename);
	printf("%d: ", lineno);
}

/*
 * Complain if mac is not a known command.  ".." (mac is ".") and
 * comment lines are accepted silently.
 */
void
checkknown(char *mac)
{

	if (eq(mac, "."))
		return;
	if (binsrch(mac) >= 0)
		return;
	if (mac[0] == '\\' && mac[1] == '"')	/* comments */
		return;

	pe(lineno);
	printf("Unknown command: .%s\n", mac);
}

/*
 * We have a .de xx line in "line".  Add xx to the list of known commands.
 * The name is cut out in place, so line is truncated after it.
 */
void
addcmd(char *line)
{
	char *mac;

	/*
	 * Grab the macro being defined.  Start right after ".de" rather
	 * than one character further on, so a bare ".de" with nothing
	 * after it is not read past its terminator.
	 */
	mac = line + 3;
	while (isspace((unsigned char)*mac))
		mac++;
	if (*mac == 0) {
		pe(lineno);
		printf("illegal define: %s\n", line);
		return;
	}
	/* only cut at mac[2] when it lies inside the string */
	if (mac[1] != '\0')
		mac[2] = 0;
	if (isspace((unsigned char)mac[1]) || mac[1] == '\\')
		mac[1] = 0;
	addmac(mac);
}

/*
 * Add mac to the list.  We should really have some kind of tree
 * structure here but this is a quick-and-dirty job and I just don't
 * have time to mess with it.  (I wonder if this will come back to haunt
 * me someday?)  Anyway, I claim that .de is fairly rare in user
 * nroff programs, and the insertion below is pretty fast.
 *
 * A name that is already present is left alone.  Otherwise a private
 * copy of at most two characters is inserted at the position that
 * keeps knowncmds sorted.  Exits if the table is full.
 */
void
addmac(const char *mac)
{
	char **loc;
	char *copy;

	if (binsrch(mac) >= 0) {	/* it's OK to redefine something */
#ifdef DEBUG
		printf("binsrch(%s) -> already in table\n", mac);
#endif /* DEBUG */
		return;
	}
	/* binsrch sets slot as a side effect */
#ifdef DEBUG
	printf("binsrch(%s) -> %d\n", mac, slot);
#endif
	if (ncmds >= MAXCMDS) {
		printf("Only %d known commands allowed\n", MAXCMDS);
		exit(1);
	}
	copy = savemac(mac);
	loc = &knowncmds[slot];
	/* open a gap at slot by moving the tail up one place */
	memmove(loc + 1, loc, (size_t)(ncmds - slot) * sizeof *loc);
	*loc = copy;
	ncmds++;
#ifdef DEBUG
	printf("after: %s at slot %d, %d cmds\n", knowncmds[slot], slot, ncmds);
#endif
}

/*
 * Do a binary search in knowncmds for mac.
 * If found, return the index.  If not, return -1 and leave in slot the
 * index at which mac would have to be inserted.
 * Only the first two characters of each name are compared.
 */
int
binsrch(const char *mac)
{
	char *p;	/* pointer to current cmd in list */
	int d;		/* difference if any */
	int mid;	/* mid point in binary search */
	int top, bot;	/* boundaries of bin search, inclusive */

	top = ncmds - 1;
	bot = 0;
	while (top >= bot) {
		mid = (top + bot) / 2;
		p = knowncmds[mid];
		d = p[0] - mac[0];
		if (d == 0)
			d = p[1] - mac[1];
		if (d == 0)
			return mid;
		if (d < 0)
			bot = mid + 1;
		else
			top = mid - 1;
	}
	slot = bot;	/* place it would have gone */
	return -1;
}