1: #ifndef lint 2: static char *sccsid = "@(#)ctags.c 4.4 (Berkeley) 8/30/82"; 3: #endif 4: 5: #include <stdio.h> 6: #include <ctype.h> 7: 8: /* 9: * ctags: create a tags file 10: */ 11: 12: #define reg register 13: #define logical char 14: 15: #define TRUE (1) 16: #define FALSE (0) 17: 18: #define iswhite(arg) (_wht[arg]) /* T if char is white */ 19: #define begtoken(arg) (_btk[arg]) /* T if char can start token */ 20: #define intoken(arg) (_itk[arg]) /* T if char can be in token */ 21: #define endtoken(arg) (_etk[arg]) /* T if char ends tokens */ 22: #define isgood(arg) (_gd[arg]) /* T if char can be after ')' */ 23: 24: #define max(I1,I2) (I1 > I2 ? I1 : I2) 25: 26: struct nd_st { /* sorting structure */ 27: char *entry; /* function or type name */ 28: char *file; /* file name */ 29: logical f; /* use pattern or line no */ 30: int lno; /* for -x option */ 31: char *pat; /* search pattern */ 32: logical been_warned; /* set if noticed dup */ 33: struct nd_st *left,*right; /* left and right sons */ 34: }; 35: 36: long ftell(); 37: typedef struct nd_st NODE; 38: 39: logical number, /* T if on line starting with # */ 40: gotone, /* found a func already on line */ 41: /* boolean "func" (see init) */ 42: _wht[0177],_etk[0177],_itk[0177],_btk[0177],_gd[0177]; 43: 44: /* typedefs are recognized using a simple finite automata, 45: * tydef is its state variable. 46: */ 47: typedef enum {none, begin, middle, end } TYST; 48: 49: TYST tydef = none; 50: 51: char searchar = '/'; /* use /.../ searches */ 52: 53: int lineno; /* line number of current line */ 54: char line[4*BUFSIZ], /* current input line */ 55: *curfile, /* current input file name */ 56: *outfile= "tags", /* output file */ 57: *white = " \f\t\n", /* white chars */ 58: *endtk = " \t\n\"'#()[]{}=-+%*/&|^~!<>;,.:?", 59: /* token ending chars */ 60: *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz", 61: /* token starting chars */ 62: *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz0123456789", 63: /* valid in-token chars */ 64: *notgd = ",;"; /* non-valid after-function chars */ 65: 66: int file_num; /* current file number */ 67: int aflag; /* -a: append to tags */ 68: int tflag; /* -t: create tags for typedefs */ 69: int uflag; /* -u: update tags */ 70: int wflag; /* -w: suppress warnings */ 71: int vflag; /* -v: create vgrind style index output */ 72: int xflag; /* -x: create cxref style output */ 73: 74: char lbuf[BUFSIZ]; 75: 76: FILE *inf, /* ioptr for current input file */ 77: *outf; /* ioptr for tags file */ 78: 79: long lineftell; /* ftell after getc( inf ) == '\n' */ 80: 81: NODE *head; /* the head of the sorted binary tree */ 82: 83: char *savestr(); 84: char *rindex(); 85: main(ac,av) 86: int ac; 87: char *av[]; 88: { 89: char cmd[100]; 90: int i; 91: 92: while (ac > 1 && av[1][0] == '-') { 93: for (i=1; av[1][i]; i++) { 94: switch(av[1][i]) { 95: case 'B': 96: searchar='?'; 97: break; 98: case 'F': 99: searchar='/'; 100: break; 101: case 'a': 102: aflag++; 103: break; 104: case 't': 105: tflag++; 106: break; 107: case 'u': 108: uflag++; 109: break; 110: case 'w': 111: wflag++; 112: break; 113: case 'v': 114: vflag++; 115: xflag++; 116: break; 117: case 'x': 118: xflag++; 119: break; 120: default: 121: goto usage; 122: } 123: } 124: ac--; av++; 125: } 126: 127: if (ac <= 1) { 128: usage: printf("Usage: ctags [-BFatuwvx] file ...\n"); 129: exit(1); 130: } 131: 132: init(); /* set up boolean "functions" */ 133: /* 134: * loop through files finding functions 135: */ 136: for (file_num = 1; file_num < ac; file_num++) 137: find_entries(av[file_num]); 138: 139: if (xflag) { 140: put_entries(head); 141: exit(0); 142: } 143: if (uflag) { 144: for (i=1; i<ac; i++) { 145: sprintf(cmd, 146: "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS", 147: outfile, av[i], outfile); 148: system(cmd); 149: } 150: aflag++; 151: } 152: outf = fopen(outfile, aflag ? "a" : "w"); 153: if (outf == NULL) { 154: perror(outfile); 155: exit(1); 156: } 157: put_entries(head); 158: fclose(outf); 159: if (uflag) { 160: sprintf(cmd, "sort %s -o %s", outfile, outfile); 161: system(cmd); 162: } 163: exit(0); 164: } 165: 166: /* 167: * This routine sets up the boolean psuedo-functions which work 168: * by seting boolean flags dependent upon the corresponding character 169: * Every char which is NOT in that string is not a white char. Therefore, 170: * all of the array "_wht" is set to FALSE, and then the elements 171: * subscripted by the chars in "white" are set to TRUE. Thus "_wht" 172: * of a char is TRUE if it is the string "white", else FALSE. 173: */ 174: init() 175: { 176: 177: reg char *sp; 178: reg int i; 179: 180: for (i = 0; i < 0177; i++) { 181: _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE; 182: _gd[i] = TRUE; 183: } 184: for (sp = white; *sp; sp++) 185: _wht[*sp] = TRUE; 186: for (sp = endtk; *sp; sp++) 187: _etk[*sp] = TRUE; 188: for (sp = intk; *sp; sp++) 189: _itk[*sp] = TRUE; 190: for (sp = begtk; *sp; sp++) 191: _btk[*sp] = TRUE; 192: for (sp = notgd; *sp; sp++) 193: _gd[*sp] = FALSE; 194: } 195: 196: /* 197: * This routine opens the specified file and calls the function 198: * which finds the function and type definitions. 199: */ 200: find_entries(file) 201: char *file; 202: { 203: char *cp; 204: 205: if ((inf=fopen(file,"r")) == NULL) { 206: perror(file); 207: return; 208: } 209: curfile = savestr(file); 210: cp = rindex(file, '.'); 211: if (cp && (cp[1] != 'c' || cp[1] != 'h') && cp[2] == 0) { 212: if (PF_funcs(inf) == 0) { 213: rewind(inf); 214: C_entries(); 215: } 216: } else 217: C_entries(); 218: fclose(inf); 219: } 220: 221: pfnote(name, ln, f) 222: char *name; 223: logical f; /* f == TRUE when function */ 224: { 225: register char *fp; 226: register NODE *np; 227: char nbuf[BUFSIZ]; 228: 229: if ((np = (NODE *) malloc(sizeof (NODE))) == NULL) { 230: fprintf(stderr, "ctags: too many entries to sort\n"); 231: put_entries(head); 232: free_tree(head); 233: head = np = (NODE *) malloc(sizeof (NODE)); 234: } 235: if (xflag == 0 && !strcmp(name, "main")) { 236: fp = rindex(curfile, '/'); 237: if (fp == 0) 238: fp = curfile; 239: else 240: fp++; 241: sprintf(nbuf, "M%s", fp); 242: fp = rindex(nbuf, '.'); 243: if (fp && fp[2] == 0) 244: *fp = 0; 245: name = nbuf; 246: } 247: np->entry = savestr(name); 248: np->file = curfile; 249: np->f = f; 250: np->lno = ln; 251: np->left = np->right = 0; 252: if (xflag == 0) { 253: lbuf[50] = 0; 254: strcat(lbuf, "$"); 255: lbuf[50] = 0; 256: } 257: np->pat = savestr(lbuf); 258: if (head == NULL) 259: head = np; 260: else 261: add_node(np, head); 262: } 263: 264: /* 265: * This routine finds functions and typedefs in C syntax and adds them 266: * to the list. 267: */ 268: C_entries() 269: { 270: register int c; 271: register char *token, *tp; 272: logical incomm, inquote, inchar, midtoken; 273: int level; 274: char *sp; 275: char tok[BUFSIZ]; 276: 277: lineno = 1; 278: number = gotone = midtoken = inquote = inchar = incomm = FALSE; 279: level = 0; 280: sp = tp = token = line; 281: for (;;) { 282: *sp=c=getc(inf); 283: if (feof(inf)) 284: break; 285: if (c == '\n') 286: lineno++; 287: if (c == '\\') { 288: c = *++sp = getc(inf); 289: if (c = '\n') 290: c = ' '; 291: } else if (incomm) { 292: if (c == '*') { 293: while ((*++sp=c=getc(inf)) == '*') 294: continue; 295: if (c == '\n') 296: lineno++; 297: if (c == '/') 298: incomm = FALSE; 299: } 300: } else if (inquote) { 301: /* 302: * Too dumb to know about \" not being magic, but 303: * they usually occur in pairs anyway. 304: */ 305: if (c == '"') 306: inquote = FALSE; 307: continue; 308: } else if (inchar) { 309: if (c == '\'') 310: inchar = FALSE; 311: continue; 312: } else switch (c) { 313: case '"': 314: inquote = TRUE; 315: continue; 316: case '\'': 317: inchar = TRUE; 318: continue; 319: case '/': 320: if ((*++sp=c=getc(inf)) == '*') 321: incomm = TRUE; 322: else 323: ungetc(*sp, inf); 324: continue; 325: case '#': 326: if (sp == line) 327: number = TRUE; 328: continue; 329: case '{': 330: if (tydef == begin) { 331: tydef=middle; 332: } 333: level++; 334: continue; 335: case '}': 336: if (sp == line) 337: level = 0; /* reset */ 338: else 339: level--; 340: if (!level && tydef==middle) { 341: tydef=end; 342: } 343: continue; 344: } 345: if (!level && !inquote && !incomm && gotone == FALSE) { 346: if (midtoken) { 347: if (endtoken(c)) { 348: int f; 349: int pfline = lineno; 350: if (start_entry(&sp,token,&f)) { 351: strncpy(tok,token,tp-token+1); 352: tok[tp-token+1] = 0; 353: getline(); 354: pfnote(tok, pfline, f); 355: gotone = f; /* function */ 356: } 357: midtoken = FALSE; 358: token = sp; 359: } else if (intoken(c)) 360: tp++; 361: } else if (begtoken(c)) { 362: token = tp = sp; 363: midtoken = TRUE; 364: } 365: } 366: if (c == ';' && tydef==end) /* clean with typedefs */ 367: tydef=none; 368: sp++; 369: if (c == '\n' || sp > &line[sizeof (line) - BUFSIZ]) { 370: tp = token = sp = line; 371: lineftell = ftell(inf); 372: number = gotone = midtoken = inquote = inchar = FALSE; 373: } 374: } 375: } 376: 377: /* 378: * This routine checks to see if the current token is 379: * at the start of a function, or corresponds to a typedef 380: * It updates the input line * so that the '(' will be 381: * in it when it returns. 382: */ 383: start_entry(lp,token,f) 384: char **lp; 385: register char *token; 386: int *f; 387: { 388: 389: reg char c,*sp; 390: static logical found; 391: logical firsttok; /* T if have seen first token in ()'s */ 392: int bad; 393: 394: *f = 1; /* a function */ 395: sp = *lp; 396: c = *sp; 397: bad = FALSE; 398: if (!number) { /* space is not allowed in macro defs */ 399: while (iswhite(c)) { 400: *++sp = c = getc(inf); 401: if (c == '\n') { 402: lineno++; 403: if (sp > &line[sizeof (line) - BUFSIZ]) 404: goto ret; 405: } 406: } 407: /* the following tries to make it so that a #define a b(c) */ 408: /* doesn't count as a define of b. */ 409: } else { 410: if (!strncmp(token, "define", 6)) 411: found = 0; 412: else 413: found++; 414: if (found >= 2) { 415: gotone = TRUE; 416: badone: bad = TRUE; 417: goto ret; 418: } 419: } 420: /* check for the typedef cases */ 421: if (tflag && !strncmp(token, "typedef", 7)) { 422: tydef=begin; 423: goto badone; 424: } 425: if (tydef==begin && (!strncmp(token, "struct", 6) || 426: !strncmp(token, "union", 5) || !strncmp(token, "enum", 4))) { 427: goto badone; 428: } 429: if (tydef==begin) { 430: tydef=end; 431: goto badone; 432: } 433: if (tydef==end) { 434: *f = 0; 435: goto ret; 436: } 437: if (c != '(') 438: goto badone; 439: firsttok = FALSE; 440: while ((*++sp=c=getc(inf)) != ')') { 441: if (c == '\n') { 442: lineno++; 443: if (sp > &line[sizeof (line) - BUFSIZ]) 444: goto ret; 445: } 446: /* 447: * This line used to confuse ctags: 448: * int (*oldhup)(); 449: * This fixes it. A nonwhite char before the first 450: * token, other than a / (in case of a comment in there) 451: * makes this not a declaration. 452: */ 453: if (begtoken(c) || c=='/') firsttok++; 454: else if (!iswhite(c) && !firsttok) goto badone; 455: } 456: while (iswhite(*++sp=c=getc(inf))) 457: if (c == '\n') { 458: lineno++; 459: if (sp > &line[sizeof (line) - BUFSIZ]) 460: break; 461: } 462: ret: 463: *lp = --sp; 464: if (c == '\n') 465: lineno--; 466: ungetc(c,inf); 467: return !bad && (!*f || isgood(c)); 468: /* hack for typedefs */ 469: } 470: 471: getline() 472: { 473: long saveftell = ftell( inf ); 474: register char *cp; 475: 476: fseek( inf , lineftell , 0 ); 477: fgets(lbuf, sizeof lbuf, inf); 478: cp = rindex(lbuf, '\n'); 479: if (cp) 480: *cp = 0; 481: fseek(inf, saveftell, 0); 482: } 483: 484: free_tree(node) 485: NODE *node; 486: { 487: 488: while (node) { 489: free_tree(node->right); 490: cfree(node); 491: node = node->left; 492: } 493: } 494: 495: add_node(node, cur_node) 496: NODE *node,*cur_node; 497: { 498: register int dif; 499: 500: dif = strcmp(node->entry, cur_node->entry); 501: if (dif == 0) { 502: if (node->file == cur_node->file) { 503: if (!wflag) { 504: fprintf(stderr,"Duplicate entry in file %s, line %d: %s\n", 505: node->file,lineno,node->entry); 506: fprintf(stderr,"Second entry ignored\n"); 507: } 508: return; 509: } 510: if (!cur_node->been_warned) 511: if (!wflag) 512: fprintf(stderr,"Duplicate entry in files %s and %s: %s (Warning only)\n", 513: node->file, cur_node->file, node->entry); 514: cur_node->been_warned = TRUE; 515: return; 516: } 517: if (dif < 0) { 518: if (cur_node->left != NULL) 519: add_node(node,cur_node->left); 520: else 521: cur_node->left = node; 522: return; 523: } 524: if (cur_node->right != NULL) 525: add_node(node,cur_node->right); 526: else 527: cur_node->right = node; 528: } 529: 530: put_entries(node) 531: reg NODE *node; 532: { 533: reg char *sp; 534: 535: if (node == NULL) 536: return; 537: put_entries(node->left); 538: if (xflag == 0) 539: if (node->f) { /* a function */ 540: fprintf(outf, "%s\t%s\t%c^", 541: node->entry, node->file, searchar); 542: for (sp = node->pat; *sp; sp++) 543: if (*sp == '\\') 544: fprintf(outf, "\\\\"); 545: else if (*sp == searchar) 546: fprintf(outf, "\\%c", searchar); 547: else 548: putc(*sp, outf); 549: fprintf(outf, "%c\n", searchar); 550: } else { /* a typedef; text pattern inadequate */ 551: fprintf(outf, "%s\t%s\t%d\n", 552: node->entry, node->file, node->lno); 553: } 554: else if (vflag) 555: fprintf(stdout, "%s %s %d\n", 556: node->entry, node->file, (node->lno+63)/64); 557: else 558: fprintf(stdout, "%-16s%4d %-16s %s\n", 559: node->entry, node->lno, node->file, node->pat); 560: put_entries(node->right); 561: } 562: 563: char *dbp = lbuf; 564: int pfcnt; 565: 566: PF_funcs(fi) 567: FILE *fi; 568: { 569: 570: lineno = 0; 571: pfcnt = 0; 572: while (fgets(lbuf, sizeof(lbuf), fi)) { 573: lineno++; 574: dbp = lbuf; 575: if ( *dbp == '%' ) dbp++ ; /* Ratfor escape to fortran */ 576: while (isspace(*dbp)) 577: dbp++; 578: if (*dbp == 0) 579: continue; 580: switch (*dbp |' ') { 581: 582: case 'i': 583: if (tail("integer")) 584: takeprec(); 585: break; 586: case 'r': 587: if (tail("real")) 588: takeprec(); 589: break; 590: case 'l': 591: if (tail("logical")) 592: takeprec(); 593: break; 594: case 'c': 595: if (tail("complex") || tail("character")) 596: takeprec(); 597: break; 598: case 'd': 599: if (tail("double")) { 600: while (isspace(*dbp)) 601: dbp++; 602: if (*dbp == 0) 603: continue; 604: if (tail("precision")) 605: break; 606: continue; 607: } 608: break; 609: } 610: while (isspace(*dbp)) 611: dbp++; 612: if (*dbp == 0) 613: continue; 614: switch (*dbp|' ') { 615: 616: case 'f': 617: if (tail("function")) 618: getit(); 619: continue; 620: case 's': 621: if (tail("subroutine")) 622: getit(); 623: continue; 624: case 'p': 625: if (tail("program")) { 626: getit(); 627: continue; 628: } 629: if (tail("procedure")) 630: getit(); 631: continue; 632: } 633: } 634: return (pfcnt); 635: } 636: 637: tail(cp) 638: char *cp; 639: { 640: register int len = 0; 641: 642: while (*cp && (*cp&~' ') == ((*(dbp+len))&~' ')) 643: cp++, len++; 644: if (*cp == 0) { 645: dbp += len; 646: return (1); 647: } 648: return (0); 649: } 650: 651: takeprec() 652: { 653: 654: while (isspace(*dbp)) 655: dbp++; 656: if (*dbp != '*') 657: return; 658: dbp++; 659: while (isspace(*dbp)) 660: dbp++; 661: if (!isdigit(*dbp)) { 662: --dbp; /* force failure */ 663: return; 664: } 665: do 666: dbp++; 667: while (isdigit(*dbp)); 668: } 669: 670: getit() 671: { 672: register char *cp; 673: char c; 674: char nambuf[BUFSIZ]; 675: 676: for (cp = lbuf; *cp; cp++) 677: ; 678: *--cp = 0; /* zap newline */ 679: while (isspace(*dbp)) 680: dbp++; 681: if (*dbp == 0 || !isalpha(*dbp)) 682: return; 683: for (cp = dbp+1; *cp && (isalpha(*cp) || isdigit(*cp)); cp++) 684: continue; 685: c = cp[0]; 686: cp[0] = 0; 687: strcpy(nambuf, dbp); 688: cp[0] = c; 689: pfnote(nambuf, lineno, FALSE); 690: pfcnt++; 691: } 692: 693: char * 694: savestr(cp) 695: char *cp; 696: { 697: register int len; 698: register char *dp; 699: 700: len = strlen(cp); 701: dp = (char *)malloc(len+1); 702: strcpy(dp, cp); 703: return (dp); 704: } 705: 706: /* 707: * Return the ptr in sp at which the character c last 708: * appears; NULL if not found 709: * 710: * Identical to v7 rindex, included for portability. 711: */ 712: 713: char * 714: rindex(sp, c) 715: register char *sp, c; 716: { 717: register char *r; 718: 719: r = NULL; 720: do { 721: if (*sp == c) 722: r = sp; 723: } while (*sp++); 724: return(r); 725: }