lexi.c

Functions

addkey            X
lexi              X

Variables

chartype          X
specials          X

Macros

alphanum          X
opchar            X

struct's

templ             X

   1: /*
   2:  * Copyright (c) 1980 Regents of the University of California.
   3:  * All rights reserved.  The Berkeley software License Agreement
   4:  * specifies the terms and conditions for redistribution.
   5:  */
   6: 
   7: #ifndef lint
   8: static char sccsid[] = "@(#)lexi.c	5.4 (Berkeley) 9/10/85";
   9: #endif not lint
  10: 
  11: /*-
  12:  *
  13:  *			  Copyright (C) 1976
  14:  *				by the
  15:  *			  Board of Trustees
  16:  *				of the
  17:  *			University of Illinois
  18:  *
  19:  *			 All rights reserved
  20:  *
  21:  *
  22:  * NAME:
  23:  *	lexi
  24:  *
  25:  * FUNCTION:
  26:  *	This is the token scanner for indent
  27:  *
  28:  * ALGORITHM:
  29:  *	1) Strip off intervening blanks and/or tabs.
  30:  *	2) If it is an alphanumeric token, move it to the token buffer "token".
  31:  *	   Check if it is a special reserved word that indent will want to
  32:  *	   know about.
  33:  *	3) Non-alphanumeric tokens are handled with a big switch statement.  A
  34:  *	   flag is kept to remember if the last token was a "unary delimiter",
  35:  *	   which forces a following operator to be unary as opposed to binary.
  36:  *
  37:  * PARAMETERS:
  38:  *	None
  39:  *
  40:  * RETURNS:
  41:  *	An integer code indicating the type of token scanned.
  42:  *
  43:  * GLOBALS:
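  44:  *	buf_ptr =	Advanced past the token scanned (and any blanks skipped)
  45:  *	had_eof		Read; a newline scanned after end of input returns 0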
  46:  *	ps.last_u_d =	Set to true iff this token is a "unary delimiter"
  47:  *
  48:  * CALLS:
  49:  *	fill_buffer
  50:  *	printf (lib)
  51:  *
  52:  * CALLED BY:
  53:  *	main
  54:  *
  55:  * NOTES:
  56:  *	Start of comment is passed back so that the comment can be scanned by
  57:  *	pr_comment.
  58:  *
  59:  *	Strings and character literals are returned just like identifiers.
  60:  *
  61:  * HISTORY:
  62:  *	initial coding 	November 1976	D A Willcox of CAC
  63:  *	1/7/77		D A Willcox of CAC	Fix to provide proper handling
  64:  *						of "int a -1;"
  65:  *
  66:  */
  67: 
  68: /*
  69:  * Here we have the token scanner for indent.  It scans off one token and
  70:  * puts it in the global variable "token".  It returns a code, indicating
  71:  * the type of token scanned.
  72:  */
  73: 
  74: #include "indent_globs.h";
  75: #include "indent_codes.h";
  76: #include "ctype.h"
  77: 
  78: #define alphanum 1
  79: #define opchar 3
  80: 
  81: struct templ {
  82:     char       *rwd;
  83:     int         rwcode;
  84: };
  85: 
  86: struct templ specials[100] =
  87: {
  88:     "switch", 1,
  89:     "case", 2,
  90:     "break", 0,
  91:     "struct", 3,
  92:     "union", 3,
  93:     "enum", 3,
  94:     "default", 2,
  95:     "int", 4,
  96:     "char", 4,
  97:     "float", 4,
  98:     "double", 4,
  99:     "long", 4,
 100:     "short", 4,
 101:     "typdef", 4,
 102:     "unsigned", 4,
 103:     "register", 4,
 104:     "static", 4,
 105:     "global", 4,
 106:     "extern", 4,
 107:     "void", 4,
 108:     "goto", 0,
 109:     "return", 0,
 110:     "if", 5,
 111:     "while", 5,
 112:     "for", 5,
 113:     "else", 6,
 114:     "do", 6,
 115:     "sizeof", 7,
 116:     0, 0
 117: };
 118: 
 119: char        chartype[128] =
 120: {               /* this is used to facilitate the decision
 121: 				 * of what type (alphanumeric, operator)
 122: 				 * each character is */
 123:     0, 0, 0, 0, 0, 0, 0, 0,
 124:     0, 0, 0, 0, 0, 0, 0, 0,
 125:     0, 0, 0, 0, 0, 0, 0, 0,
 126:     0, 0, 0, 0, 0, 0, 0, 0,
 127:     0, 3, 0, 0, 0, 3, 3, 0,
 128:     0, 0, 3, 3, 0, 3, 3, 3,
 129:     1, 1, 1, 1, 1, 1, 1, 1,
 130:     1, 1, 0, 0, 3, 3, 3, 3,
 131:     0, 1, 1, 1, 1, 1, 1, 1,
 132:     1, 1, 1, 1, 1, 1, 1, 1,
 133:     1, 1, 1, 1, 1, 1, 1, 1,
 134:     1, 1, 1, 0, 0, 0, 3, 1,
 135:     0, 1, 1, 1, 1, 1, 1, 1,
 136:     1, 1, 1, 1, 1, 1, 1, 1,
 137:     1, 1, 1, 1, 1, 1, 1, 1,
 138:     1, 1, 1, 0, 3, 0, 3, 0
 139: };
 140: 
 141: 
 142: 
 143: 
 144: int
 145: lexi()
 146: {
 147:     register char *tok;     /* local pointer to next char in token */
 148:     int         unary_delim;    /* this is set to 1 if the current token
 149: 				 *
 150: 				 * forces a following operator to be unary */
 151:     static int  last_code;  /* the last token type returned */
 152:     static int  l_struct;   /* set to 1 if the last token was 'struct' */
 153:     int         code;       /* internal code to be returned */
 154:     char        qchar;      /* the delimiter character for a string */
 155: 
 156:     tok = token;        /* point to start of place to save token */
 157:     unary_delim = false;
 158:     ps.col_1 = ps.last_nl;  /* tell world that this token started in
 159: 				 * column 1 iff the last thing scanned was
 160: 				 * nl */
 161:     ps.last_nl = false;
 162: 
 163:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
 164:     ps.col_1 = false;   /* leading blanks imply token is not in
 165: 				 * column 1 */
 166:     if (++buf_ptr >= buf_end)
 167:         fill_buffer();
 168:     }
 169: 
 170:     /* Scan an alphanumeric token.  Note that we must also handle
 171:      * stuff like "1.0e+03" and "7e-6". */
 172:     if (chartype[*buf_ptr & 0177] == alphanum) {    /* we have a character
 173: 							 * or number */
 174:     register char *j;   /* used for searching thru list of
 175: 				 * reserved words */
 176:     register struct templ *p;
 177:     register int c;
 178: 
 179:     do {            /* copy it over */
 180:         *tok++ = *buf_ptr++;
 181:         if (buf_ptr >= buf_end)
 182:         fill_buffer();
 183:     } while (chartype[c = *buf_ptr & 0177] == alphanum ||
 184:         isdigit(token[0]) && (c == '+' || c == '-') &&
 185:         (tok[-1] == 'e' || tok[-1] == 'E'));
 186:     *tok++ = '\0';
 187:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {   /* get rid of blanks */
 188:         if (++buf_ptr >= buf_end)
 189:         fill_buffer();
 190:     }
 191:     ps.its_a_keyword = false;
 192:     ps.sizeof_keyword = false;
 193:     if (l_struct) {     /* if last token was 'struct', then this
 194: 				 * token should be treated as a
 195: 				 * declaration */
 196:         l_struct = false;
 197:         last_code = ident;
 198:         ps.last_u_d = true;
 199:         return (decl);
 200:     }
 201:     ps.last_u_d = false;    /* Operator after indentifier is binary */
 202:     last_code = ident;  /* Remember that this is the code we will
 203: 				 * return */
 204: 
 205:     /*
 206: 	 * This loop will check if the token is a keyword.
 207: 	 */
 208:     for (p = specials; (j = p->rwd) != 0; p++) {
 209:         tok = token;    /* point at scanned token */
 210:         if (*j++ != *tok++ || *j++ != *tok++)
 211:         continue;   /* This test depends on the fact that
 212: 				 * identifiers are always at least 1
 213: 				 * character long (ie. the first two bytes
 214: 				 * of the identifier are always
 215: 				 * meaningful) */
 216:         if (tok[-1] == 0)
 217:         break;      /* If its a one-character identifier */
 218:         while (*tok++ == *j)
 219:         if (*j++ == 0)
 220:             goto found_keyword; /* I wish that C had a multi-level
 221: 					 * break... */
 222:     }
 223:     if (p->rwd) {       /* we have a keyword */
 224:     found_keyword:
 225:         ps.its_a_keyword = true;
 226:         ps.last_u_d = true;
 227:         switch (p->rwcode) {
 228:         case 1: /* it is a switch */
 229:             return (swstmt);
 230:         case 2: /* a case or default */
 231:             return (casestmt);
 232: 
 233:         case 3: /* a "struct" */
 234:             if (ps.p_l_follow)
 235:             break;  /* inside parens: cast */
 236:             l_struct = true;
 237: 
 238:             /*
 239: 		     * Next time around, we will want to know that we have
 240: 		     * had a 'struct'
 241: 		     */
 242:         case 4: /* one of the declaration keywords */
 243:             if (ps.p_l_follow) {
 244:             ps.cast_mask |= 1 << ps.p_l_follow;
 245:             break;  /* inside parens: cast */
 246:             }
 247:             last_code = decl;
 248:             return (decl);
 249: 
 250:         case 5: /* if, while, for */
 251:             return (sp_paren);
 252: 
 253:         case 6: /* do, else */
 254:             return (sp_nparen);
 255: 
 256:         case 7:
 257:             ps.sizeof_keyword = true;
 258:         default:    /* all others are treated like any other
 259: 				 * identifier */
 260:             return (ident);
 261:         }           /* end of switch */
 262:     }           /* end of if (found_it) */
 263:     if (*buf_ptr == '(' && ps.tos <= 1 && ps.ind_level == 0
 264:         && (buf_ptr[1] != ')' || buf_ptr[2] != ';')) {
 265:         strncpy(ps.procname, token, sizeof ps.procname - 1);
 266:         ps.in_parameter_declaration = 1;
 267:     }
 268: 
 269:     /*
 270: 	 * The following hack attempts to guess whether or not the current
 271: 	 * token is in fact a declaration keyword -- one that has been
 272: 	 * typedefd
 273: 	 */
 274:     if (((*buf_ptr == '*' && buf_ptr[1] != '=') || isalpha(*buf_ptr))
 275:         && !ps.p_l_follow
 276:         && (ps.last_token == rparen || ps.last_token == semicolon ||
 277:         ps.last_token == decl ||
 278:         ps.last_token == lbrace || ps.last_token == rbrace)) {
 279:         ps.its_a_keyword = true;
 280:         ps.last_u_d = true;
 281:         last_code = decl;
 282:         return decl;
 283:     }
 284:     if (last_code == decl)  /* if this is a declared variable, then
 285: 				 * following sign is unary */
 286:         ps.last_u_d = true; /* will make "int a -1" work */
 287:     last_code = ident;
 288:     return (ident);     /* the ident is not in the list */
 289:     }               /* end of procesing for alpanum character */
 290:     /* Scan a non-alphanumeric token */
 291: 
 292:     *tok++ = *buf_ptr;      /* if it is only a one-character token, it
 293: 				 * is moved here */
 294:     *tok = '\0';
 295:     if (++buf_ptr >= buf_end)
 296:     fill_buffer();
 297: 
 298:     switch (*token) {
 299:     case '\n':
 300:         unary_delim = ps.last_u_d;
 301:         ps.last_nl = true;  /* remember that we just had a newline */
 302:         code = (had_eof ? 0 : newline);
 303: 
 304:         /*
 305: 	     * if data has been exausted, the newline is a dummy, and we
 306: 	     * should return code to stop
 307: 	     */
 308:         break;
 309: 
 310:     case '\'':      /* start of quoted character */
 311:     case '"':       /* start of string */
 312:         qchar = *token;
 313:         if (troff) {
 314:         tok[-1] = '`';
 315:         if (qchar == '"')
 316:             *tok++ = '`';
 317:         *tok++ = BACKSLASH;
 318:         *tok++ = 'f';
 319:         *tok++ = 'L';
 320:         }
 321:         do {        /* copy the string */
 322:         while (1) { /* move one character or [/<char>]<char> */
 323:             if (*buf_ptr == '\n') {
 324:             printf("%d: Unterminated literal\n", line_no);
 325:             goto stop_lit;
 326:             }
 327:             *tok = *buf_ptr++;
 328:             if (buf_ptr >= buf_end)
 329:             fill_buffer();
 330:             if (had_eof || ((tok - token) > (bufsize - 2))) {
 331:             printf("Unterminated literal\n");
 332:             ++tok;
 333:             goto stop_lit;
 334:             /* get outof literal copying loop */
 335:             }
 336:             if (*tok == BACKSLASH) {    /* if escape, copy extra
 337: 						 * char */
 338:             if (*buf_ptr == '\n')   /* check for escaped
 339: 						 * newline */
 340:                 ++line_no;
 341:             if (troff) {
 342:                 *++tok = BACKSLASH;
 343:                 if (*buf_ptr == BACKSLASH)
 344:                 *++tok = BACKSLASH;
 345:             }
 346:             *++tok = *buf_ptr++;
 347:             ++tok;  /* we must increment this again because we
 348: 				 * copied two chars */
 349:             if (buf_ptr >= buf_end)
 350:                 fill_buffer();
 351:             }
 352:             else
 353:             break;  /* we copied one character */
 354:         }       /* end of while (1) */
 355:         } while (*tok++ != qchar);
 356:         if (troff) {
 357:         tok[-1] = BACKSLASH;
 358:         *tok++ = 'f';
 359:         *tok++ = 'R';
 360:         *tok++ = '\'';
 361:         if (qchar == '"')
 362:             *tok++ = '\'';
 363:         }
 364:     stop_lit:
 365:         code = ident;
 366:         break;
 367: 
 368:     case ('('):
 369:     case ('['):
 370:         unary_delim = true;
 371:         code = lparen;
 372:         break;
 373: 
 374:     case (')'):
 375:     case (']'):
 376:         code = rparen;
 377:         break;
 378: 
 379:     case '#':
 380:         unary_delim = ps.last_u_d;
 381:         code = preesc;
 382:         break;
 383: 
 384:     case '?':
 385:         unary_delim = true;
 386:         code = question;
 387:         break;
 388: 
 389:     case (':'):
 390:         code = colon;
 391:         unary_delim = true;
 392:         break;
 393: 
 394:     case (';'):
 395:         unary_delim = true;
 396:         code = semicolon;
 397:         break;
 398: 
 399:     case ('{'):
 400:         unary_delim = true;
 401: 
 402:         /*
 403: 	     * if (ps.in_or_st) ps.block_init = 1;
 404: 	     */
 405:         code = ps.block_init ? lparen : lbrace;
 406:         break;
 407: 
 408:     case ('}'):
 409:         unary_delim = true;
 410:         code = ps.block_init ? rparen : rbrace;
 411:         break;
 412: 
 413:     case 014:       /* a form feed */
 414:         unary_delim = ps.last_u_d;
 415:         ps.last_nl = true;  /* remember this so we can set 'ps.col_1'
 416: 				 * right */
 417:         code = form_feed;
 418:         break;
 419: 
 420:     case (','):
 421:         unary_delim = true;
 422:         code = comma;
 423:         break;
 424: 
 425:     case '.':
 426:         unary_delim = false;
 427:         code = period;
 428:         break;
 429: 
 430:     case '-':
 431:     case '+':       /* check for -, +, --, ++ */
 432:         code = (ps.last_u_d ? unary_op : binary_op);
 433:         unary_delim = true;
 434: 
 435:         if (*buf_ptr == token[0]) {
 436:         /* check for doubled character */
 437:         *tok++ = *buf_ptr++;
 438:         /* buffer overflow will be checked at end of loop */
 439:         if (last_code == ident || last_code == rparen) {
 440:             code = (ps.last_u_d ? unary_op : postop);
 441:             /* check for following ++ or -- */
 442:             unary_delim = false;
 443:         }
 444:         }
 445:         else if (*buf_ptr == '=')
 446:         /* check for operator += */
 447:         *tok++ = *buf_ptr++;
 448:         else if (token[0] == '-' && *buf_ptr == '>') {
 449:         /* check for operator -> */
 450:         *tok++ = *buf_ptr++;
 451:         if (!pointer_as_binop) {
 452:             code = unary_op;
 453:             unary_delim = false;
 454:             ps.want_blank = false;
 455:         }
 456:         }
 457:         /* buffer overflow will be checked at end of switch */
 458: 
 459:         break;
 460: 
 461:     case '=':
 462:         if (ps.in_or_st)
 463:         ps.block_init = 1;
 464:         if (chartype[*buf_ptr] == opchar) { /* we have two char
 465: 						 * assignment */
 466:         tok[-1] = *buf_ptr++;
 467:         if ((tok[-1] == '<' || tok[-1] == '>') && tok[-1] == *buf_ptr)
 468:             *tok++ = *buf_ptr++;
 469:         *tok++ = '=';   /* Flip =+ to += */
 470:         *tok = 0;
 471:         }
 472:         code = binary_op;
 473:         unary_delim = true;
 474:         break;
 475:         /* can drop thru!!! */
 476: 
 477:     case '>':
 478:     case '<':
 479:     case '!':       /* ops like <, <<, <=, !=, etc */
 480:         if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
 481:         *tok++ = *buf_ptr;
 482:         if (++buf_ptr >= buf_end)
 483:             fill_buffer();
 484:         }
 485:         if (*buf_ptr == '=')
 486:         *tok++ = *buf_ptr++;
 487:         code = (ps.last_u_d ? unary_op : binary_op);
 488:         unary_delim = true;
 489:         break;
 490: 
 491:     default:
 492:         if (token[0] == '/' && *buf_ptr == '*') {
 493:         /* it is start of comment */
 494:         *tok++ = '*';
 495: 
 496:         if (++buf_ptr >= buf_end)
 497:             fill_buffer();
 498: 
 499:         code = comment;
 500:         unary_delim = ps.last_u_d;
 501:         break;
 502:         }
 503:         while (*(tok - 1) == *buf_ptr || *buf_ptr == '=') {
 504:         /* handle ||, &&, etc, and also things as in int *****i */
 505:         *tok++ = *buf_ptr;
 506:         if (++buf_ptr >= buf_end)
 507:             fill_buffer();
 508:         }
 509:         code = (ps.last_u_d ? unary_op : binary_op);
 510:         unary_delim = true;
 511: 
 512: 
 513:     }               /* end of switch */
 514:     if (code != newline) {
 515:     l_struct = false;
 516:     last_code = code;
 517:     }
 518:     if (buf_ptr >= buf_end) /* check for input buffer empty */
 519:     fill_buffer();
 520:     ps.last_u_d = unary_delim;
 521:     *tok = '\0';        /* null terminate the token */
 522:     return (code);
 523: };
 524: 
 525: /* Add the given keyword to the keyword table, using val as the keyword type
 526:    */
 527: addkey (key, val)
 528: char       *key;
 529: {
 530:     register struct templ *p = specials;
 531:     while (p->rwd)
 532:     if (p->rwd[0] == key[0] && strcmp(p->rwd, key) == 0)
 533:         return;
 534:     else
 535:         p++;
 536:     if (p >= specials + sizeof specials / sizeof specials[0])
 537:     return;         /* For now, table overflows are silently
 538: 				   ignored */
 539:     p->rwd = key;
 540:     p->rwcode = val;
 541:     p[1].rwd = 0;
 542:     p[1].rwcode = 0;
 543:     return;
 544: }

Defined functions

addkey defined in line 527; used 1 times

in /usr/src/ucb/indent/args.c line 225

lexi defined in line 144; used 2 times

in /usr/src/ucb/indent/indent.c line 239, 369

Defined variables

chartype defined in line 119; used 3 times

in line 172, 183, 464

sccsid defined in line 8; never used

specials defined in line 86; used 5 times

in line 208, 530, 536(3)

Defined struct's

templ defined in line 81; used 6 times

in line 86(2), 176(2), 530(2)

Defined macros

alphanum defined in line 78; used 2 times

in line 172, 183

opchar defined in line 79; used 1 times

in line 464

Last modified: 1985-09-10
Generated: 2016-12-26

Generated by src2html V0.67
page hit count: 1660