lexi.c

Functions

lexi              X

Variables

chartype          X
last_nl           X
specials          X

Macros

alphanum          X
opchar            X

struct's

templ             X

   1: static char sccsid[] = "@(#)lexi.c	4.1	(Berkeley)	10/21/82";	/* SCCS version id string; not referenced by any code in this file */
   2: 
   3: /*
   4: 
   5: 			  Copyright (C) 1976
   6: 				by the
   7: 			  Board of Trustees
   8: 				of the
   9: 			University of Illinois
  10: 
  11: 			 All rights reserved
  12: 
  13: 
  14: NAME:
  15: 	lexi
  16: 
  17: FUNCTION:
  18: 	This is the token scanner for indent
  19: 
  20: ALGORITHM:
  21: 	1) Strip off intervening blanks and/or tabs.
  22: 	2) If it is an alphanumeric token, move it to the token buffer "token".
  23: 	   Check if it is a special reserved word that indent will want to
  24: 	   know about.
  25: 	3) Non-alphanumeric tokens are handled with a big switch statement.  A
  26: 	   flag is kept to remember if the last token was a "unary delimiter",
  27: 	   which forces a following operator to be unary as opposed to binary.
  28: 
  29: PARAMETERS:
  30: 	None
  31: 
  32: RETURNS:
  33: 	An integer code indicating the type of token scanned.
  34: 
  35: GLOBALS:
  36: 	buf_ptr =	Advanced past the characters of the token scanned
  37: 	had_eof
  38: 	last_u_d =	Set to true iff this token is a "unary delimiter"
  39: 
  40: CALLS:
  41: 	fill_buffer
  42: 	printf (lib)
  43: 
  44: CALLED BY:
  45: 	main
  46: 
  47: NOTES:
  48: 	Start of comment is passed back so that the comment can be scanned by
  49: 	pr_comment.
  50: 
  51: 	Strings and character literals are returned just like identifiers.
  52: 
  53: HISTORY:
  54: 	initial coding 	November 1976	D A Willcox of CAC
  55: 	1/7/77		D A Willcox of CAC	Fix to provide proper handling
  56: 						of "int a -1;"
  57: 
  58: */
  59: 
  60: /* Here we have the token scanner for indent.  It scans off one token and
  61:    puts it in the global variable "token".  It returns a code, indicating the
  62:    type of token scanned. */
  63: 
  64: #include "indent_globs.h"	/* globals shared with the rest of indent (no ';' after a directive) */
  65: #include "indent_codes.h"	/* token codes returned by lexi() */
  66: 
  67: 
  68: 
  69: #define alphanum 1	/* chartype[] class given to letters, digits and '_' */
  70: #define opchar 3	/* chartype[] class given to operator characters */
  71: 
  72: struct templ {		/* one entry of the reserved-word table "specials" */
  73:     char   *rwd;		/* spelling of the reserved word; 0 marks the end of the table */
  74:     int     rwcode;		/* keyword class; lexi() switches on this value */
  75: };
  76: 
  77: struct templ    specials[] =	/* reserved words recognized by lexi(), searched linearly */
  78: {				/* second field is the rwcode class used by the switch in lexi() */
  79:     "switch", 1,		/* 1: lexi() returns swstmt */
  80:     "case", 2,			/* 2: case or default; lexi() returns casestmt */
  81:     "struct", 3,		/* 3: struct; sets l_struct, then handled like class 4 */
  82:     "default", 2,
  83:     "int", 4,			/* 4: declaration keywords; lexi() returns decl unless inside parens */
  84:     "char", 4,
  85:     "float", 4,
  86:     "double", 4,
  87:     "long", 4,
  88:     "short", 4,
  89:     "typedef", 4,		/* was misspelled "typdef", so typedef was never matched */
  90:     "unsigned", 4,
  91:     "register", 4,
  92:     "static", 4,
  93:     "global", 4,
  94:     "extern", 4,
  95:     "if", 5,			/* 5: if, while, for; lexi() returns sp_paren */
  96:     "while", 5,
  97:     "for", 5,
  98:     "else", 6,			/* 6: do, else; lexi() returns sp_nparen */
  99:     "do", 6,
 100:     "sizeof", 0,		/* 0: no dedicated case; returned as ident, but marks a unary delimiter */
 101:     0, 0			/* sentinel: a null rwd ends the search loop */
 102: };
 103: 
 104: char    chartype[128] =
 105: {          /* class of each 7-bit character code: 1 = alphanum (letter, digit,
 106: 		      '_'), 3 = opchar (operator character), 0 = anything else */
 107:     0, 0, 0, 0, 0, 0, 0, 0,	/*   0-7:   control characters */
 108:     0, 0, 0, 0, 0, 0, 0, 0,	/*   8-15:  control characters */
 109:     0, 0, 0, 0, 0, 0, 0, 0,	/*  16-23:  control characters */
 110:     0, 0, 0, 0, 0, 0, 0, 0,	/*  24-31:  control characters */
 111:     0, 3, 0, 0, 0, 3, 3, 0,	/*  32-39:  space ! " # $ % & ' */
 112:     0, 0, 3, 3, 0, 3, 3, 3,	/*  40-47:  ( ) * + , - . / */
 113:     1, 1, 1, 1, 1, 1, 1, 1,	/*  48-55:  0 1 2 3 4 5 6 7 */
 114:     1, 1, 0, 0, 3, 3, 3, 3,	/*  56-63:  8 9 : ; < = > ? */
 115:     0, 1, 1, 1, 1, 1, 1, 1,	/*  64-71:  @ A B C D E F G */
 116:     1, 1, 1, 1, 1, 1, 1, 1,	/*  72-79:  H through O */
 117:     1, 1, 1, 1, 1, 1, 1, 1,	/*  80-87:  P through W */
 118:     1, 1, 1, 0, 0, 0, 3, 1,	/*  88-95:  X Y Z [ \ ] ^ _ */
 119:     0, 1, 1, 1, 1, 1, 1, 1,	/*  96-103: ` a b c d e f g */
 120:     1, 1, 1, 1, 1, 1, 1, 1,	/* 104-111: h through o */
 121:     1, 1, 1, 1, 1, 1, 1, 1,	/* 112-119: p through w */
 122:     1, 1, 1, 0, 3, 0, 3, 0	/* 120-127: x y z { | } ~ DEL */
 123: };
 124: 
 125: int     last_nl = true;
 126:  /* true if the last thing scanned was a newline or a form feed; lexi() copies it into col_1 at the start of each call */
 127: 
 128: 
 129: 
 130: int     lexi () {
 131:     register char  *tok;
 132:  /* local pointer to next char in token */
 133:     register int    i;
 134:  /* local loop counter */
 135:     register char  *j;
 136:  /* used for searching thru list of reserved words */
 137:     int     unary_delim;
 138:  /* this is set to 1 if the current token forces a following operator to be
 139:     unary */
 140:     static int  last_code;
 141:  /* the last token type returned */
 142:     static int  l_struct;
 143:  /* set to 1 if the last token was 'struct' */
 144:     int     found_it;
 145:     int     code;  /* internal code to be returned */
 146:     char    qchar; /* the delimiter character for a string */
 147: 
 148:     tok = token;           /* point to start of place to save token */
 149:     unary_delim = false;
 150:     col_1 = last_nl;           /* tell world that this token started in column
 151: 			          1 iff the last thing scanned was nl */
 152:     last_nl = false;
 153: 
 154:     while (*buf_ptr == ' ' || *buf_ptr == '\t') {
 155:     /* get rid of blanks */
 156:     col_1 = false;         /* leading blanks imply token is not in column 1
 157: 			          */
 158:     if (++buf_ptr >= buf_end)
 159:         fill_buffer ();
 160:     }
 161: 
 162: /*----------------------------------------------------------*\
 163: |    Scan an alphanumeric token
 164: \*----------------------------------------------------------*/
 165: 
 166:     if (chartype[*buf_ptr & 0177] == alphanum) {
 167:     /* we have a character or number */
 168:     while (chartype[*buf_ptr & 0177] == alphanum) {
 169:     /* copy it over */
 170:         *tok++ = *buf_ptr++;
 171:         if (buf_ptr >= buf_end)
 172:         fill_buffer ();
 173:     }
 174: 
 175:     *tok++ = '\0';
 176: 
 177:     if (l_struct) {        /* if last token was 'struct', then this token
 178: 			          should be treated as a declaration */
 179:         l_struct = false;
 180:         last_code = ident;
 181:         last_u_d = true;
 182:         return (decl);
 183:     }
 184: 
 185:     last_u_d = false;      /* operator after indentifier is binary */
 186: 
 187:     for (i = 0; specials[i].rwd != 0; ++i) {
 188:     /* this loop will check if the token is a keyword.  if so, a following
 189: 	   operator is unary */
 190:         last_code = ident; /* remember that this is the code we will return
 191: 			          */
 192:         j = specials[i].rwd;
 193:     /* point at ith reserved word */
 194:         tok = token;       /* point at scanned toekn */
 195:         found_it = true;   /* set to false if not found */
 196:         do {
 197:         if (*tok++ != *j) {
 198:             found_it = false;
 199:             break;
 200:         }
 201:         } while (*j++);
 202: 
 203:         if (found_it) {    /* we have a keyword */
 204:         last_u_d = true;
 205:         switch (specials[i].rwcode) {
 206:             case 1:    /* it is a switch */
 207:             return (swstmt);
 208:             case 2:    /* a case or default */
 209:             return (casestmt);
 210: 
 211:             case 3:    /* a "struct" */
 212:             l_struct = true;
 213:             /* Next time around, we will want to know that we have had
 214: 		       a 'struct' */
 215:             case 4:    /* one of the declaration keywords */
 216:             if(p_l_follow) break;   /* inside parens: cast */
 217:             last_code = decl;
 218:             return (decl);
 219: 
 220:             case 5:    /* if, while, for */
 221:             return (sp_paren);
 222: 
 223:             case 6:    /* do, else */
 224:             return (sp_nparen);
 225: 
 226:             default:   /* all others are treated like any other
 227: 			          identifier */
 228:             return (ident);
 229:         }          /* end of switch */
 230:         }              /* end of if (found_it) */
 231: 
 232:     }
 233: 
 234:     if (last_code == decl) /* if this is a declared variable, then
 235: 			          following sign is unary */
 236:         last_u_d = true;   /* will make "int a -1" work */
 237:     last_code = ident;
 238:     return (ident);        /* the ident is not in the list */
 239:     }                  /* end of procesing for alpanum character */
 240: 
 241: 
 242: 
 243: /*----------------------------------------------------------*\
 244: |   Scan a non-alphanumeric token
 245: \*----------------------------------------------------------*/
 246: 
 247:     *tok++ = *buf_ptr;         /* if it is only a one-character token, it is
 248: 			          moved here */
 249:     *tok = '\0';
 250:     if (++buf_ptr >= buf_end)
 251:     fill_buffer ();
 252: 
 253:     switch (*token) {
 254:     case '\n':
 255:         unary_delim = last_u_d;
 256:         last_nl = true;    /* remember that we just had a newline */
 257:         code = (had_eof ? 0 : newline);
 258:     /* if data has been exausted, the newline is a dummy, and we should
 259: 	   return code to stop */
 260:         break;
 261: 
 262:     case '\'':         /* start of quoted character */
 263:         qchar = '\'';      /* remember final delimiter */
 264:         goto copy_lit;     /* and go to common literal code */
 265: 
 266:     case '"':          /* start of string */
 267:         qchar = '"';
 268: 
 269:     copy_lit:
 270:         do {           /* copy the string */
 271:         while (1) {    /* move one character or [/<char>]<char> */
 272:             if (*buf_ptr == '\n') {
 273:             /* check for unterminated literal */
 274:             printf ("%d: Unterminated literal\n", line_no);
 275:             goto stop_lit;
 276:             /* Don't copy any more */
 277:             }
 278: 
 279:             *tok = *buf_ptr++;
 280:             if (buf_ptr >= buf_end)
 281:             fill_buffer ();
 282:             if (had_eof || ((tok - token) > (bufsize - 2))) {
 283:             printf ("Unterminated literal\n");
 284:             ++tok;
 285:             goto stop_lit;
 286:             /* get outof literal copying loop */
 287:             }
 288: 
 289:             if (*tok == '\\') {
 290:             /* if escape, copy extra char */
 291:             if (*buf_ptr == '\n')
 292:                    /* check for escaped newline */
 293:                 ++line_no;
 294:             *(++tok) = *buf_ptr++;
 295:             ++tok; /* we must increment this again because we
 296: 			          copied two chars */
 297:             if (buf_ptr >= buf_end)
 298:                 fill_buffer ();
 299:             }
 300:             else
 301:             break; /* we copied one character */
 302:         }          /* end of while (1) */
 303:         } while (*tok++ != qchar);
 304: 
 305:     stop_lit:
 306:         code = ident;
 307:         break;
 308: 
 309:     case ('('):
 310:     case ('['):
 311:         unary_delim = true;
 312:         code = lparen;
 313:         break;
 314: 
 315:     case (')'):
 316:     case (']'):
 317:         code = rparen;
 318:         break;
 319: 
 320:     case '#':
 321:         unary_delim = last_u_d;
 322:         code = preesc;
 323:         break;
 324: 
 325:     case '?':
 326:         unary_delim = true;
 327:         code = question;
 328:         break;
 329: 
 330:     case (':'):
 331:         code = colon;
 332:         unary_delim = true;
 333:         break;
 334: 
 335:     case (';'):
 336:         unary_delim = true;
 337:         code = semicolon;
 338:         break;
 339: 
 340:     case ('{'):
 341:         unary_delim = true;
 342:         code = lbrace;
 343:         break;
 344: 
 345:     case ('}'):
 346:         unary_delim = true;
 347:         code = rbrace;
 348:         break;
 349: 
 350:     case 014:          /* a form feed */
 351:         unary_delim = last_u_d;
 352:         last_nl = true;    /* remember this so we can set 'col_1' right */
 353:         code = form_feed;
 354:         break;
 355: 
 356:     case (','):
 357:         unary_delim = true;
 358:         code = comma;
 359:         break;
 360: 
 361:     case '.':
 362:         unary_delim = false;
 363:         code = period;
 364:         break;
 365: 
 366:     case '-':
 367:     case '+':          /* check for -, +, --, ++ */
 368:         code = (last_u_d ? unary_op : binary_op);
 369:         unary_delim = true;
 370: 
 371:         if (*buf_ptr == token[0]) {
 372:         /* check for doubled character */
 373:         *tok++ = *buf_ptr++;
 374:         /* buffer overflow will be checked at end of loop */
 375:         if (last_code == ident || last_code == rparen) {
 376:             code = (last_u_d ? unary_op : postop);
 377:         /* check for following ++ or -- */
 378:             unary_delim = false;
 379:         }
 380:         }
 381:         else
 382:         if (*buf_ptr == '>' || *buf_ptr == '=')
 383:                    /* check for operator -> or += */
 384:             *tok++ = *buf_ptr++;
 385:     /* buffer overflow will be checked at end of switch */
 386: 
 387:         break;
 388: 
 389:     case '=':
 390:         if (chartype[*buf_ptr] == opchar) {
 391:         /* we have two char assignment */
 392:         *tok++ = *buf_ptr;
 393:         /* move second character */
 394:         if (++buf_ptr >= buf_end)
 395:             fill_buffer ();
 396:         }
 397: 
 398:         code = binary_op;
 399:         unary_delim = true;
 400:         if (token[1] != '<' && token[1] != '>')
 401:                    /* check for possible 3 char operator */
 402:         break;
 403:     /* can drop thru!!! */
 404: 
 405:     case '>':
 406:     case '<':
 407:     case '!':          /* ops like <, <<, <=, !=, etc */
 408:         if (*buf_ptr == '>' || *buf_ptr == '<' || *buf_ptr == '=') {
 409:         *tok++ = *buf_ptr;
 410:         if (++buf_ptr >= buf_end)
 411:             fill_buffer ();
 412:         }
 413: 
 414:         if (*buf_ptr == '=')
 415:          *tok++ = *buf_ptr++;
 416:         code = (last_u_d ? unary_op : binary_op);
 417:         unary_delim = true;
 418:         break;
 419: 
 420:     default:
 421:         if (token[0] == '/' && *buf_ptr == '*') {
 422:         /* it is start of comment */
 423:         *tok++ = '*';
 424: 
 425:         if (++buf_ptr >= buf_end)
 426:             fill_buffer ();
 427: 
 428:         code = comment;
 429:         unary_delim = last_u_d;
 430:         break;
 431:         }
 432: 
 433:         while (*(tok - 1) == *buf_ptr || *buf_ptr=='=') {
 434:         /* handle ||, &&, etc, and also things as in int *****i */
 435:         *tok++ = *buf_ptr;
 436:         if (++buf_ptr >= buf_end)
 437:             fill_buffer ();
 438:         }
 439: 
 440: 
 441:         code = (last_u_d ? unary_op : binary_op);
 442:         unary_delim = true;
 443: 
 444: 
 445:     }                  /* end of switch */
 446: 
 447:     if (code != newline) {
 448:     l_struct = false;
 449:     last_code = code;
 450:     }
 451: 
 452:     if (buf_ptr >= buf_end)    /* check for input buffer empty */
 453:     fill_buffer ();
 454:     last_u_d = unary_delim;
 455:     *tok = '\0';           /* null terminate the token */
 456:     return (code);
 457: };

Defined functions

lexi defined in line 130; used 2 times

in /usr/src/ucb/indent/indent.c line 323, 458

Defined variables

chartype defined in line 104; used 3 times

in line 166-168(2), 390

last_nl defined in line 125; used 4 times

in line 150-152(2), 256, 352

sccsid defined in line 1; never used

specials defined in line 77; used 3 times

in line 187-192(2), 205

Defined struct's

templ defined in line 72; used 2 times

in line 77(2)

Defined macros

alphanum defined in line 69; used 2 times

in line 166-168(2)

opchar defined in line 70; used 1 times

in line 390

Last modified: 1983-09-04
Generated: 2016-12-26

Generated by src2html V0.67
page hit count: 1218