awkgram.y (13931B)
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/

/*
 * C prologue: headers, one macro, and the globals that the grammar
 * actions read and update while a program is being parsed.
 */
%{
#include <stdio.h>
#include <string.h>
#include "awk.h"

/*
 * Every makedfa(a, b) written in this file expands to compre(a);
 * the second argument is dropped by the macro.
 */
#define	makedfa(a,b)	compre(a)

void	checkdup(Node *list, Cell *item);

/*
 * Always returns 1.
 * NOTE(review): presumably the lex end-of-input hook meaning
 * "no further input" -- confirm against the scanner.
 */
int
yywrap(void)
{
	return 1;
}

Node	*beginloc = NULL;	/* BEGIN actions, linked together by the XBEGIN rule */
Node	*endloc = NULL;		/* END actions, linked together by the XEND rule */
int	infunc = 0;		/* nonzero while in the arglist or body of a func */
int	inloop = 0;		/* nonzero while in the body of a while, for or do */
char	*curfname = NULL;	/* name of the function currently being defined */
Node	*arglist = NULL;	/* list of args for the current function */
%}

/*
 * Semantic value carried by tokens and nonterminals:
 *	p	parse tree node
 *	cp	symbol table cell
 *	i	plain integer, used for token codes
 *	s	character string, used for regular expression text
 */
%union {
	Node	*p;
	Cell	*cp;
	int	i;
	char	*s;
}

/* Token declarations.  The order is significant and is kept as is. */
%token	<i>	FIRSTTOKEN	/* must be first */
%token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND
%token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
%token	<i>	ARRAY
%token	<i>	MATCH NOTMATCH MATCHOP
%token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS
%token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
%token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
%token	<i>	ADD MINUS MULT DIVIDE MOD
%token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
%token	<i>	PRINT PRINTF SPRINTF
%token	<p>	ELSE INTEST CONDEXPR
%token	<i>	POSTINCR PREINCR POSTDECR PREDECR
%token	<cp>	VAR IVAR VARNF CALL NUMBER STRING
%token	<s>	REGEXPR

/* Value types of the nonterminals. */
%type	<p>	pas pattern ppattern plist pplist patlist prarg term re
%type	<p>	pa_pat pa_stat pa_stats
%type	<s>	reg_expr
%type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
%type	<p>	var varname funcname varlist
%type	<p>	for if else while
%type	<i>	do st
%type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
%type	<i>	subop print

/*
 * Operator precedence and associativity.  In yacc each later line
 * binds more tightly than the lines before it.
 */
%right	ASGNOP
%right	'?'
/* Precedence declarations, continued: each line binds tighter than the one before. */
%right	':'
%left	BOR
%left	AND
%left	GETLINE
%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
%left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
%left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
%left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
%left	REGEXPR VAR VARNF IVAR WHILE '('
%left	CAT
%left	'+' '-'
%left	'*' '/' '%'
%left	NOT UMINUS
%right	POWER
%right	DECR INCR
%left	INDIRECT
%token	LASTTOKEN	/* must be last */

%%

/*
 * program: first rule of the grammar.  When no error has been flagged
 * the PROGRAM node built from beginloc, the pattern-action list and
 * endloc is stored in winner.  On a parse error the lookahead is
 * discarded, bracecheck() is called and "bailing out" is reported.
 */
program:
	  pas
		{
			if (errorflag == 0) {
				winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc);
			}
		}
	| error
		{
			yyclearin;
			bracecheck();
			SYNTAX("bailing out");
		}
	;

/* and, bor, comma, do, else: the token followed by any number of NL tokens. */
and:
	  AND
	| and NL
	;

bor:
	  BOR
	| bor NL
	;

comma:
	  ','
	| comma NL
	;

do:
	  DO
	| do NL
	;

else:
	  ELSE
	| else NL
	;

/*
 * for: three forms -- with a test, without a test, and "var in array".
 * inloop is raised by a mid-rule action before the body is parsed and
 * lowered again when the whole statement is reduced.  The mid-rule
 * action counts as a component when numbering the right-hand side.
 */
for:
	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen { inloop++; } stmt
		{
			inloop--;
			$$ = stat4(FOR, $3, notnull($6), $9, $12);
		}
	| FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen { inloop++; } stmt
		{
			inloop--;
			$$ = stat4(FOR, $3, NIL, $7, $10);
		}
	| FOR '(' varname IN varname rparen { inloop++; } stmt
		{
			inloop--;
			$$ = stat3(IN, $3, makearr($5), $8);
		}
	;

/* funcname: name in a function definition; setfname() checks and records it. */
funcname:
	  VAR
		{ setfname($1); }
	| CALL
		{ setfname($1); }
	;

/* if: the parenthesised condition, passed through notnull(). */
if:
	  IF '(' pattern rparen
		{ $$ = notnull($3); }
	;

/* lbrace: an opening brace followed by any number of NL tokens. */
lbrace:
	  '{'
	| lbrace NL
	;

/* nl: one or more NL tokens. */
nl:
	  NL
	| nl NL
	;

opt_nl:
	  /* empty */
		{ $$ = 0; }
	| nl
	;

/* opt_pst: optional run of newlines and semicolons. */
opt_pst:
	  /* empty */
		{ $$ = 0; }
	| pst
	;


opt_simple_stmt:
	  /* empty */
		{ $$ = 0; }
	| simple_stmt
	;

/* pas: the whole list of pattern-action statements; 0 when there are none. */
pas:
	  opt_pst
		{ $$ = 0; }
	| opt_pst pa_stats opt_pst
		{ $$ = $2; }
	;

/* pa_pat: a pattern used as a selector, passed through notnull(). */
pa_pat:
	  pattern
		{ $$ = notnull($1); }
	;

/*
 * pa_stat: one top-level item.  A pattern with no action gets a PRINT
 * of rectonode() as its action.  BEGIN and END bodies are linked onto
 * beginloc and endloc, and a function definition is handed to defn();
 * those three forms yield 0.
 */
pa_stat:
	  pa_pat
		{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, $1, $3); }
	| pa_pat ',' pa_pat
		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }
	| pa_pat ',' pa_pat lbrace stmtlist '}'
		{ $$ = pa2stat($1, $3, $5); }
	| lbrace stmtlist '}'
		{ $$ = stat2(PASTAT, NIL, $2); }
	| XBEGIN lbrace stmtlist '}'
		{
			beginloc = linkum(beginloc, $3);
			$$ = 0;
		}
	| XEND lbrace stmtlist '}'
		{
			endloc = linkum(endloc, $3);
			$$ = 0;
		}
	| FUNC funcname '(' varlist rparen { infunc++; } lbrace stmtlist '}'
		{
			infunc--;
			curfname = 0;
			defn((Cell *)$2, $4, $8);
			$$ = 0;
		}
	;

pa_stats:
	  pa_stat
	| pa_stats opt_pst pa_stat
		{ $$ = linkum($1, $3); }
	;

/* patlist: comma-separated list of patterns, joined with linkum(). */
patlist:
	  pattern
	| patlist comma pattern
		{ $$ = linkum($1, $3); }
	;

/*
 * ppattern: the expression form reachable from print arguments (see
 * prarg and pplist).  It has the same alternatives as pattern except
 * that the six comparison operators and the getline pipe forms are
 * absent.
 * NOTE(review): presumably so that '>' and '|' after print can be read
 * as output redirection -- confirm.
 */
ppattern:
	  var ASGNOP ppattern
		{ $$ = op2($2, $1, $3); }
	| ppattern '?' ppattern ':' ppattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| ppattern bor ppattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| ppattern and ppattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| ppattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| ppattern MATCHOP ppattern
		{
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| ppattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| ppattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/*
 * pattern: the general expression.  For MATCHOP with a non-regex right
 * operand, a constant operand (constnode) is compiled here through
 * makedfa(); otherwise the operand node itself is stored and the first
 * operand of the op3 node is (Node *)1 instead of NIL.
 * NOTE(review): the (Node *)1 marker presumably tells the interpreter
 * to build the matcher at run time -- confirm in the evaluator.
 * The two getline pipe forms are rejected with a syntax error when the
 * global safe is set.
 */
pattern:
	  var ASGNOP pattern
		{ $$ = op2($2, $1, $3); }
	| pattern '?' pattern ':' pattern %prec '?'
		{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
	| pattern bor pattern %prec BOR
		{ $$ = op2(BOR, notnull($1), notnull($3)); }
	| pattern and pattern %prec AND
		{ $$ = op2(AND, notnull($1), notnull($3)); }
	| pattern EQ pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern GT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern LT pattern
		{ $$ = op2($2, $1, $3); }
	| pattern NE pattern
		{ $$ = op2($2, $1, $3); }
	| pattern MATCHOP reg_expr
		{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
	| pattern MATCHOP pattern
		{
			if (constnode($3)) {
				$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
			} else {
				$$ = op3($2, (Node *)1, $1, $3);
			}
		}
	| pattern IN varname
		{ $$ = op2(INTEST, $1, makearr($3)); }
	| '(' plist ')' IN varname
		{ $$ = op2(INTEST, $2, makearr($5)); }
	| pattern '|' GETLINE var
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, $4, itonp($2), $1);
			}
		}
	| pattern '|' GETLINE
		{
			if (safe) {
				SYNTAX("cmd | getline is unsafe");
			} else {
				$$ = op3(GETLINE, (Node*)0, itonp($2), $1);
			}
		}
	| pattern term %prec CAT
		{ $$ = op2(CAT, $1, $2); }
	| re
	| term
	;

/* plist: two or more patterns separated by commas. */
plist:
	  pattern comma pattern
		{ $$ = linkum($1, $3); }
	| plist comma pattern
		{ $$ = linkum($1, $3); }
	;

/* pplist: one or more ppatterns separated by commas. */
pplist:
	  ppattern
	| pplist comma ppattern
		{ $$ = linkum($1, $3); }
	;

/* prarg: arguments of print or printf; rectonode() when none are given. */
prarg:
	  /* empty */
		{ $$ = rectonode(); }
	| pplist
	| '(' plist ')'
		{ $$ = $2; }
	;

print:
	  PRINT
	| PRINTF
	;

/* pst: one or more NL or semicolon tokens in any mix. */
pst:
	  NL
	| ';'
	| pst NL
	| pst ';'
	;

rbrace:
	  '}'
	| rbrace NL
	;

/* re: a bare regular expression, built as a MATCH against rectonode(). */
re:
	  reg_expr
		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
	| NOT re
		{ $$ = op1(NOT, notnull($2)); }
	;

/*
 * reg_expr: startreg() is called as soon as the opening slash has been
 * seen and before the REGEXPR token is requested; the value is the
 * REGEXPR string.
 */
reg_expr:
	  '/' { startreg(); } REGEXPR '/'
		{ $$ = $3; }
	;

rparen:
	  ')'
	| rparen NL
	;

/*
 * simple_stmt: print with optional redirection, delete, or a bare
 * expression.  The three redirected print forms are rejected with a
 * syntax error when the global safe is set.
 */
simple_stmt:
	  print prarg '|' term
		{
			if (safe) {
				SYNTAX("print | is unsafe");
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg APPEND term
		{
			if (safe) {
				SYNTAX("print >> is unsafe");
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg GT term
		{
			if (safe) {
				SYNTAX("print > is unsafe");
			} else {
				$$ = stat3($1, $2, itonp($3), $4);
			}
		}
	| print prarg
		{ $$ = stat3($1, $2, NIL, NIL); }
	| DELETE varname '[' patlist ']'
		{ $$ = stat2(DELETE, makearr($2), $4); }
	| DELETE varname
		{ $$ = stat2(DELETE, makearr($2), 0); }
	| pattern
		{ $$ = exptostat($1); }
	| error
		{
			yyclearin;
			SYNTAX("illegal statement");
		}
	;

/* st: statement terminator -- newlines, or a semicolon plus optional newlines. */
st:
	  nl
	| ';' opt_nl
	;

/*
 * stmt: a complete statement.  break and continue are reported as
 * errors when inloop is zero; next and nextfile are reported as errors
 * when infunc is nonzero.  In each of those cases the node is still
 * built after the error has been reported.
 */
stmt:
	  BREAK st
		{
			if (!inloop) {
				SYNTAX("break illegal outside of loops");
			}
			$$ = stat1(BREAK, NIL);
		}
	| CLOSE pattern st
		{ $$ = stat1(CLOSE, $2); }
	| CONTINUE st
		{
			if (!inloop) {
				SYNTAX("continue illegal outside of loops");
			}
			$$ = stat1(CONTINUE, NIL);
		}
	| do { inloop++; } stmt { inloop--; } WHILE '(' pattern ')' st
		{ $$ = stat2(DO, $3, notnull($7)); }
	| EXIT pattern st
		{ $$ = stat1(EXIT, $2); }
	| EXIT st
		{ $$ = stat1(EXIT, NIL); }
	| for
	| if stmt else stmt
		{ $$ = stat3(IF, $1, $2, $4); }
	| if stmt
		{ $$ = stat3(IF, $1, $2, NIL); }
	| lbrace stmtlist rbrace
		{ $$ = $2; }
	| NEXT st
		{
			if (infunc) {
				SYNTAX("next is illegal inside a function");
			}
			$$ = stat1(NEXT, NIL);
		}
	| NEXTFILE st
		{
			if (infunc) {
				SYNTAX("nextfile is illegal inside a function");
			}
			$$ = stat1(NEXTFILE, NIL);
		}
	| RETURN pattern st
		{ $$ = stat1(RETURN, $2); }
	| RETURN st
		{ $$ = stat1(RETURN, NIL); }
	| simple_stmt st
	| while { inloop++; } stmt
		{
			inloop--;
			$$ = stat2(WHILE, $1, $3);
		}
	| ';' opt_nl
		{ $$ = 0; }
	;

stmtlist:
	  stmt
	| stmtlist stmt
		{ $$ = linkum($1, $2); }
	;

subop:
	  SUB
	| GSUB
	;

/*
 * term: arithmetic, builtin and user function calls, increment and
 * decrement, getline, the string builtins, constants and variables.
 * The first alternative turns a slash followed by ASGNOP into DIVEQ.
 * Unary plus yields its operand node unchanged.
 */
term:
	  term '/' ASGNOP term
		{ $$ = op2(DIVEQ, $1, $4); }
	| term '+' term
		{ $$ = op2(ADD, $1, $3); }
	| term '-' term
		{ $$ = op2(MINUS, $1, $3); }
	| term '*' term
		{ $$ = op2(MULT, $1, $3); }
	| term '/' term
		{ $$ = op2(DIVIDE, $1, $3); }
	| term '%' term
		{ $$ = op2(MOD, $1, $3); }
	| term POWER term
		{ $$ = op2(POWER, $1, $3); }
	| '-' term %prec UMINUS
		{ $$ = op1(UMINUS, $2); }
	| '+' term %prec UMINUS
		{ $$ = $2; }
	| NOT term %prec UMINUS
		{ $$ = op1(NOT, notnull($2)); }
	| BLTIN '(' ')'
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| BLTIN '(' patlist ')'
		{ $$ = op2(BLTIN, itonp($1), $3); }
	| BLTIN
		{ $$ = op2(BLTIN, itonp($1), rectonode()); }
	| CALL '(' ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
	| CALL '(' patlist ')'
		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
	| DECR var
		{ $$ = op1(PREDECR, $2); }
	| INCR var
		{ $$ = op1(PREINCR, $2); }
	| var DECR
		{ $$ = op1(POSTDECR, $1); }
	| var INCR
		{ $$ = op1(POSTINCR, $1); }
	| GETLINE var LT term
		{ $$ = op3(GETLINE, $2, itonp($3), $4); }
	| GETLINE LT term
		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
	| GETLINE var
		{ $$ = op3(GETLINE, $2, NIL, NIL); }
	| GETLINE
		{ $$ = op3(GETLINE, NIL, NIL, NIL); }
	| INDEX '(' pattern comma pattern ')'
		{ $$ = op2(INDEX, $3, $5); }
	| INDEX '(' pattern comma reg_expr ')'
		{
			SYNTAX("index() doesn't permit regular expressions");
			$$ = op2(INDEX, $3, (Node*)$5);
		}
	| '(' pattern ')'
		{ $$ = $2; }
	| MATCHFCN '(' pattern comma reg_expr ')'
		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
	| MATCHFCN '(' pattern comma pattern ')'
		{
			if (constnode($5)) {
				$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
			} else {
				$$ = op3(MATCHFCN, (Node *)1, $3, $5);
			}
		}
	| NUMBER
		{ $$ = celltonode($1, CCON); }
	| SPLIT '(' pattern comma varname comma pattern ')'	/* string */
		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
	| SPLIT '(' pattern comma varname comma reg_expr ')'	/* const /regexp/ */
		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
	| SPLIT '(' pattern comma varname ')'			/* default */
		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }
	| SPRINTF '(' patlist ')'
		{ $$ = op1($1, $3); }
	| STRING
		{ $$ = celltonode($1, CCON); }
	| subop '(' reg_expr comma pattern ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
	| subop '(' pattern comma pattern ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
			} else {
				$$ = op4($1, (Node *)1, $3, $5, rectonode());
			}
		}
	| subop '(' reg_expr comma pattern comma var ')'
		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
	| subop '(' pattern comma pattern comma var ')'
		{
			if (constnode($3)) {
				$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
			} else {
				$$ = op4($1, (Node *)1, $3, $5, $7);
			}
		}
	| SUBSTR '(' pattern comma pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, $7); }
	| SUBSTR '(' pattern comma pattern ')'
		{ $$ = op3(SUBSTR, $3, $5, NIL); }
	| var
	;

/* var: a plain name, a subscripted name, or an INDIRECT reference. */
var:
	  varname
	| varname '[' patlist ']'
		{ $$ = op2(ARRAY, makearr($1), $3); }
	| IVAR
		{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
	| INDIRECT term
		{ $$ = op1(INDIRECT, $2); }
	;

/*
 * varlist: parameter list of a function definition.  Every alternative
 * also stores the list built so far in the global arglist; checkdup()
 * reports a name that is already on the list.
 */
varlist:
	  /* nothing */
		{ arglist = $$ = 0; }
	| VAR
		{ arglist = $$ = celltonode($1,CVAR); }
	| varlist comma VAR
		{
			checkdup($1, $3);
			arglist = $$ = linkum($1,celltonode($3,CVAR));
		}
	;

varname:
	  VAR
		{ $$ = celltonode($1, CVAR); }
	| ARG
		{ $$ = op1(ARG, itonp($1)); }
	| VARNF
		{ $$ = op1(VARNF, (Node *) $1); }
	;


/* while: the parenthesised condition, passed through notnull(). */
while:
	  WHILE '(' pattern rparen
		{ $$ = notnull($3); }
	;

%%

/*
 * Record p's name in curfname as the function being defined.
 * A syntax error is reported first when p is already an array or
 * already a function; curfname is assigned in every case.
 */
void setfname(Cell *p)
{
	if (isarr(p)) {
		SYNTAX("%s is an array, not a function", p->nval);
	} else if (isfcn(p)) {
		SYNTAX("you can't define function %s more than once", p->nval);
	}
	curfname = p->nval;
}

/*
 * Return nonzero when p is a value node (isvalue) whose first
 * argument, viewed as a Cell, has csub equal to CCON; 0 otherwise.
 */
int constnode(Node *p)
{
	Cell *c;

	if (!isvalue(p))
		return 0;
	c = (Cell *) (p->narg[0]);
	return c->csub == CCON;
}

/* Return the sval field of the Cell stored as p's first argument. */
char *strnode(Node *p)
{
	Cell *c = (Cell *) (p->narg[0]);

	return c->sval;
}

/*
 * Return n itself when its nobj is a comparison (LE LT EQ NE GT GE) or
 * a logical operator (BOR AND NOT); any other node is wrapped as
 * op2(NE, n, nullnode).
 */
Node *notnull(Node *n)
{
	switch (n->nobj) {
	case LE:
	case LT:
	case EQ:
	case NE:
	case GT:
	case GE:
	case BOR:
	case AND:
	case NOT:
		return n;
	default:
		break;
	}
	return op2(NE, n, nullnode);
}

/*
 * Check whether the name of cp is already on the list vl.  The list is
 * walked through nnext; on the first entry whose cell name matches, a
 * "duplicate argument" syntax error is reported and the walk stops.
 */
void checkdup(Node *vl, Cell *cp)
{
	char *name = cp->nval;
	Node *arg;

	for (arg = vl; arg != NULL; arg = arg->nnext) {
		Cell *seen = (Cell *) (arg->narg[0]);

		if (strcmp(name, seen->nval) == 0) {
			SYNTAX("duplicate argument %s", name);
			return;
		}
	}
}