9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

awkgram.y (13931B)


      1 /****************************************************************
      2 Copyright (C) Lucent Technologies 1997
      3 All Rights Reserved
      4 
      5 Permission to use, copy, modify, and distribute this software and
      6 its documentation for any purpose and without fee is hereby
      7 granted, provided that the above copyright notice appear in all
      8 copies and that both that the copyright notice and this
      9 permission notice and warranty disclaimer appear in supporting
     10 documentation, and that the name Lucent Technologies or any of
     11 its entities not be used in advertising or publicity pertaining
     12 to distribution of the software without specific, written prior
     13 permission.
     14 
     15 LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
     16 INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
     17 IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
     18 SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
     19 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
     20 IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
     21 ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
     22 THIS SOFTWARE.
     23 ****************************************************************/
     24 
     25 %{
     26 #include <stdio.h>
     27 #include <string.h>
     28 #include "awk.h"
     29 
     30 #define	makedfa(a,b)	compre(a)	/* second argument is dropped: makedfa(a,b) expands to compre(a) */
     31 
     32 void checkdup(Node *list, Cell *item);	/* defined after the second %%; called from the varlist rule */
     33 int yywrap(void) { return(1); }	/* always 1; under the lex convention that means no further input at EOF */
     34 
     35 Node	*beginloc = 0;	/* statement lists of the XBEGIN blocks, joined with linkum() in pa_stat */
     36 Node	*endloc = 0;	/* statement lists of the XEND blocks, joined with linkum() in pa_stat */
     37 int	infunc	= 0;	/* nonzero while a function body is parsed (raised after the parameter list) */
     38 int	inloop	= 0;	/* count of enclosing while/for/do bodies being parsed; 0 outside loops */
     39 char	*curfname = 0;	/* current function name; set by setfname(), reset to 0 after the definition */
     40 Node	*arglist = 0;	/* list of args for current function */
     41 %}
     42 
     43 %union {	/* semantic value carried by every token and nonterminal */
     44 	Node	*p;	/* <p>: parse-tree node */
     45 	Cell	*cp;	/* <cp>: cell of a VAR, IVAR, VARNF, CALL, NUMBER or STRING token */
     46 	int	i;	/* <i>: integer value (token/operator code) */
     47 	char	*s;	/* <s>: string of a REGEXPR token / reg_expr */
     48 }
     49 
     50 %token	<i>	FIRSTTOKEN	/* must be first */
     51 %token	<p>	PROGRAM PASTAT PASTAT2 XBEGIN XEND	/* XBEGIN/XEND open pa_stat alternatives; PROGRAM/PASTAT are node codes in actions */
     52 %token	<i>	NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'	/* newline token and punctuation */
     53 %token	<i>	ARRAY
     54 %token	<i>	MATCH NOTMATCH MATCHOP
     55 %token	<i>	FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS	/* not referenced by any rule in this file */
     56 %token	<i>	AND BOR APPEND EQ GE GT LE LT NE IN
     57 %token	<i>	ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
     58 %token	<i>	SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
     59 %token	<i>	ADD MINUS MULT DIVIDE MOD	/* node codes built by the term rules from '+', '-', '*', '/', '%' */
     60 %token	<i>	ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
     61 %token	<i>	PRINT PRINTF SPRINTF
     62 %token	<p>	ELSE INTEST CONDEXPR	/* INTEST and CONDEXPR appear only as node codes in actions */
     63 %token	<i>	POSTINCR PREINCR POSTDECR PREDECR	/* node codes built from INCR/DECR in the term rules */
     64 %token	<cp>	VAR IVAR VARNF CALL NUMBER STRING	/* tokens whose value is a Cell * */
     65 %token	<s>	REGEXPR	/* value is a char *, passed on by reg_expr */
     66 
     67 %type	<p>	pas pattern ppattern plist pplist patlist prarg term re	/* nonterminals yielding a Node * */
     68 %type	<p>	pa_pat pa_stat pa_stats
     69 %type	<s>	reg_expr	/* yields the char * of its REGEXPR token */
     70 %type	<p>	simple_stmt opt_simple_stmt stmt stmtlist
     71 %type	<p>	var varname funcname varlist
     72 %type	<p>	for if else while
     73 %type	<i>	do st
     74 %type	<i>	pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor	/* newline-absorbing wrappers and separators */
     75 %type	<i>	subop print
     76 
     77 %right	ASGNOP	/* lowest precedence; under yacc rules each later line binds tighter */
     78 %right	'?'
     79 %right	':'
     80 %left	BOR
     81 %left	AND
     82 %left	GETLINE	/* GETLINE and several names below are declared as tokens only by these lines */
     83 %nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
     84 %left	ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC 
     85 %left	GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
     86 %left	PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
     87 %left	REGEXPR VAR VARNF IVAR WHILE '('
     88 %left	CAT	/* used through %prec CAT by the juxtaposition rules in pattern/ppattern */
     89 %left	'+' '-'
     90 %left	'*' '/' '%'
     91 %left	NOT UMINUS	/* UMINUS is used through %prec by the unary '-', '+' and NOT term rules */
     92 %right	POWER
     93 %right	DECR INCR
     94 %left	INDIRECT
     95 %token	LASTTOKEN	/* must be last */
     96 
     97 %%
     98 
     99 program:	/* first rule and therefore the start symbol (no %start is declared) */
    100 	  pas	{ if (errorflag==0)
    101 			winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }	/* set winner only when errorflag is still 0 */
    102 	| error	{ yyclearin; bracecheck(); SYNTAX("bailing out"); }	/* top-level error: drop lookahead, run bracecheck(), report */
    103 	;
    104 
    105 and:	/* AND followed by any number of NL tokens */
    106 	  AND | and NL
    107 	;
    108 
    109 bor:	/* BOR followed by any number of NL tokens */
    110 	  BOR | bor NL
    111 	;
    112 
    113 comma:	/* ',' followed by any number of NL tokens */
    114 	  ',' | comma NL
    115 	;
    116 
    117 do:	/* DO followed by any number of NL tokens */
    118 	  DO | do NL
    119 	;
    120 
    121 else:	/* ELSE followed by any number of NL tokens */
    122 	  ELSE | else NL
    123 	;
    124 
    125 for:	/* three loop forms; the mid-rule {inloop++;} takes a $ slot, so the body stmt is the slot after it */
    126 	  FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    127 		{ --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }	/* init; condition passed through notnull(); step; body */
    128 	| FOR '(' opt_simple_stmt ';'  ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
    129 		{ --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }	/* empty condition is stored as NIL */
    130 	| FOR '(' varname IN varname rparen {inloop++;} stmt
    131 		{ --inloop; $$ = stat3(IN, $3, makearr($5), $8); }	/* for (v in arr): node code IN, second name through makearr() */
    132 	;
    133 
    134 funcname:	/* name in a FUNC definition; no explicit $$, so yacc's default $$ = $1 applies and pa_stat casts $2 to Cell * */
    135 	  VAR	{ setfname($1); }
    136 	| CALL	{ setfname($1); }
    137 	;
    138 
    139 if:	/* IF ( pattern ): yields the condition only; stmt builds the IF node */
    140 	  IF '(' pattern rparen		{ $$ = notnull($3); }
    141 	;
    142 
    143 lbrace:	/* '{' followed by any number of NL tokens */
    144 	  '{' | lbrace NL
    145 	;
    146 
    147 nl:	/* one or more NL tokens */
    148 	  NL | nl NL
    149 	;
    150 
    151 opt_nl:	/* zero or more NL tokens */
    152 	  /* empty */	{ $$ = 0; }
    153 	| nl
    154 	;
    155 
    156 opt_pst:	/* optional run of NL and ';' separators (see pst) */
    157 	  /* empty */	{ $$ = 0; }
    158 	| pst
    159 	;
    160 
    161 
    162 opt_simple_stmt:	/* simple_stmt or nothing; the empty case yields 0 */
    163 	  /* empty */			{ $$ = 0; }
    164 	| simple_stmt
    165 	;
    166 
    167 pas:	/* all pattern-action statements of the program; 0 when there are none */
    168 	  opt_pst			{ $$ = 0; }
    169 	| opt_pst pa_stats opt_pst	{ $$ = $2; }
    170 	;
    171 
    172 pa_pat:	/* a pattern in pattern position, passed through notnull() */
    173 	  pattern	{ $$ = notnull($1); }
    174 	;
    175 
    176 pa_stat:	/* one pattern-action item; a missing action becomes PRINT of rectonode() */
    177 	  pa_pat			{ $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
    178 	| pa_pat lbrace stmtlist '}'	{ $$ = stat2(PASTAT, $1, $3); }
    179 	| pa_pat ',' pa_pat		{ $$ = pa2stat($1, $3, stat2(PRINT, rectonode(), NIL)); }	/* two-pattern form */
    180 	| pa_pat ',' pa_pat lbrace stmtlist '}'	{ $$ = pa2stat($1, $3, $5); }
    181 	| lbrace stmtlist '}'		{ $$ = stat2(PASTAT, NIL, $2); }	/* action without a pattern: pattern slot is NIL */
    182 	| XBEGIN lbrace stmtlist '}'
    183 		{ beginloc = linkum(beginloc, $3); $$ = 0; }	/* appended to beginloc; contributes 0 to the pa_stats list */
    184 	| XEND lbrace stmtlist '}'
    185 		{ endloc = linkum(endloc, $3); $$ = 0; }	/* appended to endloc; contributes 0 to the pa_stats list */
    186 	| FUNC funcname '(' varlist rparen {infunc++;} lbrace stmtlist '}'
    187 		{ infunc--; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }	/* $6 is the mid-rule action, so the body is $8 */
    188 	;
    189 
    190 pa_stats:	/* pa_stat items separated by optional pst, chained with linkum() */
    191 	  pa_stat
    192 	| pa_stats opt_pst pa_stat	{ $$ = linkum($1, $3); }
    193 	;
    194 
    195 patlist:	/* one or more patterns separated by comma, chained with linkum() */
    196 	  pattern
    197 	| patlist comma pattern		{ $$ = linkum($1, $3); }
    198 	;
    199 
    200 ppattern:	/* like pattern but without the EQ..NE comparisons and '|' GETLINE; reached only through pplist (print arguments) */
    201 	  var ASGNOP ppattern		{ $$ = op2($2, $1, $3); }
    202 	| ppattern '?' ppattern ':' ppattern %prec '?'
    203 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    204 	| ppattern bor ppattern %prec BOR
    205 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    206 	| ppattern and ppattern %prec AND
    207 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    208 	| ppattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    209 	| ppattern MATCHOP ppattern
    210 		{ if (constnode($3))	/* constant right operand: its string goes through makedfa() now */
    211 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    212 		  else	/* otherwise the expression node is kept and the first operand is (Node *)1 */
    213 			$$ = op3($2, (Node *)1, $1, $3); }
    214 	| ppattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    215 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    216 	| ppattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }	/* juxtaposition builds a CAT node */
    217 	| re
    218 	| term
    219 	;
    220 
    221 pattern:	/* general expression: assignment, ?:, logical, comparison, match, IN, '|' GETLINE, juxtaposition */
    222 	  var ASGNOP pattern		{ $$ = op2($2, $1, $3); }
    223 	| pattern '?' pattern ':' pattern %prec '?'
    224 	 	{ $$ = op3(CONDEXPR, notnull($1), $3, $5); }
    225 	| pattern bor pattern %prec BOR
    226 		{ $$ = op2(BOR, notnull($1), notnull($3)); }
    227 	| pattern and pattern %prec AND
    228 		{ $$ = op2(AND, notnull($1), notnull($3)); }
    229 	| pattern EQ pattern		{ $$ = op2($2, $1, $3); }	/* comparisons use the operator token's own value as node code */
    230 	| pattern GE pattern		{ $$ = op2($2, $1, $3); }
    231 	| pattern GT pattern		{ $$ = op2($2, $1, $3); }
    232 	| pattern LE pattern		{ $$ = op2($2, $1, $3); }
    233 	| pattern LT pattern		{ $$ = op2($2, $1, $3); }
    234 	| pattern NE pattern		{ $$ = op2($2, $1, $3); }
    235 	| pattern MATCHOP reg_expr	{ $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
    236 	| pattern MATCHOP pattern
    237 		{ if (constnode($3))	/* constant right operand: its string goes through makedfa() now */
    238 			$$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
    239 		  else	/* otherwise the expression node is kept and the first operand is (Node *)1 */
    240 			$$ = op3($2, (Node *)1, $1, $3); }
    241 	| pattern IN varname		{ $$ = op2(INTEST, $1, makearr($3)); }
    242 	| '(' plist ')' IN varname	{ $$ = op2(INTEST, $2, makearr($5)); }
    243 	| pattern '|' GETLINE var	{ 
    244 			if (safe) SYNTAX("cmd | getline is unsafe");	/* rejected when safe is set; $$ is then not assigned here */
    245 			else $$ = op3(GETLINE, $4, itonp($2), $1); }
    246 	| pattern '|' GETLINE		{ 
    247 			if (safe) SYNTAX("cmd | getline is unsafe");
    248 			else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
    249 	| pattern term %prec CAT	{ $$ = op2(CAT, $1, $2); }	/* juxtaposition builds a CAT node */
    250 	| re
    251 	| term
    252 	;
    253 
    254 plist:	/* two or more patterns separated by comma (no single-element form) */
    255 	  pattern comma pattern		{ $$ = linkum($1, $3); }
    256 	| plist comma pattern		{ $$ = linkum($1, $3); }
    257 	;
    258 
    259 pplist:	/* one or more ppatterns separated by comma */
    260 	  ppattern
    261 	| pplist comma ppattern		{ $$ = linkum($1, $3); }
    262 	;
    263 
    264 prarg:	/* argument part of print/printf */
    265 	  /* empty */			{ $$ = rectonode(); }
    266 	| pplist
    267 	| '(' plist ')'			{ $$ = $2; }
    268 	;
    269 
    270 print:	/* either output keyword; its token value becomes the node code in simple_stmt */
    271 	  PRINT | PRINTF
    272 	;
    273 
    274 pst:	/* one or more NL or ';' tokens in any mix */
    275 	  NL | ';' | pst NL | pst ';'
    276 	;
    277 
    278 rbrace:	/* '}' followed by any number of NL tokens */
    279 	  '}' | rbrace NL
    280 	;
    281 
    282 re:	/* bare /regex/ used as an expression: MATCH node over rectonode() */
    283 	   reg_expr
    284 		{ $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
    285 	| NOT re	{ $$ = op1(NOT, notnull($2)); }
    286 	;
    287 
    288 reg_expr:	/* startreg() runs right after the opening '/'; that action is $2, so the REGEXPR is $3 */
    289 	  '/' {startreg();} REGEXPR '/'		{ $$ = $3; }
    290 	;
    291 
    292 rparen:	/* ')' followed by any number of NL tokens */
    293 	  ')' | rparen NL
    294 	;
    295 
    296 simple_stmt:	/* print forms, delete, or a bare expression; redirected prints are rejected when safe is set */
    297 	  print prarg '|' term		{ 
    298 			if (safe) SYNTAX("print | is unsafe");
    299 			else $$ = stat3($1, $2, itonp($3), $4); }	/* the redirection token's value ($3) is stored via itonp() */
    300 	| print prarg APPEND term	{
    301 			if (safe) SYNTAX("print >> is unsafe");
    302 			else $$ = stat3($1, $2, itonp($3), $4); }
    303 	| print prarg GT term		{
    304 			if (safe) SYNTAX("print > is unsafe");
    305 			else $$ = stat3($1, $2, itonp($3), $4); }
    306 	| print prarg			{ $$ = stat3($1, $2, NIL, NIL); }	/* no redirection */
    307 	| DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
    308 	| DELETE varname		 { $$ = stat2(DELETE, makearr($2), 0); }	/* no subscript: second operand is 0 */
    309 	| pattern			{ $$ = exptostat($1); }
    310 	| error				{ yyclearin; SYNTAX("illegal statement"); }
    311 	;
    312 
    313 st:	/* statement terminator: newline(s), or ';' followed by optional newlines */
    314 	  nl
    315 	| ';' opt_nl
    316 	;
    317 
    318 stmt:	/* one statement; alternatives without an action rely on yacc's default $$ = $1 */
    319 	  BREAK st		{ if (!inloop) SYNTAX("break illegal outside of loops");
    320 				  $$ = stat1(BREAK, NIL); }	/* node is built even after the diagnostic */
    321 	| CLOSE pattern st	{ $$ = stat1(CLOSE, $2); }
    322 	| CONTINUE st		{  if (!inloop) SYNTAX("continue illegal outside of loops");
    323 				  $$ = stat1(CONTINUE, NIL); }
    324 	| do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
    325 		{ $$ = stat2(DO, $3, notnull($7)); }	/* two mid-rule actions: body is $3, condition is $7 */
    326 	| EXIT pattern st	{ $$ = stat1(EXIT, $2); }
    327 	| EXIT st		{ $$ = stat1(EXIT, NIL); }
    328 	| for
    329 	| if stmt else stmt	{ $$ = stat3(IF, $1, $2, $4); }
    330 	| if stmt		{ $$ = stat3(IF, $1, $2, NIL); }	/* no else branch: NIL */
    331 	| lbrace stmtlist rbrace { $$ = $2; }
    332 	| NEXT st	{ if (infunc)
    333 				SYNTAX("next is illegal inside a function");
    334 			  $$ = stat1(NEXT, NIL); }
    335 	| NEXTFILE st	{ if (infunc)
    336 				SYNTAX("nextfile is illegal inside a function");
    337 			  $$ = stat1(NEXTFILE, NIL); }
    338 	| RETURN pattern st	{ $$ = stat1(RETURN, $2); }
    339 	| RETURN st		{ $$ = stat1(RETURN, NIL); }
    340 	| simple_stmt st
    341 	| while {inloop++;} stmt	{ --inloop; $$ = stat2(WHILE, $1, $3); }	/* $2 is the mid-rule action */
    342 	| ';' opt_nl		{ $$ = 0; }	/* empty statement yields 0 */
    343 	;
    344 
    345 stmtlist:	/* one or more stmt, chained with linkum() */
    346 	  stmt
    347 	| stmtlist stmt		{ $$ = linkum($1, $2); }
    348 	;
    349 
    350 subop:	/* SUB or GSUB; the token value is used as the node code in term */
    351 	  SUB | GSUB
    352 	;
    353 
    354 term:	/* arithmetic, unary operators, built-in and user calls, getline forms, constants, variables */
    355  	  term '/' ASGNOP term		{ $$ = op2(DIVEQ, $1, $4); }	/* '/' followed by ASGNOP is built as DIVEQ */
    356  	| term '+' term			{ $$ = op2(ADD, $1, $3); }
    357 	| term '-' term			{ $$ = op2(MINUS, $1, $3); }
    358 	| term '*' term			{ $$ = op2(MULT, $1, $3); }
    359 	| term '/' term			{ $$ = op2(DIVIDE, $1, $3); }
    360 	| term '%' term			{ $$ = op2(MOD, $1, $3); }
    361 	| term POWER term		{ $$ = op2(POWER, $1, $3); }
    362 	| '-' term %prec UMINUS		{ $$ = op1(UMINUS, $2); }
    363 	| '+' term %prec UMINUS		{ $$ = $2; }	/* unary plus passes its operand through; no node is built */
    364 	| NOT term %prec UMINUS		{ $$ = op1(NOT, notnull($2)); }
    365 	| BLTIN '(' ')'			{ $$ = op2(BLTIN, itonp($1), rectonode()); }	/* no arguments: rectonode() is the operand */
    366 	| BLTIN '(' patlist ')'		{ $$ = op2(BLTIN, itonp($1), $3); }
    367 	| BLTIN				{ $$ = op2(BLTIN, itonp($1), rectonode()); }	/* no parentheses: same node as BLTIN '(' ')' */
    368 	| CALL '(' ')'			{ $$ = op2(CALL, celltonode($1,CVAR), NIL); }
    369 	| CALL '(' patlist ')'		{ $$ = op2(CALL, celltonode($1,CVAR), $3); }
    370 	| DECR var			{ $$ = op1(PREDECR, $2); }
    371 	| INCR var			{ $$ = op1(PREINCR, $2); }
    372 	| var DECR			{ $$ = op1(POSTDECR, $1); }
    373 	| var INCR			{ $$ = op1(POSTINCR, $1); }
    374 	| GETLINE var LT term		{ $$ = op3(GETLINE, $2, itonp($3), $4); }	/* LT here is the '<' of getline var < term */
    375 	| GETLINE LT term		{ $$ = op3(GETLINE, NIL, itonp($2), $3); }
    376 	| GETLINE var			{ $$ = op3(GETLINE, $2, NIL, NIL); }
    377 	| GETLINE			{ $$ = op3(GETLINE, NIL, NIL, NIL); }
    378 	| INDEX '(' pattern comma pattern ')'
    379 		{ $$ = op2(INDEX, $3, $5); }
    380 	| INDEX '(' pattern comma reg_expr ')'
    381 		{ SYNTAX("index() doesn't permit regular expressions");	/* accepted by the grammar only to give this message */
    382 		  $$ = op2(INDEX, $3, (Node*)$5); }
    383 	| '(' pattern ')'		{ $$ = $2; }
    384 	| MATCHFCN '(' pattern comma reg_expr ')'
    385 		{ $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
    386 	| MATCHFCN '(' pattern comma pattern ')'
    387 		{ if (constnode($5))	/* constant second argument: its string goes through makedfa() now */
    388 			$$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
    389 		  else	/* otherwise the expression node is kept and the first operand is (Node *)1 */
    390 			$$ = op3(MATCHFCN, (Node *)1, $3, $5); }
    391 	| NUMBER			{ $$ = celltonode($1, CCON); }
    392 	| SPLIT '(' pattern comma varname comma pattern ')'     /* string */
    393 		{ $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }	/* last operand is a token code cast to Node * */
    394 	| SPLIT '(' pattern comma varname comma reg_expr ')'    /* const /regexp/ */
    395 		{ $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
    396 	| SPLIT '(' pattern comma varname ')'
    397 		{ $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); }  /* default */
    398 	| SPRINTF '(' patlist ')'	{ $$ = op1($1, $3); }
    399 	| STRING	 		{ $$ = celltonode($1, CCON); }
    400 	| subop '(' reg_expr comma pattern ')'
    401 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }	/* two-argument form: target operand is rectonode() */
    402 	| subop '(' pattern comma pattern ')'
    403 		{ if (constnode($3))
    404 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
    405 		  else
    406 			$$ = op4($1, (Node *)1, $3, $5, rectonode()); }
    407 	| subop '(' reg_expr comma pattern comma var ')'
    408 		{ $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }	/* three-argument form: explicit target var */
    409 	| subop '(' pattern comma pattern comma var ')'
    410 		{ if (constnode($3))
    411 			$$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
    412 		  else
    413 			$$ = op4($1, (Node *)1, $3, $5, $7); }
    414 	| SUBSTR '(' pattern comma pattern comma pattern ')'
    415 		{ $$ = op3(SUBSTR, $3, $5, $7); }
    416 	| SUBSTR '(' pattern comma pattern ')'
    417 		{ $$ = op3(SUBSTR, $3, $5, NIL); }	/* third argument omitted: NIL */
    418 	| var
    419 	;
    420 
    421 var:	/* plain name, subscripted name, IVAR, or INDIRECT applied to a term */
    422 	  varname
    423 	| varname '[' patlist ']'	{ $$ = op2(ARRAY, makearr($1), $3); }
    424 	| IVAR				{ $$ = op1(INDIRECT, celltonode($1, CVAR)); }
    425 	| INDIRECT term	 		{ $$ = op1(INDIRECT, $2); }
    426 	;	
    427 
    428 varlist:	/* parameter names of a function; every alternative also stores the list in arglist */
    429 	  /* nothing */		{ arglist = $$ = 0; }
    430 	| VAR			{ arglist = $$ = celltonode($1,CVAR); }
    431 	| varlist comma VAR	{
    432 			checkdup($1, $3);	/* diagnose a repeated name before appending it */
    433 			arglist = $$ = linkum($1,celltonode($3,CVAR)); }
    434 	;
    435 
    436 varname:	/* VAR, ARG or VARNF token turned into a node */
    437 	  VAR			{ $$ = celltonode($1, CVAR); }
    438 	| ARG 			{ $$ = op1(ARG, itonp($1)); }
    439 	| VARNF			{ $$ = op1(VARNF, (Node *) $1); }
    440 	;
    441 
    442 
    443 while:	/* WHILE ( pattern ): yields the condition only; stmt builds the WHILE node */
    444 	  WHILE '(' pattern rparen	{ $$ = notnull($3); }
    445 	;
    446 
    447 %%
    448 
    449 void setfname(Cell *p)	/* diagnose a name clash for p, then make p's name the current function */
    450 {
    451 	if (!isarr(p) && isfcn(p))	/* not an array, but already a function */
    452 		SYNTAX("you can't define function %s more than once", p->nval);
    453 	else if (isarr(p))		/* the name already belongs to an array */
    454 		SYNTAX("%s is an array, not a function", p->nval);
    455 	curfname = p->nval;	/* assigned in every case, and only after any diagnostic */
    456 }
    457 
    458 int constnode(Node *p)	/* 1 when isvalue(p) holds and the cell in narg[0] has csub CCON, else 0 */
    459 {
    460 	return isvalue(p) ? ((Cell *)p->narg[0])->csub == CCON : 0;
    461 }
    462 
    463 char *strnode(Node *p)	/* sval of the cell stored in p's first argument slot */
    464 {
    465 	return ((Cell *)p->narg[0])->sval;
    466 }
    467 
    468 Node *notnull(Node *n)	/* return n as is for the listed node kinds, else the node (n NE nullnode) */
    469 {
    470 	switch (n->nobj) {
    471 	case LT: case LE: case GT: case GE: case EQ: case NE: case AND: case BOR: case NOT:
    472 		break;		/* these kinds are handed back untouched */
    473 	default:
    474 		return op2(NE, n, nullnode);
    475 	}
    476 	return n;
    477 }
    478 
    479 void checkdup(Node *vl, Cell *cp)	/* report cp's name once if list vl already contains it */
    480 {
    481 	char *name = cp->nval;
    482 
    483 	/* walk to the first entry carrying the same name, or off the end of the list */
    484 	while (vl && strcmp(name, ((Cell *)vl->narg[0])->nval) != 0)
    485 		vl = vl->nnext;
    486 	if (vl)		/* stopped on a node, so that node is a duplicate */
    487 		SYNTAX("duplicate argument %s", name);
    488 }
    489