lex.c (6203B)
1 #include "rc.h" 2 #include "exec.h" 3 #include "io.h" 4 #include "getflags.h" 5 #include "fns.h" 6 int getnext(void); 7 8 int 9 wordchr(int c) 10 { 11 return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF; 12 } 13 14 int 15 idchr(int c) 16 { 17 /* 18 * Formerly: 19 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9' 20 * || c=='_' || c=='*'; 21 */ 22 return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c); 23 } 24 int future = EOF; 25 int doprompt = 1; 26 int inquote; 27 int incomm; 28 /* 29 * Look ahead in the input stream 30 */ 31 32 int 33 nextc(void) 34 { 35 if(future==EOF) 36 future = getnext(); 37 return future; 38 } 39 /* 40 * Consume the lookahead character. 41 */ 42 43 int 44 advance(void) 45 { 46 int c = nextc(); 47 lastc = future; 48 future = EOF; 49 return c; 50 } 51 /* 52 * read a character from the input stream 53 */ 54 55 int 56 getnext(void) 57 { 58 int c; 59 static int peekc = EOF; 60 if(peekc!=EOF){ 61 c = peekc; 62 peekc = EOF; 63 return c; 64 } 65 if(runq->eof) 66 return EOF; 67 if(doprompt) 68 pprompt(); 69 c = rchr(runq->cmdfd); 70 if(!inquote && c=='\\'){ 71 c = rchr(runq->cmdfd); 72 if(c=='\n' && !incomm){ /* don't continue a comment */ 73 doprompt = 1; 74 c=' '; 75 } 76 else{ 77 peekc = c; 78 c='\\'; 79 } 80 } 81 doprompt = doprompt || c=='\n' || c==EOF; 82 if(c==EOF) 83 runq->eof++; 84 else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c); 85 return c; 86 } 87 88 void 89 pprompt(void) 90 { 91 var *prompt; 92 if(runq->iflag){ 93 pstr(err, promptstr); 94 flush(err); 95 prompt = vlook("prompt"); 96 if(prompt->val && prompt->val->next) 97 promptstr = prompt->val->next->word; 98 else 99 promptstr="\t"; 100 } 101 runq->lineno++; 102 doprompt = 0; 103 } 104 105 void 106 skipwhite(void) 107 { 108 int c; 109 for(;;){ 110 c = nextc(); 111 /* Why did this used to be if(!inquote && c=='#') ?? */ 112 if(c=='#'){ 113 incomm = 1; 114 for(;;){ 115 c = nextc(); 116 if(c=='\n' || c==EOF) { 117 incomm = 0; 118 break; 119 } 120 advance(); 121 } 122 } 123 if(c==' ' || c=='\t') 124 advance(); 125 else return; 126 } 127 } 128 129 void 130 skipnl(void) 131 { 132 int c; 133 for(;;){ 134 skipwhite(); 135 c = nextc(); 136 if(c!='\n') 137 return; 138 advance(); 139 } 140 } 141 142 int 143 nextis(int c) 144 { 145 if(nextc()==c){ 146 advance(); 147 return 1; 148 } 149 return 0; 150 } 151 152 char* 153 addtok(char *p, int val) 154 { 155 if(p==0) 156 return 0; 157 if(p==&tok[NTOK-1]){ 158 *p = 0; 159 yyerror("token buffer too short"); 160 return 0; 161 } 162 *p++=val; 163 return p; 164 } 165 166 char* 167 addutf(char *p, int c) 168 { 169 p = addtok(p, c); 170 if(twobyte(c)) /* 2-byte escape */ 171 return addtok(p, advance()); 172 if(threebyte(c)){ /* 3-byte escape */ 173 p = addtok(p, advance()); 174 return addtok(p, advance()); 175 } 176 return p; 177 } 178 int lastdol; /* was the last token read '$' or '$#' or '"'? */ 179 int lastword; /* was the last token read a word or compound word terminator? */ 180 181 int 182 yylex(void) 183 { 184 int c, d = nextc(); 185 char *w = tok; 186 struct tree *t; 187 yylval.tree = 0; 188 /* 189 * Embarassing sneakiness: if the last token read was a quoted or unquoted 190 * WORD then we alter the meaning of what follows. If the next character 191 * is `(', we return SUB (a subscript paren) and consume the `('. Otherwise, 192 * if the next character is the first character of a simple or compound word, 193 * we insert a `^' before it. 194 */ 195 if(lastword){ 196 lastword = 0; 197 if(d=='('){ 198 advance(); 199 strcpy(tok, "( [SUB]"); 200 return SUB; 201 } 202 if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){ 203 strcpy(tok, "^"); 204 return '^'; 205 } 206 } 207 inquote = 0; 208 skipwhite(); 209 switch(c = advance()){ 210 case EOF: 211 lastdol = 0; 212 strcpy(tok, "EOF"); 213 return EOF; 214 case '$': 215 lastdol = 1; 216 if(nextis('#')){ 217 strcpy(tok, "$#"); 218 return COUNT; 219 } 220 if(nextis('"')){ 221 strcpy(tok, "$\""); 222 return '"'; 223 } 224 strcpy(tok, "$"); 225 return '$'; 226 case '&': 227 lastdol = 0; 228 if(nextis('&')){ 229 skipnl(); 230 strcpy(tok, "&&"); 231 return ANDAND; 232 } 233 strcpy(tok, "&"); 234 return '&'; 235 case '|': 236 lastdol = 0; 237 if(nextis(c)){ 238 skipnl(); 239 strcpy(tok, "||"); 240 return OROR; 241 } 242 case '<': 243 case '>': 244 lastdol = 0; 245 /* 246 * funny redirection tokens: 247 * redir: arrow | arrow '[' fd ']' 248 * arrow: '<' | '<<' | '>' | '>>' | '|' 249 * fd: digit | digit '=' | digit '=' digit 250 * digit: '0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9' 251 * some possibilities are nonsensical and get a message. 252 */ 253 *w++=c; 254 t = newtree(); 255 switch(c){ 256 case '|': 257 t->type = PIPE; 258 t->fd0 = 1; 259 t->fd1 = 0; 260 break; 261 case '>': 262 t->type = REDIR; 263 if(nextis(c)){ 264 t->rtype = APPEND; 265 *w++=c; 266 } 267 else t->rtype = WRITE; 268 t->fd0 = 1; 269 break; 270 case '<': 271 t->type = REDIR; 272 if(nextis(c)){ 273 t->rtype = HERE; 274 *w++=c; 275 } else if (nextis('>')){ 276 t->rtype = RDWR; 277 *w++=c; 278 } else t->rtype = READ; 279 t->fd0 = 0; 280 break; 281 } 282 if(nextis('[')){ 283 *w++='['; 284 c = advance(); 285 *w++=c; 286 if(c<'0' || '9'<c){ 287 RedirErr: 288 *w = 0; 289 yyerror(t->type==PIPE?"pipe syntax" 290 :"redirection syntax"); 291 return EOF; 292 } 293 t->fd0 = 0; 294 do{ 295 t->fd0 = t->fd0*10+c-'0'; 296 *w++=c; 297 c = advance(); 298 }while('0'<=c && c<='9'); 299 if(c=='='){ 300 *w++='='; 301 if(t->type==REDIR) 302 t->type = DUP; 303 c = advance(); 304 if('0'<=c && c<='9'){ 305 t->rtype = DUPFD; 306 t->fd1 = t->fd0; 307 t->fd0 = 0; 308 do{ 309 t->fd0 = t->fd0*10+c-'0'; 310 *w++=c; 311 c = advance(); 312 }while('0'<=c && c<='9'); 313 } 314 else{ 315 if(t->type==PIPE) 316 goto RedirErr; 317 t->rtype = CLOSE; 318 } 319 } 320 if(c!=']' 321 || t->type==DUP && (t->rtype==HERE || t->rtype==APPEND)) 322 goto RedirErr; 323 *w++=']'; 324 } 325 *w='\0'; 326 yylval.tree = t; 327 if(t->type==PIPE) 328 skipnl(); 329 return t->type; 330 case '\'': 331 lastdol = 0; 332 lastword = 1; 333 inquote = 1; 334 for(;;){ 335 c = advance(); 336 if(c==EOF) 337 break; 338 if(c=='\''){ 339 if(nextc()!='\'') 340 break; 341 advance(); 342 } 343 w = addutf(w, c); 344 } 345 if(w!=0) 346 *w='\0'; 347 t = token(tok, WORD); 348 t->quoted = 1; 349 yylval.tree = t; 350 return t->type; 351 } 352 if(!wordchr(c)){ 353 lastdol = 0; 354 tok[0] = c; 355 tok[1]='\0'; 356 return c; 357 } 358 for(;;){ 359 /* next line should have (char)c==GLOB, but ken's compiler is broken */ 360 if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB) 361 w = addtok(w, GLOB); 362 w = addutf(w, c); 363 c = nextc(); 364 if(lastdol?!idchr(c):!wordchr(c)) break; 365 advance(); 366 } 367 368 lastword = 1; 369 lastdol = 0; 370 if(w!=0) 371 *w='\0'; 372 t = klook(tok); 373 if(t->type!=WORD) 374 lastword = 0; 375 t->quoted = 0; 376 yylval.tree = t; 377 return t->type; 378 }