9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

lex.c (6203B)


      1 #include "rc.h"
      2 #include "exec.h"
      3 #include "io.h"
      4 #include "getflags.h"
      5 #include "fns.h"
      6 int getnext(void);
      7 
      8 int
      9 wordchr(int c)
     10 {
     11 	return !strchr("\n \t#;&|^$=`'{}()<>", c) && c!=EOF;
     12 }
     13 
     14 int
     15 idchr(int c)
     16 {
     17 	/*
     18 	 * Formerly:
     19 	 * return 'a'<=c && c<='z' || 'A'<=c && c<='Z' || '0'<=c && c<='9'
     20 	 *	|| c=='_' || c=='*';
     21 	 */
     22 	return c>' ' && !strchr("!\"#$%&'()+,-./:;<=>?@[\\]^`{|}~", c);
     23 }
     24 int future = EOF;
     25 int doprompt = 1;
     26 int inquote;
     27 int incomm;
     28 /*
     29  * Look ahead in the input stream
     30  */
     31 
     32 int
     33 nextc(void)
     34 {
     35 	if(future==EOF)
     36 		future = getnext();
     37 	return future;
     38 }
     39 /*
     40  * Consume the lookahead character.
     41  */
     42 
     43 int
     44 advance(void)
     45 {
     46 	int c = nextc();
     47 	lastc = future;
     48 	future = EOF;
     49 	return c;
     50 }
     51 /*
     52  * read a character from the input stream
     53  */	
     54 
     55 int
     56 getnext(void)
     57 {
     58 	int c;
     59 	static int peekc = EOF;
     60 	if(peekc!=EOF){
     61 		c = peekc;
     62 		peekc = EOF;
     63 		return c;
     64 	}
     65 	if(runq->eof)
     66 		return EOF;
     67 	if(doprompt)
     68 		pprompt();
     69 	c = rchr(runq->cmdfd);
     70 	if(!inquote && c=='\\'){
     71 		c = rchr(runq->cmdfd);
     72 		if(c=='\n' && !incomm){		/* don't continue a comment */
     73 			doprompt = 1;
     74 			c=' ';
     75 		}
     76 		else{
     77 			peekc = c;
     78 			c='\\';
     79 		}
     80 	}
     81 	doprompt = doprompt || c=='\n' || c==EOF;
     82 	if(c==EOF)
     83 		runq->eof++;
     84 	else if(flag['V'] || ndot>=2 && flag['v']) pchr(err, c);
     85 	return c;
     86 }
     87 
     88 void
     89 pprompt(void)
     90 {
     91 	var *prompt;
     92 	if(runq->iflag){
     93 		pstr(err, promptstr);
     94 		flush(err);
     95 		prompt = vlook("prompt");
     96 		if(prompt->val && prompt->val->next)
     97 			promptstr = prompt->val->next->word;
     98 		else
     99 			promptstr="\t";
    100 	}
    101 	runq->lineno++;
    102 	doprompt = 0;
    103 }
    104 
    105 void
    106 skipwhite(void)
    107 {
    108 	int c;
    109 	for(;;){
    110 		c = nextc();
    111 		/* Why did this used to be  if(!inquote && c=='#') ?? */
    112 		if(c=='#'){
    113 			incomm = 1;
    114 			for(;;){
    115 				c = nextc();
    116 				if(c=='\n' || c==EOF) {
    117 					incomm = 0;
    118 					break;
    119 				}
    120 				advance();
    121 			}
    122 		}
    123 		if(c==' ' || c=='\t')
    124 			advance();
    125 		else return;
    126 	}
    127 }
    128 
    129 void
    130 skipnl(void)
    131 {
    132 	int c;
    133 	for(;;){
    134 		skipwhite();
    135 		c = nextc();
    136 		if(c!='\n')
    137 			return;
    138 		advance();
    139 	}
    140 }
    141 
    142 int
    143 nextis(int c)
    144 {
    145 	if(nextc()==c){
    146 		advance();
    147 		return 1;
    148 	}
    149 	return 0;
    150 }
    151 
    152 char*
    153 addtok(char *p, int val)
    154 {
    155 	if(p==0)
    156 		return 0;
    157 	if(p==&tok[NTOK-1]){
    158 		*p = 0;
    159 		yyerror("token buffer too short");
    160 		return 0;
    161 	}
    162 	*p++=val;
    163 	return p;
    164 }
    165 
    166 char*
    167 addutf(char *p, int c)
    168 {
    169 	p = addtok(p, c);
    170 	if(twobyte(c))	 /* 2-byte escape */
    171 		return addtok(p, advance());
    172 	if(threebyte(c)){	/* 3-byte escape */
    173 		p = addtok(p, advance());
    174 		return addtok(p, advance());
    175 	}
    176 	return p;
    177 }
    178 int lastdol;	/* was the last token read '$' or '$#' or '"'? */
    179 int lastword;	/* was the last token read a word or compound word terminator? */
    180 
    181 int
    182 yylex(void)
    183 {
    184 	int c, d = nextc();
    185 	char *w = tok;
    186 	struct tree *t;
    187 	yylval.tree = 0;
    188 	/*
    189 	 * Embarassing sneakiness:  if the last token read was a quoted or unquoted
    190 	 * WORD then we alter the meaning of what follows.  If the next character
    191 	 * is `(', we return SUB (a subscript paren) and consume the `('.  Otherwise,
    192 	 * if the next character is the first character of a simple or compound word,
    193 	 * we insert a `^' before it.
    194 	 */
    195 	if(lastword){
    196 		lastword = 0;
    197 		if(d=='('){
    198 			advance();
    199 			strcpy(tok, "( [SUB]");
    200 			return SUB;
    201 		}
    202 		if(wordchr(d) || d=='\'' || d=='`' || d=='$' || d=='"'){
    203 			strcpy(tok, "^");
    204 			return '^';
    205 		}
    206 	}
    207 	inquote = 0;
    208 	skipwhite();
    209 	switch(c = advance()){
    210 	case EOF:
    211 		lastdol = 0;
    212 		strcpy(tok, "EOF");
    213 		return EOF;
    214 	case '$':
    215 		lastdol = 1;
    216 		if(nextis('#')){
    217 			strcpy(tok, "$#");
    218 			return COUNT;
    219 		}
    220 		if(nextis('"')){
    221 			strcpy(tok, "$\"");
    222 			return '"';
    223 		}
    224 		strcpy(tok, "$");
    225 		return '$';
    226 	case '&':
    227 		lastdol = 0;
    228 		if(nextis('&')){
    229 			skipnl();
    230 			strcpy(tok, "&&");
    231 			return ANDAND;
    232 		}
    233 		strcpy(tok, "&");
    234 		return '&';
    235 	case '|':
    236 		lastdol = 0;
    237 		if(nextis(c)){
    238 			skipnl();
    239 			strcpy(tok, "||");
    240 			return OROR;
    241 		}
    242 	case '<':
    243 	case '>':
    244 		lastdol = 0;
    245 		/*
    246 		 * funny redirection tokens:
    247 		 *	redir:	arrow | arrow '[' fd ']'
    248 		 *	arrow:	'<' | '<<' | '>' | '>>' | '|'
    249 		 *	fd:	digit | digit '=' | digit '=' digit
    250 		 *	digit:	'0'|'1'|'2'|'3'|'4'|'5'|'6'|'7'|'8'|'9'
    251 		 * some possibilities are nonsensical and get a message.
    252 		 */
    253 		*w++=c;
    254 		t = newtree();
    255 		switch(c){
    256 		case '|':
    257 			t->type = PIPE;
    258 			t->fd0 = 1;
    259 			t->fd1 = 0;
    260 			break;
    261 		case '>':
    262 			t->type = REDIR;
    263 			if(nextis(c)){
    264 				t->rtype = APPEND;
    265 				*w++=c;
    266 			}
    267 			else t->rtype = WRITE;
    268 			t->fd0 = 1;
    269 			break;
    270 		case '<':
    271 			t->type = REDIR;
    272 			if(nextis(c)){
    273 				t->rtype = HERE;
    274 				*w++=c;
    275 			} else if (nextis('>')){
    276 				t->rtype = RDWR;
    277 				*w++=c;
    278 			} else t->rtype = READ;
    279 			t->fd0 = 0;
    280 			break;
    281 		}
    282 		if(nextis('[')){
    283 			*w++='[';
    284 			c = advance();
    285 			*w++=c;
    286 			if(c<'0' || '9'<c){
    287 			RedirErr:
    288 				*w = 0;
    289 				yyerror(t->type==PIPE?"pipe syntax"
    290 						:"redirection syntax");
    291 				return EOF;
    292 			}
    293 			t->fd0 = 0;
    294 			do{
    295 				t->fd0 = t->fd0*10+c-'0';
    296 				*w++=c;
    297 				c = advance();
    298 			}while('0'<=c && c<='9');
    299 			if(c=='='){
    300 				*w++='=';
    301 				if(t->type==REDIR)
    302 					t->type = DUP;
    303 				c = advance();
    304 				if('0'<=c && c<='9'){
    305 					t->rtype = DUPFD;
    306 					t->fd1 = t->fd0;
    307 					t->fd0 = 0;
    308 					do{
    309 						t->fd0 = t->fd0*10+c-'0';
    310 						*w++=c;
    311 						c = advance();
    312 					}while('0'<=c && c<='9');
    313 				}
    314 				else{
    315 					if(t->type==PIPE)
    316 						goto RedirErr;
    317 					t->rtype = CLOSE;
    318 				}
    319 			}
    320 			if(c!=']'
    321 			|| t->type==DUP && (t->rtype==HERE || t->rtype==APPEND))
    322 				goto RedirErr;
    323 			*w++=']';
    324 		}
    325 		*w='\0';
    326 		yylval.tree = t;
    327 		if(t->type==PIPE)
    328 			skipnl();
    329 		return t->type;
    330 	case '\'':
    331 		lastdol = 0;
    332 		lastword = 1;
    333 		inquote = 1;
    334 		for(;;){
    335 			c = advance();
    336 			if(c==EOF)
    337 				break;
    338 			if(c=='\''){
    339 				if(nextc()!='\'')
    340 					break;
    341 				advance();
    342 			}
    343 			w = addutf(w, c);
    344 		}
    345 		if(w!=0)
    346 			*w='\0';
    347 		t = token(tok, WORD);
    348 		t->quoted = 1;
    349 		yylval.tree = t;
    350 		return t->type;
    351 	}
    352 	if(!wordchr(c)){
    353 		lastdol = 0;
    354 		tok[0] = c;
    355 		tok[1]='\0';
    356 		return c;
    357 	}
    358 	for(;;){
    359 		/* next line should have (char)c==GLOB, but ken's compiler is broken */
    360 		if(c=='*' || c=='[' || c=='?' || c==(unsigned char)GLOB)
    361 			w = addtok(w, GLOB);
    362 		w = addutf(w, c);
    363 		c = nextc();
    364 		if(lastdol?!idchr(c):!wordchr(c)) break;
    365 		advance();
    366 	}
    367 
    368 	lastword = 1;
    369 	lastdol = 0;
    370 	if(w!=0)
    371 		*w='\0';
    372 	t = klook(tok);
    373 	if(t->type!=WORD)
    374 		lastword = 0;
    375 	t->quoted = 0;
    376 	yylval.tree = t;
    377 	return t->type;
    378 }