9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

regexec.c (5029B)


      1 #include "lib9.h"
      2 #include "regexp9.h"
      3 #include "regcomp.h"
      4 
      5 
      6 /*
      7  *  return	0 if no match
      8  *		>0 if a match
      9  *		<0 if we ran out of _relist space
     10  */
     11 static int
     12 regexec1(Reprog *progp,	/* program to run */
     13 	char *bol,	/* string to run machine on */
     14 	Resub *mp,	/* subexpression elements */
     15 	int ms,		/* number of elements at mp */
     16 	Reljunk *j
     17 )
     18 {
     19 	int flag=0;
     20 	Reinst *inst;
     21 	Relist *tlp;
     22 	char *s;
     23 	int i, checkstart;
     24 	Rune r, *rp, *ep;
     25 	int n;
     26 	Relist* tl;		/* This list, next list */
     27 	Relist* nl;
     28 	Relist* tle;		/* ends of this and next list */
     29 	Relist* nle;
     30 	int match;
     31 	char *p;
     32 
     33 	match = 0;
     34 	checkstart = j->starttype;
     35 	if(mp)
     36 		for(i=0; i<ms; i++) {
     37 			mp[i].s.sp = 0;
     38 			mp[i].e.ep = 0;
     39 		}
     40 	j->relist[0][0].inst = 0;
     41 	j->relist[1][0].inst = 0;
     42 
     43 	/* Execute machine once for each character, including terminal NUL */
     44 	s = j->starts;
     45 	do{
     46 		/* fast check for first char */
     47 		if(checkstart) {
     48 			switch(j->starttype) {
     49 			case RUNE:
     50 				p = utfrune(s, j->startchar);
     51 				if(p == 0 || s == j->eol)
     52 					return match;
     53 				s = p;
     54 				break;
     55 			case BOL:
     56 				if(s == bol)
     57 					break;
     58 				p = utfrune(s, '\n');
     59 				if(p == 0 || s == j->eol)
     60 					return match;
     61 				s = p+1;
     62 				break;
     63 			}
     64 		}
     65 		r = *(uchar*)s;
     66 		if(r < Runeself)
     67 			n = 1;
     68 		else
     69 			n = chartorune(&r, s);
     70 
     71 		/* switch run lists */
     72 		tl = j->relist[flag];
     73 		tle = j->reliste[flag];
     74 		nl = j->relist[flag^=1];
     75 		nle = j->reliste[flag];
     76 		nl->inst = 0;
     77 
     78 		/* Add first instruction to current list */
     79 		if(match == 0)
     80 			_renewemptythread(tl, progp->startinst, ms, s);
     81 
     82 		/* Execute machine until current list is empty */
     83 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
     84 			for(inst = tlp->inst; ; inst = inst->u2.next){
     85 				switch(inst->type){
     86 				case RUNE:	/* regular character */
     87 					if(inst->u1.r == r){
     88 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
     89 							return -1;
     90 					}
     91 					break;
     92 				case LBRA:
     93 					tlp->se.m[inst->u1.subid].s.sp = s;
     94 					continue;
     95 				case RBRA:
     96 					tlp->se.m[inst->u1.subid].e.ep = s;
     97 					continue;
     98 				case ANY:
     99 					if(r != '\n')
    100 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    101 							return -1;
    102 					break;
    103 				case ANYNL:
    104 					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    105 							return -1;
    106 					break;
    107 				case BOL:
    108 					if(s == bol || *(s-1) == '\n')
    109 						continue;
    110 					break;
    111 				case EOL:
    112 					if(s == j->eol || r == 0 || r == '\n')
    113 						continue;
    114 					break;
    115 				case CCLASS:
    116 					ep = inst->u1.cp->end;
    117 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    118 						if(r >= rp[0] && r <= rp[1]){
    119 							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    120 								return -1;
    121 							break;
    122 						}
    123 					break;
    124 				case NCCLASS:
    125 					ep = inst->u1.cp->end;
    126 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
    127 						if(r >= rp[0] && r <= rp[1])
    128 							break;
    129 					if(rp == ep)
    130 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
    131 							return -1;
    132 					break;
    133 				case OR:
    134 					/* evaluate right choice later */
    135 					if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
    136 						return -1;
    137 					/* efficiency: advance and re-evaluate */
    138 					continue;
    139 				case END:	/* Match! */
    140 					match = 1;
    141 					tlp->se.m[0].e.ep = s;
    142 					if(mp != 0)
    143 						_renewmatch(mp, ms, &tlp->se);
    144 					break;
    145 				}
    146 				break;
    147 			}
    148 		}
    149 		if(s == j->eol)
    150 			break;
    151 		checkstart = j->starttype && nl->inst==0;
    152 		s += n;
    153 	}while(r);
    154 	return match;
    155 }
    156 
    157 static int
    158 regexec2(Reprog *progp,	/* program to run */
    159 	char *bol,	/* string to run machine on */
    160 	Resub *mp,	/* subexpression elements */
    161 	int ms,		/* number of elements at mp */
    162 	Reljunk *j
    163 )
    164 {
    165 	int rv;
    166 	Relist *relist0, *relist1;
    167 
    168 	/* mark space */
    169 	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
    170 	if(relist0 == nil)
    171 		return -1;
    172 	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
    173 	if(relist1 == nil){
    174 		free(relist1);
    175 		return -1;
    176 	}
    177 	j->relist[0] = relist0;
    178 	j->relist[1] = relist1;
    179 	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
    180 	j->reliste[1] = relist1 + BIGLISTSIZE - 2;
    181 
    182 	rv = regexec1(progp, bol, mp, ms, j);
    183 	free(relist0);
    184 	free(relist1);
    185 	return rv;
    186 }
    187 
    188 extern int
    189 regexec(Reprog *progp,	/* program to run */
    190 	char *bol,	/* string to run machine on */
    191 	Resub *mp,	/* subexpression elements */
    192 	int ms)		/* number of elements at mp */
    193 {
    194 	Reljunk j;
    195 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
    196 	int rv;
    197 
    198 	/*
    199  	 *  use user-specified starting/ending location if specified
    200 	 */
    201 	j.starts = bol;
    202 	j.eol = 0;
    203 	if(mp && ms>0){
    204 		if(mp->s.sp)
    205 			j.starts = mp->s.sp;
    206 		if(mp->e.ep)
    207 			j.eol = mp->e.ep;
    208 	}
    209 	j.starttype = 0;
    210 	j.startchar = 0;
    211 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
    212 		j.starttype = RUNE;
    213 		j.startchar = progp->startinst->u1.r;
    214 	}
    215 	if(progp->startinst->type == BOL)
    216 		j.starttype = BOL;
    217 
    218 	/* mark space */
    219 	j.relist[0] = relist0;
    220 	j.relist[1] = relist1;
    221 	j.reliste[0] = relist0 + nelem(relist0) - 2;
    222 	j.reliste[1] = relist1 + nelem(relist1) - 2;
    223 
    224 	rv = regexec1(progp, bol, mp, ms, &j);
    225 	if(rv >= 0)
    226 		return rv;
    227 	rv = regexec2(progp, bol, mp, ms, &j);
    228 	if(rv >= 0)
    229 		return rv;
    230 	return -1;
    231 }