9base

revived minimalist port of Plan 9 userland to Unix
git clone git://git.suckless.org/9base
Log | Files | Refs | README | LICENSE

mbwc.c (2272B)


      1 #include <stdlib.h>
      2 #include "mbwc.h"
      3 
      4 /*
      5  * Use the FSS-UTF transformation proposed by posix.
      6  *	We define 4 byte types:
      7  *	T0	0xxxxxxx	7 free bits
      8  *	Tx	10xxxxxx	6 free bits
      9  *	T1	110xxxxx	5 free bits
     10  *	T2	1110xxxx	4 free bits
     11  *
     12  *	Encoding is as follows.
     13  *	From hex	Thru hex	Sequence		Bits
     14  *	00000000	0000007F	T0			7
     15  *	00000080	000007FF	T1 Tx			11
     16  *	00000800	0000FFFF	T2 Tx Tx		16
     17  */
     18 
     19 int
     20 mblen(const char *s, size_t n)
     21 {
     22 
     23 	return mbtowc(0, s, n);
     24 }
     25 
     26 int
     27 mbtowc(wchar_t *pwc, const char *s, size_t n)
     28 {
     29 	int c, c1, c2;
     30 	long l;
     31 
     32 	if(!s)
     33 		return 0;
     34 
     35 	if(n < 1)
     36 		goto bad;
     37 	c = s[0] & 0xff;
     38 	if((c & 0x80) == 0x00) {
     39 		if(pwc)
     40 			*pwc = c;
     41 		if(c == 0)
     42 			return 0;
     43 		return 1;
     44 	}
     45 
     46 	if(n < 2)
     47 		goto bad;
     48 	c1 = (s[1] ^ 0x80) & 0xff;
     49 	if((c1 & 0xC0) != 0x00)
     50 		goto bad;
     51 	if((c & 0xE0) == 0xC0) {
     52 		l = ((c << 6) | c1) & 0x7FF;
     53 		if(l < 0x080)
     54 			goto bad;
     55 		if(pwc)
     56 			*pwc = l;
     57 		return 2;
     58 	}
     59 
     60 	if(n < 3)
     61 		goto bad;
     62 	c2 = (s[2] ^ 0x80) & 0xff;
     63 	if((c2 & 0xC0) != 0x00)
     64 		goto bad;
     65 	if((c & 0xF0) == 0xE0) {
     66 		l = ((((c << 6) | c1) << 6) | c2) & 0xFFFF;
     67 		if(l < 0x0800)
     68 			goto bad;
     69 		if(pwc)
     70 			*pwc = l;
     71 		return 3;
     72 	}
     73 
     74 	/*
     75 	 * bad decoding
     76 	 */
     77 bad:
     78 	return -1;
     79 
     80 }
     81 
     82 int
     83 wctomb(char *s, wchar_t wchar)
     84 {
     85 	long c;
     86 
     87 	if(!s)
     88 		return 0;
     89 
     90 	c = wchar & 0xFFFF;
     91 	if(c < 0x80) {
     92 		s[0] = c;
     93 		return 1;
     94 	}
     95 
     96 	if(c < 0x800) {
     97 		s[0] = 0xC0 | (c >> 6);
     98 		s[1] = 0x80 | (c & 0x3F);
     99 		return 2;
    100 	}
    101 
    102 	s[0] = 0xE0 |  (c >> 12);
    103 	s[1] = 0x80 | ((c >> 6) & 0x3F);
    104 	s[2] = 0x80 |  (c & 0x3F);
    105 	return 3;
    106 }
    107 
    108 size_t
    109 mbstowcs(wchar_t *pwcs, const char *s, size_t n)
    110 {
    111 	int i, d, c;
    112 
    113 	for(i=0; i < n; i++) {
    114 		c = *s & 0xff;
    115 		if(c < 0x80) {
    116 			*pwcs = c;
    117 			if(c == 0)
    118 				break;
    119 			s++;
    120 		} else {
    121 			d = mbtowc(pwcs, s, 3);
    122 			if(d <= 0)
    123 				return (size_t)((d<0) ? -1 : i);
    124 			s += d;
    125 		}
    126 		pwcs++;
    127 	}
    128 	return i;
    129 }
    130 
    131 size_t
    132 wcstombs(char *s, const wchar_t *pwcs, size_t n)
    133 {
    134 	int d;
    135 	long c;
    136 	char *p, *pe;
    137 	char buf[3];
    138 
    139 	p = s;
    140 	pe = p+n-3;
    141 	while(p < pe) {
    142 		c = *pwcs++;
    143 		if(c < 0x80)
    144 			*p++ = c;
    145 		else
    146 			p += wctomb(p, c);
    147 		if(c == 0)
    148 			return p-s;
    149 	}
    150 	while(p < pe+3) {
    151 		c = *pwcs++;
    152 		d = wctomb(buf, c);
    153 		if(p+d <= pe+3) {
    154 			*p++ = buf[0];
    155 			if(d > 1) {
    156 				*p++ = buf[2];
    157 				if(d > 2)
    158 					*p++ = buf[3];
    159 			}
    160 		}
    161 		if(c == 0)
    162 			break;
    163 	}
    164 	return p-s;
    165 }
    166