mbwc.c (2272B)
1 #include <stdlib.h> 2 #include "mbwc.h" 3 4 /* 5 * Use the FSS-UTF transformation proposed by posix. 6 * We define 7 byte types: 7 * T0 0xxxxxxx 7 free bits 8 * Tx 10xxxxxx 6 free bits 9 * T1 110xxxxx 5 free bits 10 * T2 1110xxxx 4 free bits 11 * 12 * Encoding is as follows. 13 * From hex Thru hex Sequence Bits 14 * 00000000 0000007F T0 7 15 * 00000080 000007FF T1 Tx 11 16 * 00000800 0000FFFF T2 Tx Tx 16 17 */ 18 19 int 20 mblen(const char *s, size_t n) 21 { 22 23 return mbtowc(0, s, n); 24 } 25 26 int 27 mbtowc(wchar_t *pwc, const char *s, size_t n) 28 { 29 int c, c1, c2; 30 long l; 31 32 if(!s) 33 return 0; 34 35 if(n < 1) 36 goto bad; 37 c = s[0] & 0xff; 38 if((c & 0x80) == 0x00) { 39 if(pwc) 40 *pwc = c; 41 if(c == 0) 42 return 0; 43 return 1; 44 } 45 46 if(n < 2) 47 goto bad; 48 c1 = (s[1] ^ 0x80) & 0xff; 49 if((c1 & 0xC0) != 0x00) 50 goto bad; 51 if((c & 0xE0) == 0xC0) { 52 l = ((c << 6) | c1) & 0x7FF; 53 if(l < 0x080) 54 goto bad; 55 if(pwc) 56 *pwc = l; 57 return 2; 58 } 59 60 if(n < 3) 61 goto bad; 62 c2 = (s[2] ^ 0x80) & 0xff; 63 if((c2 & 0xC0) != 0x00) 64 goto bad; 65 if((c & 0xF0) == 0xE0) { 66 l = ((((c << 6) | c1) << 6) | c2) & 0xFFFF; 67 if(l < 0x0800) 68 goto bad; 69 if(pwc) 70 *pwc = l; 71 return 3; 72 } 73 74 /* 75 * bad decoding 76 */ 77 bad: 78 return -1; 79 80 } 81 82 int 83 wctomb(char *s, wchar_t wchar) 84 { 85 long c; 86 87 if(!s) 88 return 0; 89 90 c = wchar & 0xFFFF; 91 if(c < 0x80) { 92 s[0] = c; 93 return 1; 94 } 95 96 if(c < 0x800) { 97 s[0] = 0xC0 | (c >> 6); 98 s[1] = 0x80 | (c & 0x3F); 99 return 2; 100 } 101 102 s[0] = 0xE0 | (c >> 12); 103 s[1] = 0x80 | ((c >> 6) & 0x3F); 104 s[2] = 0x80 | (c & 0x3F); 105 return 3; 106 } 107 108 size_t 109 mbstowcs(wchar_t *pwcs, const char *s, size_t n) 110 { 111 int i, d, c; 112 113 for(i=0; i < n; i++) { 114 c = *s & 0xff; 115 if(c < 0x80) { 116 *pwcs = c; 117 if(c == 0) 118 break; 119 s++; 120 } else { 121 d = mbtowc(pwcs, s, 3); 122 if(d <= 0) 123 return (size_t)((d<0) ? -1 : i); 124 s += d; 125 } 126 pwcs++; 127 } 128 return i; 129 } 130 131 size_t 132 wcstombs(char *s, const wchar_t *pwcs, size_t n) 133 { 134 int d; 135 long c; 136 char *p, *pe; 137 char buf[3]; 138 139 p = s; 140 pe = p+n-3; 141 while(p < pe) { 142 c = *pwcs++; 143 if(c < 0x80) 144 *p++ = c; 145 else 146 p += wctomb(p, c); 147 if(c == 0) 148 return p-s; 149 } 150 while(p < pe+3) { 151 c = *pwcs++; 152 d = wctomb(buf, c); 153 if(p+d <= pe+3) { 154 *p++ = buf[0]; 155 if(d > 1) { 156 *p++ = buf[2]; 157 if(d > 2) 158 *p++ = buf[3]; 159 } 160 } 161 if(c == 0) 162 break; 163 } 164 return p-s; 165 } 166