blob: f1ba66730c5a9936afc04fef061a255557377d3d [file] [log] [blame]
/* utf8.c - collection of routines for conversion to/from UTF-8 */
#include "utf8.h"
/*
 * Decode one UTF-8 sequence into its numeric value.
 *
 * inptr: in/out; *inptr points at the first byte of the sequence.
 * cnt:   number of bytes available at *inptr, or 0 to skip the
 *        length check entirely.
 * err:   out; receives 0 on success or a nonzero error value.
 *
 * On success: update *inptr to point at the first unread byte,
 * set *err to 0, and return the decoded value.
 * On failure: leave *inptr unchanged, set *err, and return 0:
 *   UTF8_BAD:   the first byte is a continuation byte (10xxxxxx),
 *               the first byte is 0xFE or 0xFF, or a following byte
 *               is not a continuation byte;
 *   UTF8_SHORT: cnt is nonzero and smaller than the sequence length
 *               announced by the first byte.
 *
 * Sequences of up to 6 bytes (31 value bits) are accepted.
 * Overlong encodings are not rejected.
 */
unsigned long
from_utf8(char **inptr, int cnt, int *err) {
	unsigned char *in;
	unsigned int uc, uc2;
	int need, bit;

	in = (unsigned char *)(*inptr);
	uc = *in++;

	/*
	 * Count the leading 1 bits of the first byte: 0 for a single-byte
	 * character, otherwise the total length of the sequence in bytes.
	 */
	need = 0;
	bit = 0x80;
	while (uc & bit) {
		need++;
		bit >>= 1;
	}

	/*
	 * need == 1: a continuation byte cannot start a sequence.
	 * need > 6:  0xFE and 0xFF never start a sequence; accepting them
	 *            would read 6 or 7 more bytes and shift value bits out
	 *            of uc (and for 0xFF, bit is 0 so bit-1 is not a mask).
	 */
	if (need == 1 || need > 6) {
		*err = UTF8_BAD;
		return 0;
	}

	/* Keep only the payload bits below the terminating 0 bit. */
	uc &= (bit - 1);

	if (cnt && cnt < need) {
		*err = UTF8_SHORT;
		return 0;
	}

	/* Fold in the need-1 continuation bytes, 6 payload bits each. */
	while (--need > 0) {
		uc2 = *in++;
		if ((uc2 & 0xc0) != 0x80) {
			*err = UTF8_BAD;
			return 0;
		}
		uc = ((uc << 6) | (uc2 & 0x3f));
	}

	*inptr = (char *)in;
	*err = 0;
	return uc;
}