Add utf8 <-> utf16 support.

git-svn-id: svn://svn.h5l.se/heimdal/trunk/heimdal@23246 ec53bebd-3082-4978-b11e-865c3cabbd6b
This commit is contained in:
Love Hörnquist Åstrand
2008-06-01 22:29:04 +00:00
parent 125f5659a7
commit a40cae4e15

View File

@@ -38,6 +38,67 @@
RCSID("$Id$");
/*
 * Decode the single UTF-8 encoded code point that starts at *pp.
 *
 * pp  - in: points at the first byte of the sequence; out (on success
 *       only): points at the LAST byte of the decoded sequence, so the
 *       caller is expected to step one further to reach the next one.
 * out - receives the decoded code point on success; left untouched on
 *       failure.
 *
 * Returns 0 on success, or WIND_ERR_INVALID_UTF8 when the lead byte is
 * not a valid 1-4 byte lead, a continuation byte is missing or
 * malformed, or the sequence is an overlong (non-shortest) encoding.
 *
 * Bytes are read one at a time and decoding stops at the first byte
 * that is not a continuation byte (10xxxxxx).  A NUL byte is not a
 * continuation byte, so a truncated sequence in a NUL-terminated string
 * is rejected without reading past the terminator.
 *
 * Surrogate code points and values above U+10FFFF are not filtered
 * here.
 */
static int
utf8toutf32(const unsigned char **pp, uint32_t *out)
{
    const unsigned char *p = *pp;
    const unsigned lead = *p;
    uint32_t value;     /* code point accumulated so far */
    uint32_t min_value; /* smallest value that needs this sequence length */
    unsigned trailing;  /* number of continuation bytes expected */

    if ((lead & 0x80) == 0) {           /* 0xxxxxxx */
        value = lead;
        min_value = 0;
        trailing = 0;
    } else if ((lead & 0xE0) == 0xC0) { /* 110xxxxx */
        value = lead & 0x1F;
        min_value = 0x80;
        trailing = 1;
    } else if ((lead & 0xF0) == 0xE0) { /* 1110xxxx */
        value = lead & 0x0F;
        min_value = 0x800;
        trailing = 2;
    } else if ((lead & 0xF8) == 0xF0) { /* 11110xxx */
        value = lead & 0x07;
        min_value = 0x10000;
        trailing = 3;
    } else {
        /* Stray continuation byte or a 5+ byte lead. */
        return WIND_ERR_INVALID_UTF8;
    }

    for (; trailing > 0; trailing--) {
        const unsigned next = *++p;

        if ((next & 0xC0) != 0x80)
            return WIND_ERR_INVALID_UTF8;
        value = (value << 6) | (next & 0x3F);
    }

    /*
     * Reject overlong forms such as C0 80 for U+0000: accepting them
     * lets one character have several encodings, which defeats
     * byte-level comparisons and filtering done before decoding.
     */
    if (value < min_value)
        return WIND_ERR_INVALID_UTF8;

    *out = value;
    *pp = p;
    return 0;
}
/**
* Convert a UTF-8 string to a UCS4 string.
*
@@ -59,60 +120,15 @@ wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len)
{
const unsigned char *p;
size_t o = 0;
int ret;
for (p = (const unsigned char *)in; *p != '\0'; ++p) {
unsigned c = *p;
uint32_t u;
if (c & 0x80) {
if ((c & 0xE0) == 0xC0) {
const unsigned c2 = *++p;
if ((c2 & 0xC0) == 0x80) {
u = ((c & 0x1F) << 6)
| (c2 & 0x3F);
} else {
return WIND_ERR_INVALID_UTF8;
}
} else if ((c & 0xF0) == 0xE0) {
const unsigned c2 = *++p;
if ((c2 & 0xC0) == 0x80) {
const unsigned c3 = *++p;
if ((c3 & 0xC0) == 0x80) {
u = ((c & 0x0F) << 12)
| ((c2 & 0x3F) << 6)
| (c3 & 0x3F);
} else {
return WIND_ERR_INVALID_UTF8;
}
} else {
return WIND_ERR_INVALID_UTF8;
}
} else if ((c & 0xF8) == 0xF0) {
const unsigned c2 = *++p;
if ((c2 & 0xC0) == 0x80) {
const unsigned c3 = *++p;
if ((c3 & 0xC0) == 0x80) {
const unsigned c4 = *++p;
if ((c4 & 0xC0) == 0x80) {
u = ((c & 0x07) << 18)
| ((c2 & 0x3F) << 12)
| ((c3 & 0x3F) << 6)
| (c4 & 0x3F);
} else {
return WIND_ERR_INVALID_UTF8;
}
} else {
return WIND_ERR_INVALID_UTF8;
}
} else {
return WIND_ERR_INVALID_UTF8;
}
} else {
return WIND_ERR_INVALID_UTF8;
}
} else {
u = c;
}
ret = utf8toutf32(&p, &u);
if (ret)
return ret;
if (out) {
if (o >= *out_len)
return WIND_ERR_OVERRUN;
@@ -364,6 +380,67 @@ wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags,
}
/**
 * Convert a UTF-8 string to a UCS2 string.
 *
 * @param in a NUL-terminated UTF-8 string to convert.
 * @param out the resulting UCS2 string, must be at least
 * wind_utf8ucs2_length() elements long. No terminator is written. If
 * out is NULL, nothing is stored and the function only calculates the
 * needed space for the out variable (just like
 * wind_utf8ucs2_length()).
 * @param out_len before processing, out_len should be the length of
 * the out variable in elements (it is not read when out is NULL);
 * after successful processing it will be the length of the out
 * string. It is left unchanged on error.
 *
 * @return returns 0 on success, a wind error code otherwise:
 * WIND_ERR_NOT_UTF16 if a decoded code point does not fit in 16 bits,
 * WIND_ERR_OVERRUN if out is too small, or the error returned by the
 * UTF-8 decoder.
 * @ingroup wind
 */

int
wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len)
{
    const unsigned char *cursor = (const unsigned char *)in;
    size_t count = 0;

    while (*cursor != '\0') {
        uint32_t codepoint;
        int err = utf8toutf32(&cursor, &codepoint);

        if (err)
            return err;

        /* Anything with bits above the low 16 cannot be stored. */
        if ((codepoint & 0xffff0000) != 0)
            return WIND_ERR_NOT_UTF16;

        if (out) {
            if (count >= *out_len)
                return WIND_ERR_OVERRUN;
            out[count] = codepoint;
        }
        count++;

        /* The decoder leaves cursor on the last byte it consumed. */
        ++cursor;
    }

    *out_len = count;
    return 0;
}
/**
 * Calculate the length of the UCS2 string that results from
 * converting a UTF-8 string.
 *
 * @param in a UTF-8 string to convert.
 * @param out_len the length of the resulting UCS2 string.
 *
 * @return returns 0 on success, a wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs2_length(const char *in, size_t *out_len)
{
    /* A NULL output buffer makes wind_utf8ucs2() count without storing. */
    uint16_t *const no_output = NULL;

    return wind_utf8ucs2(in, no_output, out_len);
}
/**
* Convert a UCS2 string to a UTF-8 string.
*