UCS-2 read/write functions
git-svn-id: svn://svn.h5l.se/heimdal/trunk/heimdal@22559 ec53bebd-3082-4978-b11e-865c3cabbd6b
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
2008-02-03 Love Hörnquist Åstrand <lha@it.su.se>
|
||||
|
||||
* Add UCS-2 reading and writing functions
|
||||
|
||||
2008-02-01 Love Hörnquist Åstrand <lha@it.su.se>
|
||||
|
||||
* Add and use com_err error codes
|
||||
|
@@ -55,6 +55,7 @@ TESTS = \
|
||||
check_PROGRAMS = \
|
||||
test-bidi \
|
||||
test-map \
|
||||
test-rw \
|
||||
test-normalize \
|
||||
test-prohibited \
|
||||
test-punycode \
|
||||
|
186
lib/wind/test-rw.c
Normal file
186
lib/wind/test-rw.c
Normal file
@@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "windlocl.h"
|
||||
#include <stdio.h>
|
||||
#include <err.h>
|
||||
#include <assert.h>
|
||||
|
||||
#define MAX_LENGTH 10
|
||||
|
||||
|
||||
struct testcase {
|
||||
unsigned int in_flags;
|
||||
size_t in_len;
|
||||
const char *in_ptr;
|
||||
int ret;
|
||||
size_t ucs2_len;
|
||||
uint16_t ucs2[MAX_LENGTH];
|
||||
unsigned int out_flags;
|
||||
} testcases[] = {
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
4, "\xff\xfe\x20\x00",
|
||||
0,
|
||||
1, { 0x0020 },
|
||||
WIND_RW_LE
|
||||
},
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
4, "\xfe\xff\x00\x20",
|
||||
0,
|
||||
1, { 0x0020 },
|
||||
WIND_RW_BE
|
||||
},
|
||||
/* only BE BOM */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
2, "\xfe\xff",
|
||||
0,
|
||||
0, { },
|
||||
WIND_RW_BE
|
||||
},
|
||||
/* no input */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
0, "",
|
||||
0,
|
||||
0, { },
|
||||
WIND_RW_BOM
|
||||
},
|
||||
/* BOM only */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
2, "\xff\xfe",
|
||||
0,
|
||||
0, { },
|
||||
WIND_RW_LE
|
||||
},
|
||||
/* water + z */
|
||||
{
|
||||
WIND_RW_BOM|WIND_RW_LE,
|
||||
4, "\x34\x6C\x7A\x00",
|
||||
0,
|
||||
2, { 0x6C34, 0x7a },
|
||||
WIND_RW_LE
|
||||
},
|
||||
/* water + z */
|
||||
{
|
||||
WIND_RW_LE,
|
||||
4, "\x34\x6C\x7A\x00",
|
||||
0,
|
||||
2, { 0x6C34, 0x7a },
|
||||
WIND_RW_LE
|
||||
},
|
||||
/* BOM + water + z */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
6, "\xFF\xFE\x34\x6C\x7A\x00",
|
||||
0,
|
||||
2, { 0x6C34, 0x7a },
|
||||
WIND_RW_LE
|
||||
},
|
||||
/* BOM + water + z */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
6, "\xFE\xFF\x6C\x34\x00\x7A",
|
||||
0,
|
||||
2, { 0x6C34, 0x7a },
|
||||
WIND_RW_BE
|
||||
},
|
||||
/* error, odd length */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
1, "\xfe",
|
||||
WIND_ERR_LENGTH_NOT_MOD2,
|
||||
0, { },
|
||||
WIND_RW_BOM
|
||||
},
|
||||
/* error, missing BOM */
|
||||
{
|
||||
WIND_RW_BOM,
|
||||
2, "\x00\x20",
|
||||
WIND_ERR_NO_BOM,
|
||||
0, { },
|
||||
WIND_RW_BOM
|
||||
},
|
||||
/* error, overrun */
|
||||
{
|
||||
WIND_RW_BE,
|
||||
4, "\x00\x20\x00\x20",
|
||||
WIND_ERR_OVERRUN,
|
||||
1, { 0x20 },
|
||||
WIND_RW_BOM
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned int n, m, flags;
|
||||
uint16_t data[MAX_LENGTH];
|
||||
size_t datalen;
|
||||
int ret;
|
||||
|
||||
for (n = 0; n < sizeof(testcases)/sizeof(testcases[0]); n++) {
|
||||
flags = testcases[n].in_flags;
|
||||
|
||||
datalen = testcases[n].ucs2_len;
|
||||
assert(datalen < sizeof(data));
|
||||
|
||||
ret = wind_ucs2read(testcases[n].in_ptr,
|
||||
testcases[n].in_len,
|
||||
&flags,
|
||||
data,
|
||||
&datalen);
|
||||
if (ret != testcases[n].ret)
|
||||
errx(1, "testcases %u: wind_ucs2read: %d", n, ret);
|
||||
|
||||
/* on error, skip all other tests */
|
||||
if (ret)
|
||||
continue;
|
||||
|
||||
if (flags != testcases[n].out_flags)
|
||||
errx(1, "testcases %u: flags wrong", n);
|
||||
|
||||
if (datalen != testcases[n].ucs2_len)
|
||||
errx(1, "testcases %u: ucs len wrong", n);
|
||||
|
||||
for (m = 0; m < datalen; m++)
|
||||
if (testcases[n].ucs2[m] != data[m])
|
||||
errx(1, "testcases %u: char %u wrong", n, m);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
122
lib/wind/utf8.c
122
lib/wind/utf8.c
@@ -231,34 +231,136 @@ wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len)
|
||||
/**
|
||||
* Read in an UCS2 from a buffer.
|
||||
*
|
||||
* @param ptr The input buffer to read from
|
||||
* @param len the length of the input buffer, must be an even number.
|
||||
* @param ptr The input buffer to read from.
|
||||
* @param len the length of the input buffer.
|
||||
* @param flags Flags to control the behavior of the function.
|
||||
* @param out the output UCS2, the array must be at least out/2 long.
|
||||
* @param out_len the output length
|
||||
*
|
||||
* @return returns 0 on success, an wind error code otherwise.
|
||||
* @ingroup wind
|
||||
*/
|
||||
|
||||
ssize_t
|
||||
_wind_ucs2read(void *ptr, size_t len, uint16_t *out)
|
||||
int
|
||||
wind_ucs2read(const void *ptr, size_t len, unsigned int *flags,
|
||||
uint16_t *out, size_t *out_len)
|
||||
{
|
||||
unsigned char *p = ptr;
|
||||
int little = 1;
|
||||
const unsigned char *p = ptr;
|
||||
int little = ((*flags) & WIND_RW_LE);
|
||||
size_t olen = *out_len;
|
||||
|
||||
/** if len is zero, flags are unchanged */
|
||||
if (len == 0) {
|
||||
*out_len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** if len is odd, WIND_ERR_LENGTH_NOT_MOD2 is returned */
|
||||
if (len & 1)
|
||||
return WIND_ERR_LENGTH_NOT_EVEN;
|
||||
/* check for BOM */
|
||||
return WIND_ERR_LENGTH_NOT_MOD2;
|
||||
|
||||
/**
|
||||
* If the flags WIND_RW_BOM is set, check for BOM. If not BOM is
|
||||
* found, check is LE/BE flag is already and use that otherwise
|
||||
* fail with WIND_ERR_NO_BOM. When done, clear WIND_RW_BOM and
|
||||
* the LE/BE flag and set the resulting LE/BE flag.
|
||||
*/
|
||||
if ((*flags) & WIND_RW_BOM) {
|
||||
uint16_t bom = (p[0] << 8) + p[1];
|
||||
if (bom == 0xfffe || bom == 0xfeff) {
|
||||
little = (bom == 0xfffe);
|
||||
p += 2;
|
||||
len -= 2;
|
||||
} else if (((*flags) & (WIND_RW_LE|WIND_RW_BE)) != 0) {
|
||||
/* little already set */
|
||||
} else
|
||||
return WIND_ERR_NO_BOM;
|
||||
*flags = ((*flags) & ~(WIND_RW_BOM|WIND_RW_LE|WIND_RW_BE));
|
||||
*flags |= little ? WIND_RW_LE : WIND_RW_BE;
|
||||
}
|
||||
|
||||
while (len) {
|
||||
if (olen < 1)
|
||||
return WIND_ERR_OVERRUN;
|
||||
if (little)
|
||||
*out = (p[1] << 8) + p[0];
|
||||
else
|
||||
*out = (p[0] << 8) + p[1];
|
||||
out++; p += 2;
|
||||
out++; p += 2; len -= 2; olen--;
|
||||
}
|
||||
return (p - (unsigned char *)ptr) >> 1;
|
||||
*out_len -= olen;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Write an UCS2 string to a buffer.
|
||||
*
|
||||
* @param in The input UCS2 string.
|
||||
* @param in_len the length of the input buffer.
|
||||
* @param flags Flags to control the behavior of the function.
|
||||
* @param ptr The input buffer to write to, the array must be at least
|
||||
* (in + 1) * 2 bytes long.
|
||||
* @param out_len the output length
|
||||
*
|
||||
* @return returns 0 on success, an wind error code otherwise.
|
||||
* @ingroup wind
|
||||
*/
|
||||
|
||||
int
|
||||
wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags,
|
||||
void *ptr, size_t *out_len)
|
||||
{
|
||||
unsigned char *p = ptr;
|
||||
size_t len = *out_len;
|
||||
|
||||
/** If in buffer is not of length be mod 2, WIND_ERR_LENGTH_NOT_MOD2 is returned*/
|
||||
if (len & 1)
|
||||
return WIND_ERR_LENGTH_NOT_MOD2;
|
||||
|
||||
/** On zero input length, flags are preserved */
|
||||
if (in_len == 0) {
|
||||
*out_len = 0;
|
||||
return 0;
|
||||
}
|
||||
/** If flags have WIND_RW_BOM set, the byte order mark is written
|
||||
* first to the output data */
|
||||
if ((*flags) & WIND_RW_BOM) {
|
||||
uint16_t bom = 0xfffe;
|
||||
|
||||
if (len < 2)
|
||||
return WIND_ERR_OVERRUN;
|
||||
|
||||
if ((*flags) & WIND_RW_LE) {
|
||||
p[0] = (bom >> 8) & 0xff;
|
||||
p[1] = (bom ) & 0xff;
|
||||
} else {
|
||||
p[1] = (bom ) & 0xff;
|
||||
p[0] = (bom >> 8) & 0xff;
|
||||
}
|
||||
len -= 2;
|
||||
}
|
||||
|
||||
while (in_len) {
|
||||
/** If the output wont fit into out_len, WIND_ERR_OVERRUN is returned */
|
||||
if (len < 2)
|
||||
return WIND_ERR_OVERRUN;
|
||||
if ((*flags) & WIND_RW_LE) {
|
||||
p[0] = (in[0] >> 8) & 0xff;
|
||||
p[1] = (in[0] ) & 0xff;
|
||||
} else {
|
||||
p[1] = (in[0] ) & 0xff;
|
||||
p[0] = (in[0] >> 8) & 0xff;
|
||||
}
|
||||
len -= 2;
|
||||
in_len--;
|
||||
p += 2;
|
||||
in++;
|
||||
}
|
||||
*out_len -= len;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Convert an UCS2 string to a UTF-8 string.
|
||||
*
|
||||
|
@@ -45,6 +45,11 @@ typedef unsigned int wind_profile_flags;
|
||||
#define WIND_PROFILE_LDAP 2
|
||||
#define WIND_PROFILE_SASL 4
|
||||
|
||||
/* flags to wind_ucs2read/wind_ucs2write: little endian, big endian, and check for/write a byte order mark */
|
||||
#define WIND_RW_LE 1
|
||||
#define WIND_RW_BE 2
|
||||
#define WIND_RW_BOM 4
|
||||
|
||||
int wind_stringprep(const unsigned *in, size_t in_len,
|
||||
unsigned *out, size_t *out_len,
|
||||
wind_profile_flags flags);
|
||||
@@ -62,4 +67,8 @@ int wind_ucs4utf8_length(const uint32_t *, size_t, size_t *);
|
||||
int wind_ucs2utf8(const uint16_t *, size_t, char *, size_t *);
|
||||
int wind_ucs2utf8_length(const uint16_t *, size_t, size_t *);
|
||||
|
||||
|
||||
int wind_ucs2read(const void *, size_t, unsigned int *, uint16_t *, size_t *);
|
||||
int wind_ucs2write(const uint16_t *, size_t, unsigned int *, void *, size_t *);
|
||||
|
||||
#endif /* _WIND_H_ */
|
||||
|
@@ -12,9 +12,11 @@ error_code NONE, "No error"
|
||||
error_code NO_PROFILE, "No such profile"
|
||||
error_code OVERRUN, "Buffer overrun"
|
||||
error_code UNDERUN, "Buffer underrun"
|
||||
error_code LENGTH_NOT_EVEN, "Lenght not even"
|
||||
error_code LENGTH_NOT_MOD2, "Lenght not mod2"
|
||||
error_code LENGTH_NOT_MOD4, "Lenght not mod4"
|
||||
error_code INVALID_UTF8, "Invalid UTF-8 combination in string"
|
||||
error_code INVALID_UTF16, "Invalid UTF-16 combination in string"
|
||||
error_code INVALID_UTF32, "Invalid UTF-32 combination in string"
|
||||
error_code NO_BOM, "No byte order mark (BOM) in string"
|
||||
|
||||
end
|
||||
|
@@ -55,9 +55,6 @@ int _wind_stringprep_map(const uint32_t *, size_t,
|
||||
uint32_t *, size_t *,
|
||||
wind_profile_flags);
|
||||
|
||||
int _wind_stringprep_normalize(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len);
|
||||
|
||||
ssize_t _wind_ucs2read(void *, size_t, uint16_t *);
|
||||
int _wind_stringprep_normalize(const uint32_t *, size_t, uint32_t *, size_t *);
|
||||
|
||||
#endif /* _WINDLOCL_H_ */
|
||||
|
Reference in New Issue
Block a user