diff --git a/lib/wind/ChangeLog b/lib/wind/ChangeLog index 950f62e68..7fe44caf5 100644 --- a/lib/wind/ChangeLog +++ b/lib/wind/ChangeLog @@ -1,3 +1,7 @@ +2008-02-03 Love Hörnquist Åstrand + + * Add UCS-2 reading and writing functions + 2008-02-01 Love Hörnquist Åstrand * Add and use com_err error codes diff --git a/lib/wind/Makefile.am b/lib/wind/Makefile.am index 12c4990be..df9584422 100644 --- a/lib/wind/Makefile.am +++ b/lib/wind/Makefile.am @@ -55,6 +55,7 @@ TESTS = \ check_PROGRAMS = \ test-bidi \ test-map \ + test-rw \ test-normalize \ test-prohibited \ test-punycode \ diff --git a/lib/wind/test-rw.c b/lib/wind/test-rw.c new file mode 100644 index 000000000..f7f90c496 --- /dev/null +++ b/lib/wind/test-rw.c @@ -0,0 +1,186 @@ +/* + * Copyright (c) 2008 Kungliga Tekniska Högskolan + * (Royal Institute of Technology, Stockholm, Sweden). + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the Institute nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include "windlocl.h" +#include +#include +#include + +#define MAX_LENGTH 10 + + +struct testcase { + unsigned int in_flags; + size_t in_len; + const char *in_ptr; + int ret; + size_t ucs2_len; + uint16_t ucs2[MAX_LENGTH]; + unsigned int out_flags; +} testcases[] = { + { + WIND_RW_BOM, + 4, "\xff\xfe\x20\x00", + 0, + 1, { 0x0020 }, + WIND_RW_LE + }, + { + WIND_RW_BOM, + 4, "\xfe\xff\x00\x20", + 0, + 1, { 0x0020 }, + WIND_RW_BE + }, + /* only BE BOM */ + { + WIND_RW_BOM, + 2, "\xfe\xff", + 0, + 0, { }, + WIND_RW_BE + }, + /* no input */ + { + WIND_RW_BOM, + 0, "", + 0, + 0, { }, + WIND_RW_BOM + }, + /* BOM only */ + { + WIND_RW_BOM, + 2, "\xff\xfe", + 0, + 0, { }, + WIND_RW_LE + }, + /* water + z */ + { + WIND_RW_BOM|WIND_RW_LE, + 4, "\x34\x6C\x7A\x00", + 0, + 2, { 0x6C34, 0x7a }, + WIND_RW_LE + }, + /* water + z */ + { + WIND_RW_LE, + 4, "\x34\x6C\x7A\x00", + 0, + 2, { 0x6C34, 0x7a }, + WIND_RW_LE + }, + /* BOM + water + z */ + { + WIND_RW_BOM, + 6, "\xFF\xFE\x34\x6C\x7A\x00", + 0, + 2, { 0x6C34, 0x7a }, + WIND_RW_LE + }, + /* BOM + water + z */ + { + WIND_RW_BOM, + 6, "\xFE\xFF\x6C\x34\x00\x7A", + 0, + 2, { 0x6C34, 0x7a }, + WIND_RW_BE + }, + /* error, odd length */ + { + WIND_RW_BOM, + 1, "\xfe", + WIND_ERR_LENGTH_NOT_MOD2, + 0, { }, + WIND_RW_BOM + }, + /* error, missing BOM */ + { + WIND_RW_BOM, + 2, "\x00\x20", + WIND_ERR_NO_BOM, + 0, { }, + WIND_RW_BOM + }, + /* error, overrun */ + { + WIND_RW_BE, + 4, "\x00\x20\x00\x20", + WIND_ERR_OVERRUN, + 1, { 0x20 }, + WIND_RW_BOM + } + +}; + +int +main(void) +{ + unsigned int n, m, flags; + uint16_t data[MAX_LENGTH]; + size_t datalen; + int ret; + + for (n = 0; n < sizeof(testcases)/sizeof(testcases[0]); n++) { + flags = testcases[n].in_flags; + + datalen = testcases[n].ucs2_len; + assert(datalen < sizeof(data)); + + ret = wind_ucs2read(testcases[n].in_ptr, + testcases[n].in_len, + &flags, + data, + &datalen); + if (ret != testcases[n].ret) + errx(1, "testcases %u: wind_ucs2read: %d", n, ret); + + /* on error, skip all other tests */ + if (ret) + continue; + + if (flags != testcases[n].out_flags) + errx(1, "testcases %u: flags wrong", n); + + if (datalen != testcases[n].ucs2_len) + errx(1, "testcases %u: ucs len wrong", n); + + for (m = 0; m < datalen; m++) + if (testcases[n].ucs2[m] != data[m]) + errx(1, "testcases %u: char %u wrong", n, m); + } + + return 0; +} diff --git a/lib/wind/utf8.c b/lib/wind/utf8.c index 06309072e..7ca97aac1 100644 --- a/lib/wind/utf8.c +++ b/lib/wind/utf8.c @@ -231,34 +231,136 @@ wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len) /** * Read in an UCS2 from a buffer. * - * @param ptr The input buffer to read from - * @param len the length of the input buffer, must be an even number. + * @param ptr The input buffer to read from. + * @param len the length of the input buffer. + * @param flags Flags to control the behavior of the function. * @param out the output UCS2, the array must be at least out/2 long. + * @param out_len the output length * * @return returns 0 on success, an wind error code otherwise. * @ingroup wind */ -ssize_t -_wind_ucs2read(void *ptr, size_t len, uint16_t *out) +int +wind_ucs2read(const void *ptr, size_t len, unsigned int *flags, + uint16_t *out, size_t *out_len) { - unsigned char *p = ptr; - int little = 1; + const unsigned char *p = ptr; + int little = ((*flags) & WIND_RW_LE); + size_t olen = *out_len; + /** if len is zero, flags are unchanged */ + if (len == 0) { + *out_len = 0; + return 0; + } + + /** if len is odd, WIND_ERR_LENGTH_NOT_MOD2 is returned */ if (len & 1) - return WIND_ERR_LENGTH_NOT_EVEN; - /* check for BOM */ + return WIND_ERR_LENGTH_NOT_MOD2; + + /** + * If the flags WIND_RW_BOM is set, check for BOM. If not BOM is + * found, check is LE/BE flag is already and use that otherwise + * fail with WIND_ERR_NO_BOM. When done, clear WIND_RW_BOM and + * the LE/BE flag and set the resulting LE/BE flag. + */ + if ((*flags) & WIND_RW_BOM) { + uint16_t bom = (p[0] << 8) + p[1]; + if (bom == 0xfffe || bom == 0xfeff) { + little = (bom == 0xfffe); + p += 2; + len -= 2; + } else if (((*flags) & (WIND_RW_LE|WIND_RW_BE)) != 0) { + /* little already set */ + } else + return WIND_ERR_NO_BOM; + *flags = ((*flags) & ~(WIND_RW_BOM|WIND_RW_LE|WIND_RW_BE)); + *flags |= little ? WIND_RW_LE : WIND_RW_BE; + } while (len) { + if (olen < 1) + return WIND_ERR_OVERRUN; if (little) *out = (p[1] << 8) + p[0]; else *out = (p[0] << 8) + p[1]; - out++; p += 2; + out++; p += 2; len -= 2; olen--; } - return (p - (unsigned char *)ptr) >> 1; + *out_len -= olen; + return 0; } +/** + * Write an UCS2 string to a buffer. + * + * @param in The input UCS2 string. + * @param in_len the length of the input buffer. + * @param flags Flags to control the behavior of the function. + * @param ptr The input buffer to write to, the array must be at least + * (in + 1) * 2 bytes long. + * @param out_len the output length + * + * @return returns 0 on success, an wind error code otherwise. + * @ingroup wind + */ + +int +wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags, + void *ptr, size_t *out_len) +{ + unsigned char *p = ptr; + size_t len = *out_len; + + /** If in buffer is not of length be mod 2, WIND_ERR_LENGTH_NOT_MOD2 is returned*/ + if (len & 1) + return WIND_ERR_LENGTH_NOT_MOD2; + + /** On zero input length, flags are preserved */ + if (in_len == 0) { + *out_len = 0; + return 0; + } + /** If flags have WIND_RW_BOM set, the byte order mark is written + * first to the output data */ + if ((*flags) & WIND_RW_BOM) { + uint16_t bom = 0xfffe; + + if (len < 2) + return WIND_ERR_OVERRUN; + + if ((*flags) & WIND_RW_LE) { + p[0] = (bom >> 8) & 0xff; + p[1] = (bom ) & 0xff; + } else { + p[1] = (bom ) & 0xff; + p[0] = (bom >> 8) & 0xff; + } + len -= 2; + } + + while (in_len) { + /** If the output wont fit into out_len, WIND_ERR_OVERRUN is returned */ + if (len < 2) + return WIND_ERR_OVERRUN; + if ((*flags) & WIND_RW_LE) { + p[0] = (in[0] >> 8) & 0xff; + p[1] = (in[0] ) & 0xff; + } else { + p[1] = (in[0] ) & 0xff; + p[0] = (in[0] >> 8) & 0xff; + } + len -= 2; + in_len--; + p += 2; + in++; + } + *out_len -= len; + return 0; +} + + /** * Convert an UCS2 string to a UTF-8 string. * diff --git a/lib/wind/wind.h b/lib/wind/wind.h index 8a6eee6be..ecda75939 100644 --- a/lib/wind/wind.h +++ b/lib/wind/wind.h @@ -45,6 +45,11 @@ typedef unsigned int wind_profile_flags; #define WIND_PROFILE_LDAP 2 #define WIND_PROFILE_SASL 4 +/* flags to wind_ucs2read/wind_ucs2write */ +#define WIND_RW_LE 1 +#define WIND_RW_BE 2 +#define WIND_RW_BOM 4 + int wind_stringprep(const unsigned *in, size_t in_len, unsigned *out, size_t *out_len, wind_profile_flags flags); @@ -62,4 +67,8 @@ int wind_ucs4utf8_length(const uint32_t *, size_t, size_t *); int wind_ucs2utf8(const uint16_t *, size_t, char *, size_t *); int wind_ucs2utf8_length(const uint16_t *, size_t, size_t *); + +int wind_ucs2read(const void *, size_t, unsigned int *, uint16_t *, size_t *); +int wind_ucs2write(const uint16_t *, size_t, unsigned int *, void *, size_t *); + #endif /* _WIND_H_ */ diff --git a/lib/wind/wind_err.et b/lib/wind/wind_err.et index c70ca1ec1..508746570 100644 --- a/lib/wind/wind_err.et +++ b/lib/wind/wind_err.et @@ -12,9 +12,11 @@ error_code NONE, "No error" error_code NO_PROFILE, "No such profile" error_code OVERRUN, "Buffer overrun" error_code UNDERUN, "Buffer underrun" -error_code LENGTH_NOT_EVEN, "Lenght not even" +error_code LENGTH_NOT_MOD2, "Lenght not mod2" +error_code LENGTH_NOT_MOD4, "Lenght not mod4" error_code INVALID_UTF8, "Invalid UTF-8 combination in string" error_code INVALID_UTF16, "Invalid UTF-16 combination in string" error_code INVALID_UTF32, "Invalid UTF-32 combination in string" +error_code NO_BOM, "No byte order mark (BOM) in string" end diff --git a/lib/wind/windlocl.h b/lib/wind/windlocl.h index 8c0d1af1d..7add4eb5a 100644 --- a/lib/wind/windlocl.h +++ b/lib/wind/windlocl.h @@ -55,9 +55,6 @@ int _wind_stringprep_map(const uint32_t *, size_t, uint32_t *, size_t *, wind_profile_flags); -int _wind_stringprep_normalize(const uint32_t *in, size_t in_len, - uint32_t *out, size_t *out_len); - -ssize_t _wind_ucs2read(void *, size_t, uint16_t *); +int _wind_stringprep_normalize(const uint32_t *, size_t, uint32_t *, size_t *); #endif /* _WINDLOCL_H_ */