git-svn-id: svn://svn.h5l.se/heimdal/trunk/heimdal@23815 ec53bebd-3082-4978-b11e-865c3cabbd6b
		
			
				
	
	
		
			302 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			302 lines
		
	
	
		
			7.0 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/*
 | 
						|
 * Copyright (c) 2004 Kungliga Tekniska Högskolan
 | 
						|
 * (Royal Institute of Technology, Stockholm, Sweden).
 | 
						|
 * All rights reserved.
 | 
						|
 *
 | 
						|
 * Redistribution and use in source and binary forms, with or without
 | 
						|
 * modification, are permitted provided that the following conditions
 | 
						|
 * are met:
 | 
						|
 *
 | 
						|
 * 1. Redistributions of source code must retain the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer.
 | 
						|
 *
 | 
						|
 * 2. Redistributions in binary form must reproduce the above copyright
 | 
						|
 *    notice, this list of conditions and the following disclaimer in the
 | 
						|
 *    documentation and/or other materials provided with the distribution.
 | 
						|
 *
 | 
						|
 * 3. Neither the name of the Institute nor the names of its contributors
 | 
						|
 *    may be used to endorse or promote products derived from this software
 | 
						|
 *    without specific prior written permission.
 | 
						|
 *
 | 
						|
 * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
 | 
						|
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 | 
						|
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 | 
						|
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
 | 
						|
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 | 
						|
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 | 
						|
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 | 
						|
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 | 
						|
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 | 
						|
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 | 
						|
 * SUCH DAMAGE.
 | 
						|
 */
 | 
						|
 | 
						|
#ifdef HAVE_CONFIG_H
 | 
						|
#include <config.h>
 | 
						|
#endif
 | 
						|
#include "windlocl.h"
 | 
						|
 | 
						|
#include <assert.h>
 | 
						|
#include <stdlib.h>
 | 
						|
#include <errno.h>
 | 
						|
 | 
						|
#include "normalize_table.h"
 | 
						|
 | 
						|
RCSID("$Id$");
 | 
						|
 | 
						|
static int
 | 
						|
translation_cmp(const void *key, const void *data)
 | 
						|
{
 | 
						|
    const struct translation *t1 = (const struct translation *)key;
 | 
						|
    const struct translation *t2 = (const struct translation *)data;
 | 
						|
 | 
						|
    return t1->key - t2->key;
 | 
						|
}
 | 
						|
 | 
						|
enum { s_base  = 0xAC00};
 | 
						|
enum { s_count = 11172};
 | 
						|
enum { l_base  = 0x1100};
 | 
						|
enum { l_count = 19};
 | 
						|
enum { v_base  = 0x1161};
 | 
						|
enum { v_count = 21};
 | 
						|
enum { t_base  = 0x11A7};
 | 
						|
enum { t_count = 28};
 | 
						|
enum { n_count = v_count * t_count};
 | 
						|
 | 
						|
static int
 | 
						|
hangul_decomp(const uint32_t *in, size_t in_len,
 | 
						|
	      uint32_t *out, size_t *out_len)
 | 
						|
{
 | 
						|
    uint32_t u = *in;
 | 
						|
    unsigned s_index;
 | 
						|
    unsigned l, v, t;
 | 
						|
    unsigned o;
 | 
						|
 | 
						|
    if (u < s_base || u >= s_base + s_count)
 | 
						|
	return 0;
 | 
						|
    s_index = u - s_base;
 | 
						|
    l = l_base + s_index / n_count;
 | 
						|
    v = v_base + (s_index % n_count) / t_count;
 | 
						|
    t = t_base + s_index % t_count;
 | 
						|
    o = 2;
 | 
						|
    if (t != t_base)
 | 
						|
	++o;
 | 
						|
    if (*out_len < o)
 | 
						|
	return WIND_ERR_OVERRUN;
 | 
						|
    out[0] = l;
 | 
						|
    out[1] = v;
 | 
						|
    if (t != t_base)
 | 
						|
	out[2] = t;
 | 
						|
    *out_len = o;
 | 
						|
    return 1;
 | 
						|
}
 | 
						|
 | 
						|
static uint32_t
 | 
						|
hangul_composition(const uint32_t *in, size_t in_len)
 | 
						|
{
 | 
						|
    if (in_len < 2)
 | 
						|
	return 0;
 | 
						|
    if (in[0] >= l_base && in[0] < l_base + l_count) {
 | 
						|
	unsigned l_index = in[0] - l_base;
 | 
						|
	unsigned v_index;
 | 
						|
 | 
						|
	if (in[1] < v_base || in[1] >= v_base + v_count)
 | 
						|
	    return 0;
 | 
						|
	v_index = in[1] - v_base;
 | 
						|
	return (l_index * v_count + v_index) * t_count + s_base;
 | 
						|
    } else if (in[0] >= s_base && in[0] < s_base + s_count) {
 | 
						|
	unsigned s_index = in[0] - s_base;
 | 
						|
	unsigned t_index;
 | 
						|
 | 
						|
	if (s_index % t_count != 0)
 | 
						|
	    return 0;
 | 
						|
	if (in[1] < t_base || in[1] >= t_base + t_count)
 | 
						|
	    return 0;
 | 
						|
	t_index = in[1] - t_base;
 | 
						|
	return in[0] + t_index;
 | 
						|
    }
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
compat_decomp(const uint32_t *in, size_t in_len,
 | 
						|
	      uint32_t *out, size_t *out_len)
 | 
						|
{
 | 
						|
    unsigned i;
 | 
						|
    unsigned o = 0;
 | 
						|
 | 
						|
    for (i = 0; i < in_len; ++i) {
 | 
						|
	struct translation ts = {in[i]};
 | 
						|
	size_t sub_len = *out_len - o;
 | 
						|
	int ret;
 | 
						|
	
 | 
						|
	ret = hangul_decomp(in + i, in_len - i,
 | 
						|
			    out + o, &sub_len);
 | 
						|
	if (ret) {
 | 
						|
	    if (ret == WIND_ERR_OVERRUN)
 | 
						|
		return ret;
 | 
						|
	    o += sub_len;
 | 
						|
	} else {
 | 
						|
	    void *s = bsearch(&ts,
 | 
						|
			      _wind_normalize_table,
 | 
						|
			      _wind_normalize_table_size,
 | 
						|
			      sizeof(_wind_normalize_table[0]),
 | 
						|
			      translation_cmp);
 | 
						|
	    if (s != NULL) {
 | 
						|
		const struct translation *t = (const struct translation *)s;
 | 
						|
 | 
						|
		ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
 | 
						|
				    t->val_len,
 | 
						|
				    out + o, &sub_len);
 | 
						|
		if (ret)
 | 
						|
		    return ret;
 | 
						|
		o += sub_len;
 | 
						|
	    } else {
 | 
						|
		if (o >= *out_len)
 | 
						|
		    return WIND_ERR_OVERRUN;
 | 
						|
		out[o++] = in[i];
 | 
						|
 | 
						|
	    }
 | 
						|
	}
 | 
						|
    }
 | 
						|
    *out_len = o;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
cc_cmp(const void *a, const void *b)
 | 
						|
{
 | 
						|
    const uint32_t *ua = (const uint32_t *)a;
 | 
						|
    const uint32_t *ub = (const uint32_t *)b;
 | 
						|
 | 
						|
    return _wind_combining_class(*ua) - _wind_combining_class(*ub);
 | 
						|
}
 | 
						|
 | 
						|
static void
 | 
						|
canonical_reorder(uint32_t *tmp, size_t tmp_len)
 | 
						|
{
 | 
						|
    unsigned i;
 | 
						|
 | 
						|
    for (i = 0; i < tmp_len; ++i) {
 | 
						|
	int cc = _wind_combining_class(tmp[i]);
 | 
						|
	if (cc) {
 | 
						|
	    size_t j;
 | 
						|
	    for (j = i + 1;
 | 
						|
		 j < tmp_len && _wind_combining_class(tmp[j]);
 | 
						|
		 ++j)
 | 
						|
		;
 | 
						|
	    qsort(&tmp[i], j - i, sizeof(unsigned),
 | 
						|
		  cc_cmp);
 | 
						|
	    i = j;
 | 
						|
	}
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
static uint32_t
 | 
						|
find_composition(const uint32_t *in, unsigned in_len)
 | 
						|
{
 | 
						|
    unsigned short canon_index = 0;
 | 
						|
    uint32_t cur;
 | 
						|
    unsigned n = 0;
 | 
						|
 | 
						|
    cur = hangul_composition(in, in_len);
 | 
						|
    if (cur)
 | 
						|
	return cur;
 | 
						|
 | 
						|
    do {
 | 
						|
	const struct canon_node *c = &_wind_canon_table[canon_index];
 | 
						|
	unsigned i;
 | 
						|
 | 
						|
	if (n % 5 == 0) {
 | 
						|
	    cur = *in++;
 | 
						|
	    if (in_len-- == 0)
 | 
						|
		return c->val;
 | 
						|
	}
 | 
						|
 | 
						|
	i = cur >> 16;
 | 
						|
	if (i < c->next_start || i >= c->next_end)
 | 
						|
	    canon_index = 0;
 | 
						|
	else
 | 
						|
	    canon_index =
 | 
						|
		_wind_canon_next_table[c->next_offset + i - c->next_start];
 | 
						|
	if (canon_index != 0) {
 | 
						|
	    cur = (cur << 4) & 0xFFFFF;
 | 
						|
	    ++n;
 | 
						|
	}
 | 
						|
    } while (canon_index != 0);
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
static int
 | 
						|
combine(const uint32_t *in, size_t in_len,
 | 
						|
	uint32_t *out, size_t *out_len)
 | 
						|
{
 | 
						|
    unsigned i;
 | 
						|
    int ostarter;
 | 
						|
    unsigned o = 0;
 | 
						|
    int old_cc;
 | 
						|
    int cc;
 | 
						|
 | 
						|
    for (i = 0; i < in_len;) {
 | 
						|
	while (i < in_len && (cc = _wind_combining_class(in[i])) != 0) {
 | 
						|
	    out[o++] = in[i++];
 | 
						|
	}
 | 
						|
	if (i < in_len) {
 | 
						|
	    if (o >= *out_len)
 | 
						|
		return WIND_ERR_OVERRUN;
 | 
						|
	    ostarter = o;
 | 
						|
	    out[o++] = in[i++];
 | 
						|
	    old_cc   = -1;
 | 
						|
 | 
						|
	    while (i < in_len) {
 | 
						|
		uint32_t comb;
 | 
						|
		uint32_t v[2];
 | 
						|
 | 
						|
		v[0] = out[ostarter];
 | 
						|
		v[1] = in[i];
 | 
						|
 | 
						|
		cc = _wind_combining_class(in[i]);
 | 
						|
		if (old_cc != cc && (comb = find_composition(v, 2))) {
 | 
						|
		    out[ostarter] = comb;
 | 
						|
		} else if (cc == 0) {
 | 
						|
		    break;
 | 
						|
		} else {
 | 
						|
		    if (o >= *out_len)
 | 
						|
			return WIND_ERR_OVERRUN;
 | 
						|
		    out[o++] = in[i];
 | 
						|
		    old_cc   = cc;
 | 
						|
		}
 | 
						|
		++i;
 | 
						|
	    }
 | 
						|
	}
 | 
						|
    }
 | 
						|
    *out_len = o;
 | 
						|
    return 0;
 | 
						|
}
 | 
						|
 | 
						|
int
 | 
						|
_wind_stringprep_normalize(const uint32_t *in, size_t in_len,
 | 
						|
			   uint32_t *out, size_t *out_len)
 | 
						|
{
 | 
						|
    size_t tmp_len;
 | 
						|
    uint32_t *tmp;
 | 
						|
    int ret;
 | 
						|
 | 
						|
    tmp_len = in_len * 4;
 | 
						|
    if (tmp_len < MAX_LENGTH_CANON)
 | 
						|
	tmp_len = MAX_LENGTH_CANON;
 | 
						|
    tmp = malloc(tmp_len * sizeof(uint32_t));
 | 
						|
    if (tmp == NULL)
 | 
						|
	return ENOMEM;
 | 
						|
 | 
						|
    ret = compat_decomp(in, in_len, tmp, &tmp_len);
 | 
						|
    if (ret) {
 | 
						|
	free(tmp);
 | 
						|
	return ret;
 | 
						|
    }
 | 
						|
    canonical_reorder(tmp, tmp_len);
 | 
						|
    ret = combine(tmp, tmp_len, out, out_len);
 | 
						|
    free(tmp);
 | 
						|
    return ret;
 | 
						|
}
 |