libwind from Assar. stringprep library supporting nameprep, saslprep and ldapprep
git-svn-id: svn://svn.h5l.se/heimdal/trunk/heimdal@22551 ec53bebd-3082-4978-b11e-865c3cabbd6b
This commit is contained in:
33
lib/wind/ChangeLog
Normal file
33
lib/wind/ChangeLog
Normal file
@@ -0,0 +1,33 @@
|
||||
2007-01-17 Love Hörnquist Åstrand <lha@it.su.se>
|
||||
|
||||
* test-utf8.c: spelling
|
||||
|
||||
* utf8.c: Add wind_ucs4toutf8
|
||||
|
||||
2006-10-08 Love Hörnquist Åstrand <lha@it.su.se>
|
||||
|
||||
* test-utf8.c: Add test for wind_ucs4toutf8.
|
||||
|
||||
* wind.h (wind_ucs4toutf8): New function.
|
||||
|
||||
* utf8.c (wind_utf8toucs4): allow calculation of length by passing
|
||||
in NULL as out buffer.
|
||||
(wind_ucs4toutf8): new function.
|
||||
|
||||
2004-12-20 Assar Westerlund <assar@kth.se>
|
||||
|
||||
* util.py:
|
||||
* rfc3454.py:
|
||||
* generate.py:
|
||||
* gen-punycode-examples.py:
|
||||
* gen-normalize.py:
|
||||
* gen-map.py:
|
||||
* gen-errorlist.py:
|
||||
* gen-combining.py:
|
||||
* gen-bidi.py:
|
||||
* UnicodeData.py:
|
||||
Add encoding.
|
||||
|
||||
2004-12-20 Assar Westerlund <assar@kth.se>
|
||||
|
||||
* First version of stringprep library.
|
176
lib/wind/CompositionExclusions-3.2.0.txt
Normal file
176
lib/wind/CompositionExclusions-3.2.0.txt
Normal file
@@ -0,0 +1,176 @@
|
||||
# CompositionExclusions-3.2.0.txt
|
||||
# Date: 2002-03-19,23:30:28 GMT [MD]
|
||||
#
|
||||
# This file lists the characters from the UAX #15 Composition Exclusion Table.
|
||||
#
|
||||
# The format of the comments in this file has been updated since the last version,
|
||||
# CompositionExclusions-3.txt. The only substantive change to this file between that
|
||||
# version and this one is the addition of U+2ADC FORKING.
|
||||
#
|
||||
# For more information, see
|
||||
# http://www.unicode.org/unicode/reports/tr15/#Primary Exclusion List Table
|
||||
# ================================================
|
||||
|
||||
# (1) Script Specifics
|
||||
# This list of characters cannot be derived from the UnicodeData file.
|
||||
# ================================================
|
||||
|
||||
0958 # DEVANAGARI LETTER QA
|
||||
0959 # DEVANAGARI LETTER KHHA
|
||||
095A # DEVANAGARI LETTER GHHA
|
||||
095B # DEVANAGARI LETTER ZA
|
||||
095C # DEVANAGARI LETTER DDDHA
|
||||
095D # DEVANAGARI LETTER RHA
|
||||
095E # DEVANAGARI LETTER FA
|
||||
095F # DEVANAGARI LETTER YYA
|
||||
09DC # BENGALI LETTER RRA
|
||||
09DD # BENGALI LETTER RHA
|
||||
09DF # BENGALI LETTER YYA
|
||||
0A33 # GURMUKHI LETTER LLA
|
||||
0A36 # GURMUKHI LETTER SHA
|
||||
0A59 # GURMUKHI LETTER KHHA
|
||||
0A5A # GURMUKHI LETTER GHHA
|
||||
0A5B # GURMUKHI LETTER ZA
|
||||
0A5E # GURMUKHI LETTER FA
|
||||
0B5C # ORIYA LETTER RRA
|
||||
0B5D # ORIYA LETTER RHA
|
||||
0F43 # TIBETAN LETTER GHA
|
||||
0F4D # TIBETAN LETTER DDHA
|
||||
0F52 # TIBETAN LETTER DHA
|
||||
0F57 # TIBETAN LETTER BHA
|
||||
0F5C # TIBETAN LETTER DZHA
|
||||
0F69 # TIBETAN LETTER KSSA
|
||||
0F76 # TIBETAN VOWEL SIGN VOCALIC R
|
||||
0F78 # TIBETAN VOWEL SIGN VOCALIC L
|
||||
0F93 # TIBETAN SUBJOINED LETTER GHA
|
||||
0F9D # TIBETAN SUBJOINED LETTER DDHA
|
||||
0FA2 # TIBETAN SUBJOINED LETTER DHA
|
||||
0FA7 # TIBETAN SUBJOINED LETTER BHA
|
||||
0FAC # TIBETAN SUBJOINED LETTER DZHA
|
||||
0FB9 # TIBETAN SUBJOINED LETTER KSSA
|
||||
FB1D # HEBREW LETTER YOD WITH HIRIQ
|
||||
FB1F # HEBREW LIGATURE YIDDISH YOD YOD PATAH
|
||||
FB2A # HEBREW LETTER SHIN WITH SHIN DOT
|
||||
FB2B # HEBREW LETTER SHIN WITH SIN DOT
|
||||
FB2C # HEBREW LETTER SHIN WITH DAGESH AND SHIN DOT
|
||||
FB2D # HEBREW LETTER SHIN WITH DAGESH AND SIN DOT
|
||||
FB2E # HEBREW LETTER ALEF WITH PATAH
|
||||
FB2F # HEBREW LETTER ALEF WITH QAMATS
|
||||
FB30 # HEBREW LETTER ALEF WITH MAPIQ
|
||||
FB31 # HEBREW LETTER BET WITH DAGESH
|
||||
FB32 # HEBREW LETTER GIMEL WITH DAGESH
|
||||
FB33 # HEBREW LETTER DALET WITH DAGESH
|
||||
FB34 # HEBREW LETTER HE WITH MAPIQ
|
||||
FB35 # HEBREW LETTER VAV WITH DAGESH
|
||||
FB36 # HEBREW LETTER ZAYIN WITH DAGESH
|
||||
FB38 # HEBREW LETTER TET WITH DAGESH
|
||||
FB39 # HEBREW LETTER YOD WITH DAGESH
|
||||
FB3A # HEBREW LETTER FINAL KAF WITH DAGESH
|
||||
FB3B # HEBREW LETTER KAF WITH DAGESH
|
||||
FB3C # HEBREW LETTER LAMED WITH DAGESH
|
||||
FB3E # HEBREW LETTER MEM WITH DAGESH
|
||||
FB40 # HEBREW LETTER NUN WITH DAGESH
|
||||
FB41 # HEBREW LETTER SAMEKH WITH DAGESH
|
||||
FB43 # HEBREW LETTER FINAL PE WITH DAGESH
|
||||
FB44 # HEBREW LETTER PE WITH DAGESH
|
||||
FB46 # HEBREW LETTER TSADI WITH DAGESH
|
||||
FB47 # HEBREW LETTER QOF WITH DAGESH
|
||||
FB48 # HEBREW LETTER RESH WITH DAGESH
|
||||
FB49 # HEBREW LETTER SHIN WITH DAGESH
|
||||
FB4A # HEBREW LETTER TAV WITH DAGESH
|
||||
FB4B # HEBREW LETTER VAV WITH HOLAM
|
||||
FB4C # HEBREW LETTER BET WITH RAFE
|
||||
FB4D # HEBREW LETTER KAF WITH RAFE
|
||||
FB4E # HEBREW LETTER PE WITH RAFE
|
||||
|
||||
# Total code points: 67
|
||||
|
||||
# ================================================
|
||||
# (2) Post Composition Version precomposed characters
|
||||
# These characters cannot be derived solely from the UnicodeData.txt file
|
||||
# in this version of Unicode.
|
||||
# ================================================
|
||||
|
||||
2ADC # FORKING
|
||||
1D15E # MUSICAL SYMBOL HALF NOTE
|
||||
1D15F # MUSICAL SYMBOL QUARTER NOTE
|
||||
1D160 # MUSICAL SYMBOL EIGHTH NOTE
|
||||
1D161 # MUSICAL SYMBOL SIXTEENTH NOTE
|
||||
1D162 # MUSICAL SYMBOL THIRTY-SECOND NOTE
|
||||
1D163 # MUSICAL SYMBOL SIXTY-FOURTH NOTE
|
||||
1D164 # MUSICAL SYMBOL ONE HUNDRED TWENTY-EIGHTH NOTE
|
||||
1D1BB # MUSICAL SYMBOL MINIMA
|
||||
1D1BC # MUSICAL SYMBOL MINIMA BLACK
|
||||
1D1BD # MUSICAL SYMBOL SEMIMINIMA WHITE
|
||||
1D1BE # MUSICAL SYMBOL SEMIMINIMA BLACK
|
||||
1D1BF # MUSICAL SYMBOL FUSA WHITE
|
||||
1D1C0 # MUSICAL SYMBOL FUSA BLACK
|
||||
|
||||
# Total code points: 14
|
||||
|
||||
# ================================================
|
||||
# (3) Singleton Decompositions
|
||||
# These characters can be derived from the UnicodeData file
|
||||
# by including all characters whose canonical decomposition
|
||||
# consists of a single character.
|
||||
# These characters are simply quoted here for reference.
|
||||
# ================================================
|
||||
|
||||
# 0340..0341 [2] COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
|
||||
# 0343 COMBINING GREEK KORONIS
|
||||
# 0374 GREEK NUMERAL SIGN
|
||||
# 037E GREEK QUESTION MARK
|
||||
# 0387 GREEK ANO TELEIA
|
||||
# 1F71 GREEK SMALL LETTER ALPHA WITH OXIA
|
||||
# 1F73 GREEK SMALL LETTER EPSILON WITH OXIA
|
||||
# 1F75 GREEK SMALL LETTER ETA WITH OXIA
|
||||
# 1F77 GREEK SMALL LETTER IOTA WITH OXIA
|
||||
# 1F79 GREEK SMALL LETTER OMICRON WITH OXIA
|
||||
# 1F7B GREEK SMALL LETTER UPSILON WITH OXIA
|
||||
# 1F7D GREEK SMALL LETTER OMEGA WITH OXIA
|
||||
# 1FBB GREEK CAPITAL LETTER ALPHA WITH OXIA
|
||||
# 1FBE GREEK PROSGEGRAMMENI
|
||||
# 1FC9 GREEK CAPITAL LETTER EPSILON WITH OXIA
|
||||
# 1FCB GREEK CAPITAL LETTER ETA WITH OXIA
|
||||
# 1FD3 GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
# 1FDB GREEK CAPITAL LETTER IOTA WITH OXIA
|
||||
# 1FE3 GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
# 1FEB GREEK CAPITAL LETTER UPSILON WITH OXIA
|
||||
# 1FEE..1FEF [2] GREEK DIALYTIKA AND OXIA..GREEK VARIA
|
||||
# 1FF9 GREEK CAPITAL LETTER OMICRON WITH OXIA
|
||||
# 1FFB GREEK CAPITAL LETTER OMEGA WITH OXIA
|
||||
# 1FFD GREEK OXIA
|
||||
# 2000..2001 [2] EN QUAD..EM QUAD
|
||||
# 2126 OHM SIGN
|
||||
# 212A..212B [2] KELVIN SIGN..ANGSTROM SIGN
|
||||
# 2329 LEFT-POINTING ANGLE BRACKET
|
||||
# 232A RIGHT-POINTING ANGLE BRACKET
|
||||
# F900..FA0D [270] CJK COMPATIBILITY IDEOGRAPH-F900..CJK COMPATIBILITY IDEOGRAPH-FA0D
|
||||
# FA10 CJK COMPATIBILITY IDEOGRAPH-FA10
|
||||
# FA12 CJK COMPATIBILITY IDEOGRAPH-FA12
|
||||
# FA15..FA1E [10] CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
|
||||
# FA20 CJK COMPATIBILITY IDEOGRAPH-FA20
|
||||
# FA22 CJK COMPATIBILITY IDEOGRAPH-FA22
|
||||
# FA25..FA26 [2] CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
|
||||
# FA2A..FA2D [4] CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
|
||||
# FA30..FA6A [59] CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6A
|
||||
# 2F800..2FA1D [542] CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
|
||||
|
||||
# Total code points: 924
|
||||
|
||||
# ================================================
|
||||
# (4) Non-Starter Decompositions
|
||||
# These characters can be derived from the UnicodeData file
|
||||
# by including all characters whose canonical decomposition consists
|
||||
# of a sequence of characters, the first of which has a non-zero
|
||||
# combining class.
|
||||
# These characters are simply quoted here for reference.
|
||||
# ================================================
|
||||
|
||||
# 0344 COMBINING GREEK DIALYTIKA TONOS
|
||||
# 0F73 TIBETAN VOWEL SIGN II
|
||||
# 0F75 TIBETAN VOWEL SIGN UU
|
||||
# 0F81 TIBETAN VOWEL SIGN REVERSED II
|
||||
|
||||
# Total code points: 4
|
||||
|
2574
lib/wind/DerivedNormalizationProps.txt
Normal file
2574
lib/wind/DerivedNormalizationProps.txt
Normal file
File diff suppressed because it is too large
Load Diff
118
lib/wind/Makefile.am
Normal file
118
lib/wind/Makefile.am
Normal file
@@ -0,0 +1,118 @@
|
||||
# $Id: Makefile.am,v 1.1 2004/12/20 08:31:45 assar Exp $
|
||||
|
||||
include $(top_srcdir)/Makefile.am.common
|
||||
|
||||
lib_LTLIBRARIES = libwind.la
|
||||
|
||||
libwind_la_SOURCES = \
|
||||
bidi.c \
|
||||
bidi_table.c \
|
||||
combining.c \
|
||||
combining_table.c \
|
||||
doxygen.c \
|
||||
errorlist.c \
|
||||
errorlist_table.c \
|
||||
map_table.c \
|
||||
map.c \
|
||||
normalize.c \
|
||||
normalize_table.c \
|
||||
punycode.c \
|
||||
stringprep.c \
|
||||
wind_err.c \
|
||||
utf8.c
|
||||
|
||||
$(libwind_la_OBJECTS): wind_err.h
|
||||
|
||||
libwind_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
if versionscript
|
||||
libwind_la_LDFLAGS += $(LDFLAGS_VERSION_SCRIPT)$(srcdir)/version-script.map
|
||||
endif
|
||||
|
||||
|
||||
BUILT_SOURCES = \
|
||||
bidi_table.c \
|
||||
bidi_table.h \
|
||||
combining_table.c \
|
||||
combining_table.h \
|
||||
errorlist_table.c \
|
||||
errorlist_table.h \
|
||||
map_table.c \
|
||||
map_table.h \
|
||||
normalize_table.c \
|
||||
normalize_table.h \
|
||||
punycode_examples.c \
|
||||
punycode_examples.h \
|
||||
wind_err.c \
|
||||
wind_err.h
|
||||
|
||||
include_HEADERS = wind.h
|
||||
|
||||
nodist_include_HEADERS = wind_err.h
|
||||
|
||||
TESTS = \
|
||||
$(check_PROGRAMS)
|
||||
|
||||
check_PROGRAMS = \
|
||||
test-bidi \
|
||||
test-map \
|
||||
test-normalize \
|
||||
test-prohibited \
|
||||
test-punycode \
|
||||
test-utf8
|
||||
|
||||
test_punycode_SOURCES = \
|
||||
test-punycode.c \
|
||||
punycode_examples.c
|
||||
|
||||
bin_PROGRAMS = idn-lookup
|
||||
|
||||
idn_lookup_SOURCES = idn-lookup.c
|
||||
|
||||
idn_lookup_LDADD = libwind.la $(LIB_roken)
|
||||
|
||||
LDADD = libwind.la
|
||||
|
||||
PYTHON = python
|
||||
|
||||
map_table.h map_table.c: rfc3454.txt gen-map.py stringprep.py
|
||||
$(PYTHON) $(srcdir)/gen-map.py $(srcdir)/rfc3454.txt
|
||||
|
||||
errorlist_table.h errorlist_table.c: rfc3454.txt gen-errorlist.py stringprep.py
|
||||
$(PYTHON) $(srcdir)/gen-errorlist.py $(srcdir)/rfc3454.txt
|
||||
|
||||
normalize_table.h normalize_table.c: UnicodeData.txt CompositionExclusions-3.2.0.txt gen-normalize.py
|
||||
$(PYTHON) $(srcdir)/gen-normalize.py $(srcdir)/UnicodeData.txt $(srcdir)/CompositionExclusions-3.2.0.txt
|
||||
|
||||
combining_table.h combining_table.c: UnicodeData.txt gen-combining.py
|
||||
$(PYTHON) $(srcdir)/gen-combining.py $(srcdir)/UnicodeData.txt
|
||||
|
||||
bidi_table.h bidi_table.c: rfc3454.txt gen-bidi.py
|
||||
$(PYTHON) $(srcdir)/gen-bidi.py $(srcdir)/rfc3454.txt
|
||||
|
||||
punycode_examples.h punycode_examples.c: gen-punycode-examples.py rfc3492.txt
|
||||
$(PYTHON) $(srcdir)/gen-punycode-examples.py $(srcdir)/rfc3492.txt
|
||||
|
||||
# Extra files shipped in the distribution tarball: the Unicode/RFC input
# texts and the Python generators that the rules above use to rebuild the
# BUILT_SOURCES tables, plus wind_err.et, from which wind_err.h is made.
EXTRA_DIST = \
	CompositionExclusions-3.2.0.txt \
	UnicodeData.py \
	UnicodeData.txt \
	gen-bidi.py \
	gen-combining.py \
	gen-errorlist.py \
	gen-map.py \
	gen-normalize.py \
	gen-punycode-examples.py \
	rfc3454.py \
	rfc3454.txt \
	rfc3490.txt \
	rfc3491.txt \
	rfc3492.txt \
	rfc4013.txt \
	rfc4518.py \
	rfc4518.txt \
	stringprep.py \
	wind_err.et
|
||||
|
||||
CLEANFILES = \
|
||||
wind_err.c wind_err.h
|
||||
|
||||
wind_err.h: wind_err.et
|
1096
lib/wind/Makefile.in
Normal file
1096
lib/wind/Makefile.in
Normal file
File diff suppressed because it is too large
Load Diff
43
lib/wind/NormalizationCorrections.txt
Normal file
43
lib/wind/NormalizationCorrections.txt
Normal file
@@ -0,0 +1,43 @@
|
||||
# NormalizationCorrections-4.0.0.txt
|
||||
#
|
||||
# This file is a normative contributory data file in the
|
||||
# Unicode Character Database.
|
||||
#
|
||||
# The normalization stabilization policy of the Unicode
|
||||
# Consortium ordinarily precludes any change to the decomposition
|
||||
# for any character, once established in a relevant version
|
||||
# of the UnicodeData.txt data file. However, under certain
|
||||
# exceptional (and rare) conditions, an error in a decomposition
|
||||
# mapping may be discovered that is truly just an unintended
|
||||
# typo in the data, and not a matter of dubious interpretation.
|
||||
#
|
||||
# Whenever such an error may be found, and if it meets the
|
||||
# requirements for possible exceptions to normalization
|
||||
# stability, the correction is entered in this data file,
|
||||
# so that any implementation depending on absolute stability
|
||||
# of normalization, *including* any errors in the data, can
|
||||
# safely reconstruct the exact state of the data tables at
|
||||
# any given version of Unicode.
|
||||
#
|
||||
# Currently this list has exactly six entries in it, one for the
|
||||
# typo found and corrected in Corrigendum #3, and five for
|
||||
# the typos and misidentifications found and corrected in
|
||||
# Corrigendum #4. All efforts
|
||||
# will be made to keep the entries limited to just those fixes.
|
||||
#
|
||||
# Interpretation of the fields:
|
||||
# Field 1: Unicode code point
|
||||
# Field 2: Original (erroneous) decomposition
|
||||
# Field 3: Corrected decomposition
|
||||
# Field 4: Version of Unicode for which the correction was
|
||||
# entered into UnicodeData.txt, in n.n.n format.
|
||||
# Comment: Indicates the Unicode Corrigendum which documents
|
||||
# the correction
|
||||
#
|
||||
#
|
||||
F951;96FB;964B;3.2.0 # Corrigendum 3
|
||||
2F868;2136A;36FC;4.0.0 # Corrigendum 4
|
||||
2F874;5F33;5F53;4.0.0 # Corrigendum 4
|
||||
2F91F;43AB;243AB;4.0.0 # Corrigendum 4
|
||||
2F95F;7AAE;7AEE;4.0.0 # Corrigendum 4
|
||||
2F9BF;4D57;45D7;4.0.0 # Corrigendum 4
|
17166
lib/wind/NormalizationTest.txt
Normal file
17166
lib/wind/NormalizationTest.txt
Normal file
File diff suppressed because it is too large
Load Diff
57
lib/wind/UnicodeData.py
Normal file
57
lib/wind/UnicodeData.py
Normal file
@@ -0,0 +1,57 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
def read(filename):
    """Return a dict of unicode characters read from filename.

    Each line is split on ';' after any '#' comment has been removed.
    The first field is parsed as a hexadecimal code point and becomes the
    integer key; the value is the list of the remaining fields, unmodified
    (so the last field keeps the line's trailing newline).

    Lines that are empty or contain only whitespace once the comment has
    been stripped are ignored.  Raises Exception if the same code point
    appears twice.
    """
    ret = {}
    ud = open(filename, 'r')
    try:
        for l in ud:
            l = re.sub('#.*$', '', l)
            # Skip blank and comment-only lines, including a final comment
            # line that has no terminating newline.
            if not l.strip():
                continue
            f = l.split(';')
            key = int(f[0], 0x10)
            if key in ret:
                raise Exception('Duplicate key in UnicodeData')
            ret[key] = f[1:]
    finally:
        # Close the file even when a malformed line raises.
        ud.close()
    return ret
|
15100
lib/wind/UnicodeData.txt
Normal file
15100
lib/wind/UnicodeData.txt
Normal file
File diff suppressed because it is too large
Load Diff
92
lib/wind/bidi.c
Normal file
92
lib/wind/bidi.c
Normal file
@@ -0,0 +1,92 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "windlocl.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "bidi_table.h"
|
||||
|
||||
static int
|
||||
range_entry_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct range_entry *ea = (const struct range_entry*)a;
|
||||
const struct range_entry *eb = (const struct range_entry*)b;
|
||||
|
||||
if (ea->start >= eb->start && ea->start < eb->start + eb->len)
|
||||
return 0;
|
||||
return ea->start - eb->start;
|
||||
}
|
||||
|
||||
static int
|
||||
is_ral(uint32_t cp)
|
||||
{
|
||||
struct range_entry ee = {cp};
|
||||
void *s = bsearch(&ee, _wind_ral_table, _wind_ral_table_size,
|
||||
sizeof(_wind_ral_table[0]),
|
||||
range_entry_cmp);
|
||||
return s != NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
is_l(uint32_t cp)
|
||||
{
|
||||
struct range_entry ee = {cp};
|
||||
void *s = bsearch(&ee, _wind_l_table, _wind_l_table_size,
|
||||
sizeof(_wind_l_table[0]),
|
||||
range_entry_cmp);
|
||||
return s != NULL;
|
||||
}
|
||||
|
||||
int
|
||||
_wind_stringprep_testbidi(const uint32_t *in, size_t in_len, wind_profile_flags flags)
|
||||
{
|
||||
size_t i;
|
||||
unsigned ral = 0;
|
||||
unsigned l = 0;
|
||||
|
||||
if ((flags & (WIND_PROFILE_NAME|WIND_PROFILE_SASL)) == 0)
|
||||
return 0;
|
||||
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
ral |= is_ral(in[i]);
|
||||
l |= is_l(in[i]);
|
||||
}
|
||||
if (ral) {
|
||||
if (l)
|
||||
return 1;
|
||||
if (!is_ral(in[0]) || !is_ral(in[in_len - 1]))
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
29
lib/wind/combining.c
Normal file
29
lib/wind/combining.c
Normal file
@@ -0,0 +1,29 @@
|
||||
#include "windlocl.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "combining_table.h"
|
||||
|
||||
static int
|
||||
translation_cmp(const void *key, const void *data)
|
||||
{
|
||||
const struct translation *t1 = (const struct translation *)key;
|
||||
const struct translation *t2 = (const struct translation *)data;
|
||||
|
||||
return t1->key - t2->key;
|
||||
}
|
||||
|
||||
int
|
||||
_wind_combining_class(uint32_t code_point)
|
||||
{
|
||||
struct translation ts = {code_point};
|
||||
void *s = bsearch(&ts, _wind_combining_table, _wind_combining_table_size,
|
||||
sizeof(_wind_combining_table[0]),
|
||||
translation_cmp);
|
||||
if (s != NULL) {
|
||||
const struct translation *t = (const struct translation *)s;
|
||||
return t->combining_class;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
48
lib/wind/doxygen.c
Normal file
48
lib/wind/doxygen.c
Normal file
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2008 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <config.h>
|
||||
RCSID("$Id$");
|
||||
|
||||
/*! @mainpage Heimdal wind library
|
||||
*
|
||||
* @section intro Introduction
|
||||
*
|
||||
* Heimdal wind library is an implementation of stringprep and some of
|
||||
* its profiles.
|
||||
*
|
||||
* The project web page: http://www.h5l.org/
|
||||
*
|
||||
*/
|
||||
|
||||
/** @defgroup wind Heimdal wind library */
|
44
lib/wind/errorlist.c
Normal file
44
lib/wind/errorlist.c
Normal file
@@ -0,0 +1,44 @@
|
||||
#include "windlocl.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "errorlist_table.h"
|
||||
|
||||
static int
|
||||
error_entry_cmp(const void *a, const void *b)
|
||||
{
|
||||
const struct error_entry *ea = (const struct error_entry*)a;
|
||||
const struct error_entry *eb = (const struct error_entry*)b;
|
||||
|
||||
if (ea->start >= eb->start && ea->start < eb->start + eb->len)
|
||||
return 0;
|
||||
return ea->start - eb->start;
|
||||
}
|
||||
|
||||
int
|
||||
_wind_stringprep_error(uint32_t cp, wind_profile_flags flags)
|
||||
{
|
||||
struct error_entry ee = {cp};
|
||||
const struct error_entry *s;
|
||||
|
||||
s = (const struct error_entry *)
|
||||
bsearch(&ee, _wind_errorlist_table,
|
||||
_wind_errorlist_table_size,
|
||||
sizeof(_wind_errorlist_table[0]),
|
||||
error_entry_cmp);
|
||||
if (s == NULL)
|
||||
return 0;
|
||||
return (s->flags & flags);
|
||||
}
|
||||
|
||||
int
|
||||
_wind_stringprep_prohibited(const uint32_t *in, size_t in_len,
|
||||
wind_profile_flags flags)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
for (i = 0; i < in_len; ++i)
|
||||
if (_wind_stringprep_error(in[i], flags))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
102
lib/wind/gen-bidi.py
Executable file
102
lib/wind/gen-bidi.py
Executable file
@@ -0,0 +1,102 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
import rfc3454
|
||||
|
||||
# Exactly one argument is expected: the path to rfc3454.txt.
if len(sys.argv) != 2:
    # Parenthesised so the line parses under both Python 2 and Python 3;
    # the text printed is unchanged.
    print("usage: %s rfc3454.txt" % sys.argv[0])
    sys.exit(1)
|
||||
|
||||
tables = rfc3454.read(sys.argv[1])
|
||||
|
||||
bidi_h = generate.Header('bidi_table.h')
|
||||
|
||||
bidi_c = generate.Implementation('bidi_table.c')
|
||||
|
||||
bidi_h.file.write(
|
||||
'''
|
||||
#include <stdint.h>
|
||||
#include <stddef.h>
|
||||
|
||||
struct range_entry {
|
||||
uint32_t start;
|
||||
unsigned len;
|
||||
};
|
||||
|
||||
extern const struct range_entry _wind_ral_table[];
|
||||
extern const struct range_entry _wind_l_table[];
|
||||
|
||||
extern const size_t _wind_ral_table_size;
|
||||
extern const size_t _wind_l_table_size;
|
||||
|
||||
''')
|
||||
|
||||
bidi_c.file.write(
|
||||
'''
|
||||
#include "bidi_table.h"
|
||||
|
||||
''')
|
||||
|
||||
def printTable(file, table, variable):
    """Write tables[table] to file as a C array called variable.

    Each line of the table that is a single hexadecimal code point, or a
    'first-last' range of them, becomes one {start, length} initializer of
    a 'const struct range_entry' array.  Lines matching neither form are
    ignored.  The number of initializers written is then emitted as
    'const size_t <variable>_size'.
    """
    file.write("const struct range_entry %s[] = {\n" % variable)
    entries = 0
    for line in tables[table]:
        span = re.search('^ *([0-9A-F]+)-([0-9A-F]+) *$', line)
        if span:
            first = int(span.group(1), 0x10)
            last = int(span.group(2), 0x10)
            # The length counts both end points of the range.
            file.write(" {0x%x, 0x%x},\n" % (first, last - first + 1))
            entries += 1
            continue
        single = re.search('^ *([0-9A-F]+) *$', line)
        if single:
            file.write(" {0x%x, 1},\n" % int(single.group(1), 0x10))
            entries += 1
    file.write("};\n\n")
    file.write("const size_t %s_size = %u;\n\n" % (variable, entries))
|
||||
|
||||
printTable(bidi_c.file, 'D.1', '_wind_ral_table')
|
||||
printTable(bidi_c.file, 'D.2', '_wind_l_table')
|
||||
|
||||
bidi_h.close()
|
||||
bidi_c.close()
|
105
lib/wind/gen-combining.py
Executable file
105
lib/wind/gen-combining.py
Executable file
@@ -0,0 +1,105 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
import UnicodeData
|
||||
|
||||
# Exactly one argument is expected: the path to UnicodeData.txt.
if len(sys.argv) != 2:
    # Parenthesised so the line parses under both Python 2 and Python 3;
    # the text printed is unchanged.
    print("usage: %s UnicodeData.txt" % sys.argv[0])
    sys.exit(1)
|
||||
|
||||
ud = UnicodeData.read(sys.argv[1])
|
||||
|
||||
trans = {}
|
||||
for k,v in ud.items():
|
||||
if int(v[2]) != 0 :
|
||||
trans[k] = [int(v[2]), v[1]]
|
||||
|
||||
# trans = [(x[0], int(x[3]), x[1]) for x in UnicodeData.read() if int(x[3]) != 0]
|
||||
|
||||
combining_h = generate.Header('combining_table.h')
|
||||
combining_c = generate.Implementation('combining_table.c')
|
||||
|
||||
combining_h.file.write(
|
||||
'''
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
struct translation {
|
||||
uint32_t key;
|
||||
unsigned combining_class;
|
||||
};
|
||||
|
||||
extern const struct translation _wind_combining_table[];
|
||||
|
||||
extern const size_t _wind_combining_table_size;
|
||||
''')
|
||||
|
||||
combining_c.file.write(
|
||||
'''
|
||||
#include "combining_table.h"
|
||||
|
||||
const struct translation _wind_combining_table[] = {
|
||||
''')
|
||||
|
||||
s = trans.keys()
|
||||
s.sort()
|
||||
for k in s:
|
||||
v = trans[k]
|
||||
combining_c.file.write("{0x%x, %u}, /* %s */\n"
|
||||
% (k, v[0], v[1]))
|
||||
|
||||
|
||||
#trans.sort()
|
||||
#for x in trans:
|
||||
# combining_c.file.write("{0x%x, %u}, /* %s */\n"
|
||||
# % (x[0], x[1], x[2]))
|
||||
|
||||
combining_c.file.write(
|
||||
'''
|
||||
};
|
||||
''')
|
||||
|
||||
combining_c.file.write(
|
||||
"const size_t _wind_combining_table_size = %u;\n" % len(trans))
|
||||
|
||||
|
||||
combining_h.close()
|
||||
combining_c.close()
|
120
lib/wind/gen-errorlist.py
Executable file
120
lib/wind/gen-errorlist.py
Executable file
@@ -0,0 +1,120 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
import rfc3454
|
||||
import rfc4518
|
||||
import stringprep
|
||||
|
||||
# Exactly one argument is expected: the path to rfc3454.txt.
if len(sys.argv) != 2:
    print("usage: %s rfc3454.txt" % sys.argv[0])
    sys.exit(1)

# Tables read from the RFC 3454 text, extended (and overridden on name
# clashes) by whatever rfc4518.read() supplies.
tables = rfc3454.read(sys.argv[1])
t2 = rfc4518.read()
for name in t2.keys():
    tables[name] = t2[name]

error_list = stringprep.get_errorlist()

errorlist_h = generate.Header('errorlist_table.h')
errorlist_c = generate.Implementation('errorlist_table.c')

errorlist_h.file.write(
'''
#include "windlocl.h"

struct error_entry {
uint32_t start;
unsigned len;
wind_profile_flags flags;
};

extern const struct error_entry _wind_errorlist_table[];

extern const size_t _wind_errorlist_table_size;

''')

errorlist_c.file.write(
'''
#include "errorlist_table.h"

const struct error_entry _wind_errorlist_table[] = {
''')

# "XXXX-YYYY; description" (inclusive range) and "XXXX; description".
range_re = re.compile('^ *([0-9A-F]+)-([0-9A-F]+); *(.*) *$')
single_re = re.compile('^ *([0-9A-F]+); *(.*) *$')


def parse_entry(line):
    """Return [start, length, description] for a table line, or None when
    the line is neither a range nor a single code point entry."""
    m = range_re.search(line)
    if m:
        first = int(m.group(1), 0x10)
        last = int(m.group(2), 0x10)
        return [first, last - first + 1, m.group(3)]
    m = single_re.search(line)
    if m:
        return [int(m.group(1), 0x10), 1, m.group(2)]
    return None


trans = []
for t in error_list.keys():
    for l in tables[t]:
        entry = parse_entry(l)
        if entry is not None:
            # The last element records which table the entry came from.
            trans.append(entry + [[t]])

trans = stringprep.sort_merge_trans(trans)

for (start, length, description, entry_tables) in trans:
    symbols = stringprep.symbols(error_list, entry_tables)
    if len(symbols) == 0:
        print("no symbol for %s" % description)
        sys.exit(1)
    row = (" {0x%x, 0x%x, %s}, /* %s: %s */\n"
           % (start, length, symbols, ",".join(entry_tables), description))
    errorlist_c.file.write(row)

errorlist_c.file.write(
'''};

''')

errorlist_c.file.write(
    "const size_t _wind_errorlist_table_size = %u;\n" % len(trans))

errorlist_h.close()
errorlist_c.close()
|
158
lib/wind/gen-map.py
Executable file
158
lib/wind/gen-map.py
Executable file
@@ -0,0 +1,158 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
import rfc3454
|
||||
import rfc4518
|
||||
import stringprep
|
||||
import util
|
||||
|
||||
# Exactly one argument is expected: the path to rfc3454.txt.
if len(sys.argv) != 2:
    print("usage: %s rfc3454.txt" % sys.argv[0])
    sys.exit(1)

# Tables read from the RFC 3454 text, extended (and overridden on name
# clashes) by whatever rfc4518.read() supplies.
tables = rfc3454.read(sys.argv[1])
t2 = rfc4518.read()

for x in t2.keys():
    tables[x] = t2[x]

map_list = stringprep.get_maplist()

map_h = generate.Header('map_table.h')

map_c = generate.Implementation('map_table.c')

map_h.file.write(
'''
#include "windlocl.h"

struct translation {
uint32_t key;
unsigned short val_len;
unsigned short val_offset;
wind_profile_flags flags;
};

extern const struct translation _wind_map_table[];

extern const size_t _wind_map_table_size;

extern const uint32_t _wind_map_table_val[];

''')

map_c.file.write(
'''
#include "map_table.h"

const struct translation _wind_map_table[] = {
''')

# Each element is (key, value, description, [table name]).
trans = []

for t in map_list.keys():
    for l in tables[t]:
        # "XXXX-YYYY; value; description": one entry per code point.
        m = re.search('^ *([0-9A-F]+)-([0-9A-F]+); *([^;]+); *(.*) *$', l)
        if m:
            start = int(m.group(1), 0x10)
            end = int(m.group(2), 0x10)
            value = m.group(3)
            desc = m.group(4)
            # The range is inclusive (gen-errorlist.py computes its length
            # as end - start + 1), so the last code point must be visited.
            for key in range(start, end + 1):
                trans.append((key, value, desc, [t]))
            continue
        # "XXXX; value; description": a single code point.
        m = re.search('^ *([^;]+); *([^;]+); *(.*) *$', l)
        if m:
            key = int(m.group(1), 0x10)
            value = m.group(2)
            desc = m.group(3)
            trans.append((key, value, desc, [t]))
            continue

# valTable holds the mapped-to sequences back to back; offsetTable records
# where each key's sequence starts in it.
valTable = []
offsetTable = {}

trans = stringprep.sort_merge_trans(trans)

for x in trans:
    (key, value, description, table) = x
    v = value.split()
    i = util.subList(valTable, v)
    # NOTE(review): presumably subList returns a falsy value when v is not
    # found.  A hit at offset 0 is also falsy and is simply appended again,
    # which costs space but stays correct -- confirm against util.subList.
    if i:
        offsetTable[key] = i
    else:
        offsetTable[key] = len(valTable)
        valTable.extend(v)

for x in trans:
    (key, value, description, entry_tables) = x
    symbols = stringprep.symbols(map_list, entry_tables)
    if len(symbols) == 0:
        print("no symbol for %s %s (%s)" % (key, description, entry_tables))
        sys.exit(1)
    v = value.split()
    map_c.file.write(" {0x%x, %u, %u, %s}, /* %s: %s */\n"
                     % (key, len(v), offsetTable[key], symbols,
                        ",".join(entry_tables), description))

map_c.file.write(
'''
};

''')

map_c.file.write(
    "const size_t _wind_map_table_size = %u;\n\n" % len(trans))

map_c.file.write(
    "const uint32_t _wind_map_table_val[] = {\n")

# Values are the hexadecimal strings taken from the tables.
for x in valTable:
    map_c.file.write(" 0x%s,\n" % x)

map_c.file.write(
    "};\n\n")

map_h.close()
map_c.close()
|
211
lib/wind/gen-normalize.py
Executable file
211
lib/wind/gen-normalize.py
Executable file
@@ -0,0 +1,211 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
import UnicodeData
|
||||
import util
|
||||
|
||||
# Two arguments are expected: UnicodeData.txt and the composition
# exclusions file.
if len(sys.argv) != 3:
    # The adjacent literals are joined before '%' is applied, so the program
    # name is substituted into the complete message.
    print("usage: %s UnicodeData.txt"
          " CompositionExclusions-3.2.0.txt" % sys.argv[0])
    sys.exit(1)
|
||||
|
||||
ud = UnicodeData.read(sys.argv[1])
|
||||
|
||||
def sortedKeys(d):
    """Return the keys of dict d as a new list in ascending order."""
    return sorted(d.keys())
|
||||
|
||||
# code point -> [fifth field with any "<tag>" marker removed, first field],
# for every entry whose fifth field is non-empty.
trans = {}
for codepoint, fields in ud.items():
    if fields[4]:
        trans[codepoint] = [re.sub('<[a-zA-Z]+>', '', fields[4]), fields[0]]

# Longest value sequence, counted in whitespace-separated items; emitted
# as MAX_LENGTH in the generated header.
maxLength = max([0] + [len(entry[0].split()) for entry in trans.values()])
|
||||
|
||||
normalize_h = generate.Header('normalize_table.h')
normalize_c = generate.Implementation('normalize_table.c')

header_template = '''
#include <stddef.h>
#include <stdint.h>

#define MAX_LENGTH %u

struct translation {
uint32_t key;
unsigned short val_len;
unsigned short val_offset;
};

extern const struct translation _wind_normalize_table[];

extern const uint32_t _wind_normalize_val_table[];

extern const size_t _wind_normalize_table_size;

struct canon_node {
uint32_t val;
unsigned char next_start;
unsigned char next_end;
unsigned short next_offset;
};

extern const struct canon_node _wind_canon_table[];

extern const unsigned short _wind_canon_next_table[];
'''
normalize_h.file.write(header_template % maxLength)

normalize_c.file.write(
'''
#include "normalize_table.h"

const struct translation _wind_normalize_table[] = {
''')

# All value sequences stored back to back; each table row refers to its
# sequence by (length, offset).
normalizeValTable = []

for key in sortedKeys(trans):
    value, description = trans[key][0], trans[key][1]
    vec = [int(item, 0x10) for item in value.split()]
    offset = util.subList(normalizeValTable, vec)
    # A falsy result (including a hit at offset 0) appends the sequence.
    if not offset:
        offset = len(normalizeValTable)
        normalizeValTable.extend(vec)
    row = " {0x%x, %u, %u}, /* %s */\n" % (key, len(vec), offset, description)
    normalize_c.file.write(row)

normalize_c.file.write(
'''};

''')

normalize_c.file.write(
    "const size_t _wind_normalize_table_size = %u;\n\n" % len(trans))

normalize_c.file.write("const uint32_t _wind_normalize_val_table[] = {\n")

for number in normalizeValTable:
    normalize_c.file.write(" 0x%x,\n" % number)

normalize_c.file.write("};\n\n")
|
||||
|
||||
exclusions = UnicodeData.read(sys.argv[2])

# Inverse mapping: the value sequence, spelled as five hex digits per item,
# maps to [code point, first field].  Entries carrying a "<tag>" marker and
# entries listed in the exclusions file are left out.
inv = {}
for codepoint, fields in ud.items():
    decomposition = fields[4]
    if not decomposition:
        continue
    if re.search('<[a-zA-Z]+> *', decomposition):
        continue
    if codepoint in exclusions:
        continue
    digits = ''.join(["%05x" % int(item, 0x10)
                      for item in decomposition.split(' ')])
    inv[digits] = [codepoint, fields[0]]
|
||||
|
||||
# Index that the next call to createTable() will hand out.
table = 0

# Table index -> 17-element list: slot 0 holds the node's value, slots 1..16
# hold the child table index for each hex digit (None when absent).
tables = {}

def createTable():
    """Allocate a fresh, empty table and return its index."""
    global table
    index = table
    tables[index] = [0] + [None] * 16
    table = index + 1
    return index
|
||||
|
||||
def add(table, k, v):
    """Store v[0] at the node reached from table by following the hex
    digits of k, creating intermediate tables on demand."""
    node = table
    for digit in k:
        # Slot 0 is reserved for the node's own value, hence the + 1.
        slot = int(digit, 0x10) + 1
        if node[slot] is None:
            node[slot] = createTable()
        node = tables[node[slot]]
    node[0] = v[0]
|
||||
|
||||
top = createTable()

for digits, target in inv.items():
    add(tables[top], digits, target)

# next_table holds, back to back, the populated child-index window of every
# table; the three dicts record each table's window bounds and where that
# window starts in next_table.
next_table = []
tableToNext = {}
tableEnd = {}
tableStart = {}

for index in sortedKeys(tables):
    children = tables[index][1:]
    tableToNext[index] = len(next_table)

    # Trim leading and trailing empty slots.
    first = 0
    while first < 16 and children[first] is None:
        first += 1
    last = 16
    while last > first and children[last - 1] is None:
        last -= 1

    tableStart[index] = first
    tableEnd[index] = last
    # Gaps inside the window are written as 0.
    next_table.extend([child or 0 for child in children[first:last]])

normalize_c.file.write("const struct canon_node _wind_canon_table[] = {\n")

for index in sortedKeys(tables):
    node = tables[index]
    row = (" {0x%x, %u, %u, %u},\n" %
           (node[0], tableStart[index], tableEnd[index], tableToNext[index]))
    normalize_c.file.write(row)

normalize_c.file.write("};\n\n")

normalize_c.file.write("const unsigned short _wind_canon_next_table[] = {\n")

for child in next_table:
    normalize_c.file.write(" %u,\n" % child)

normalize_c.file.write("};\n\n")

normalize_h.close()
normalize_c.close()
|
131
lib/wind/gen-punycode-examples.py
Executable file
131
lib/wind/gen-punycode-examples.py
Executable file
@@ -0,0 +1,131 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
import sys
|
||||
|
||||
import generate
|
||||
|
||||
# Exactly one argument is expected: the path to rfc3492.txt.
if len(sys.argv) != 2:
    print("usage: %s rfc3492.txt" % sys.argv[0])
    sys.exit(1)

f = open(sys.argv[1], 'r')

examples_h = generate.Header('punycode_examples.h')
# The .c output is an implementation file, not a header: no include guard.
examples_c = generate.Implementation('punycode_examples.c')

# True while reading the lines between the "7.1" and "7.2" headings.
start = False

# Bound up front so that input lacking the expected markers yields an empty
# table instead of a NameError.
cases = []
codes = []
desc = ''

while True:
    l = f.readline()
    if not l:
        break
    # A trailing backslash joins the line with the following one.
    if l[-2:] == "\\\n":
        l2 = f.readline()
        if not l2:
            raise Exception("EOF in backslash escape")
        l2 = re.sub('^ *', '', l2)
        l = l[:-2] + l2
    if start:
        if re.match('7\\.2', l):
            start = False
        else:
            # "(X) description" opens a new example.
            m = re.search('^ *\\([A-Z]\\) *(.*)$', l)
            if m:
                desc = m.group(1)
                codes = []
            else:
                # Code point lines consist of "u+XXXX" / "U+XXXX" tokens.
                m = re.search('^ *([uU]\\+.*) *$', l)
                if m:
                    codes.extend(m.group(1).split())
                else:
                    # The "Punycode:" line completes the example.
                    m = re.search('^ *Punycode: (.*) *$', l)
                    if m:
                        cases.append([codes, m.group(1), desc])
    else:
        if re.match('^7\\.1', l):
            start = True
            cases = []

f.close()

examples_h.file.write(
'''
#include <stddef.h>
#include <stdint.h>

#define MAX_LENGTH 40

struct punycode_example {
size_t len;
uint32_t val[MAX_LENGTH];
const char *pc;
const char *description;
};

extern const struct punycode_example punycode_examples[];

extern const size_t punycode_examples_size;
''')

examples_c.file.write(
'''
#include "punycode_examples.h"

const struct punycode_example punycode_examples[] = {
''')

for (cp, pc, desc) in cases:
    # "u+XXXX" becomes the C literal "0xXXXX".
    values = ', '.join([re.sub('[uU]\\+', '0x', c) for c in cp])
    examples_c.file.write(
        " {%u, {%s}, \"%s\", \"%s\"},\n" %
        (len(cp), values, pc, desc))

examples_c.file.write(
'''};

''')

examples_c.file.write(
    "const size_t punycode_examples_size = %u;\n\n" % len(cases))

examples_h.close()
examples_c.close()
|
80
lib/wind/generate.py
Normal file
80
lib/wind/generate.py
Normal file
@@ -0,0 +1,80 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import datetime
|
||||
import string
|
||||
|
||||
class GeneratedFile:
    """A file produced by one of the generator scripts.

    The file is opened for writing on construction and starts with two C
    comments: one naming the file, one carrying the generation timestamp.
    """

    def __init__(self, name):
        """Open name for writing and emit the leading comments."""
        self.name = name
        self.file = open(name, 'w')
        self.file.write('/* %s */\n' % name)
        timestamp = datetime.datetime.now().isoformat()
        self.file.write('/* Automatically generated at %s */\n\n' % timestamp)

    def close(self):
        """Close the underlying file."""
        self.file.close()
|
||||
|
||||
|
||||
class Header(GeneratedFile):
    """A generated C header, wrapped in an #ifndef include guard."""

    # Maps '-' and '.' to '_' when deriving the guard macro from the name.
    guardTrans = string.maketrans('-.', '__')

    def makeGuard(self):
        """Return the include-guard macro name derived from the file name."""
        return self.name.translate(self.guardTrans).upper()

    def __init__(self, name):
        """Open the header and write the opening half of the guard."""
        GeneratedFile.__init__(self, name)
        self.guard = self.makeGuard()
        self.file.write('#ifndef %s\n' % self.guard)
        self.file.write('#define %s 1\n' % self.guard)

    def close(self):
        """Write the closing #endif of the guard and close the file."""
        self.file.write('#endif /* %s */\n' % self.guard)
        GeneratedFile.close(self)
|
||||
|
||||
|
||||
class Implementation(GeneratedFile):
    """A generated implementation (.c) file; adds nothing to the base class
    beyond the distinct type."""

    def __init__(self, name):
        """Open name for writing via GeneratedFile."""
        GeneratedFile.__init__(self, name)
|
169
lib/wind/idn-lookup.c
Normal file
169
lib/wind/idn-lookup.c
Normal file
@@ -0,0 +1,169 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <assert.h>
|
||||
#include <err.h>
|
||||
#include <netdb.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include <getarg.h>
|
||||
#include <roken.h>
|
||||
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
static int version_flag = 0;
|
||||
static int help_flag = 0;
|
||||
|
||||
|
||||
/*
 * Return 1 if `u' is one of the label separators recognised here
 * (U+002E or U+3002), 0 otherwise.
 */
static int
is_separator(uint32_t u)
{
    switch (u) {
    case 0x002E:
    case 0x3002:
        return 1;
    default:
        return 0;
    }
}
|
||||
|
||||
/*
 * Convert the UTF-8 host name `name' to an ASCII form, label by label,
 * print the result, then resolve it with getaddrinfo() and print the
 * canonical name.  Any failure terminates the program through errx().
 */
static void
lookup(const char *name)
{
    /* Room kept free for each label: "xn--" prefix, '.', final NUL. */
    enum { LABEL_RESERVE = 4 + 1 + 1 };

    size_t i;
    char encoded[1024];
    char *ep;
    int ret;
    struct addrinfo hints;
    struct addrinfo *ai;

    size_t u_len = strlen(name);
    /* One spare element so that an empty name never asks for 0 bytes. */
    uint32_t *u = malloc((u_len + 1) * sizeof(*u));
    size_t norm_len = u_len;
    uint32_t *norm = malloc((norm_len + 1) * sizeof(*norm));

    if (u == NULL || norm == NULL)
        errx(1, "malloc failed");

    ret = wind_utf8ucs4(name, u, &u_len);
    if (ret)
        errx(1, "utf8 conversion failed");
    ret = wind_stringprep(u, u_len, norm, &norm_len, WIND_PROFILE_NAME);
    if (ret)
        errx(1, "stringprep failed");
    free(u);

    ep = encoded;
    for (i = 0; i < norm_len; ++i) {
        size_t j;
        size_t len;
        size_t avail = sizeof(encoded) - (size_t)(ep - encoded);

        /* [i, j) is the next label; j stops on a separator or the end. */
        for (j = i; j < norm_len && !is_separator(norm[j]); ++j)
            ;

        /* Never let the encoder use the bytes needed for the prefix,
         * the trailing '.' and the terminating NUL. */
        if (avail <= LABEL_RESERVE)
            errx(1, "encoded name too long");
        len = avail - LABEL_RESERVE;

        ret = wind_punycode_toascii(norm + i, j - i, ep, &len);
        if (ret < 0)
            errx(1, "punycode failed");
        if (ret) {
            /* Non-zero result: shift the label and put "xn--" in front. */
            memmove(ep + 4, ep, len);
            memcpy(ep, "xn--", 4);
            ep += 4;
        } else if (len > 0) {
            /* NOTE(review): presumably drops a trailing delimiter that
             * the encoder appends to plain labels -- confirm.  Guarded so
             * that an empty label cannot wrap `len' around. */
            --len;
        }
        ep += len;
        *ep++ = '.';
        i = j;
    }
    *ep++ = '\0';
    free(norm);

    printf("Converted \"%s\" into \"%s\"\n", name, encoded);

    memset(&hints, 0, sizeof(hints));
    hints.ai_flags = AI_CANONNAME;
    ret = getaddrinfo(encoded, NULL, &hints, &ai);
    if (ret)
        errx(1, "getaddrinfo failed: %s", gai_strerror(ret));
    /* Passing a null pointer for %s is undefined, so substitute a marker. */
    printf("canonical-name: %s\n",
           ai->ai_canonname != NULL ? ai->ai_canonname : "(none)");
    freeaddrinfo(ai);
}
|
||||
|
||||
static struct getargs args[] = {
|
||||
{"version", 0, arg_flag, &version_flag,
|
||||
"print version", NULL },
|
||||
{"help", 0, arg_flag, &help_flag,
|
||||
NULL, NULL }
|
||||
};
|
||||
|
||||
static void
|
||||
usage (int ret)
|
||||
{
|
||||
arg_printusage(args, sizeof(args)/sizeof(args[0]), NULL,
|
||||
"dns-names ...");
|
||||
exit (ret);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, char **argv)
|
||||
{
|
||||
int optidx = 0;
|
||||
unsigned i;
|
||||
|
||||
setprogname (argv[0]);
|
||||
|
||||
if(getarg(args, sizeof(args) / sizeof(args[0]), argc, argv, &optidx))
|
||||
usage(1);
|
||||
|
||||
if (help_flag)
|
||||
usage (0);
|
||||
|
||||
if(version_flag){
|
||||
print_version(NULL);
|
||||
exit(0);
|
||||
}
|
||||
|
||||
argc -= optidx;
|
||||
argv += optidx;
|
||||
|
||||
if (argc == 0)
|
||||
usage(1);
|
||||
|
||||
for (i = 0; i < argc; ++i)
|
||||
lookup(argv[i]);
|
||||
return 0;
|
||||
}
|
87
lib/wind/map.c
Normal file
87
lib/wind/map.c
Normal file
@@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include "windlocl.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "map_table.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
static int
|
||||
translation_cmp(const void *key, const void *data)
|
||||
{
|
||||
const struct translation *t1 = (const struct translation *)key;
|
||||
const struct translation *t2 = (const struct translation *)data;
|
||||
|
||||
return t1->key - t2->key;
|
||||
}
|
||||
|
||||
int
|
||||
_wind_stringprep_map(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len,
|
||||
wind_profile_flags flags)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned o = 0;
|
||||
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
struct translation ts = {in[i]};
|
||||
const struct translation *s;
|
||||
|
||||
s = (const struct translation *)
|
||||
bsearch(&ts, _wind_map_table, _wind_map_table_size,
|
||||
sizeof(_wind_map_table[0]),
|
||||
translation_cmp);
|
||||
if (s != NULL && (s->flags & flags)) {
|
||||
unsigned j;
|
||||
|
||||
for (j = 0; j < s->val_len; ++j) {
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = _wind_map_table_val[s->val_offset + j];
|
||||
}
|
||||
} else {
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = in[i];
|
||||
|
||||
}
|
||||
}
|
||||
*out_len = o;
|
||||
return 0;
|
||||
}
|
288
lib/wind/normalize.c
Normal file
288
lib/wind/normalize.c
Normal file
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include "windlocl.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "normalize_table.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
static int
|
||||
translation_cmp(const void *key, const void *data)
|
||||
{
|
||||
const struct translation *t1 = (const struct translation *)key;
|
||||
const struct translation *t2 = (const struct translation *)data;
|
||||
|
||||
return t1->key - t2->key;
|
||||
}
|
||||
|
||||
/*
 * Parameters of the arithmetic Hangul (de)composition used by
 * hangul_decomp() and hangul_composition().
 *
 * s_*  range of precomposed syllables that gets decomposed
 * l_*  range of the first code point of a decomposition
 * v_*  range of the second code point of a decomposition
 * t_*  range of the optional third code point; t_base itself stands for
 *      "no third code point"
 * n_count is the number of syllables sharing one first code point.
 */
enum {
    s_base  = 0xAC00,
    s_count = 11172,
    l_base  = 0x1100,
    l_count = 19,
    v_base  = 0x1161,
    v_count = 21,
    t_base  = 0x11A7,
    t_count = 28,
    n_count = v_count * t_count
};
|
||||
|
||||
static int
|
||||
hangul_decomp(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len)
|
||||
{
|
||||
uint32_t u = *in;
|
||||
unsigned s_index;
|
||||
unsigned l, v, t;
|
||||
unsigned o;
|
||||
|
||||
if (u < s_base || u >= s_base + s_count)
|
||||
return 0;
|
||||
s_index = u - s_base;
|
||||
l = l_base + s_index / n_count;
|
||||
v = v_base + (s_index % n_count) / t_count;
|
||||
t = t_base + s_index % t_count;
|
||||
o = 2;
|
||||
if (t != t_base)
|
||||
++o;
|
||||
if (*out_len < o)
|
||||
return -1;
|
||||
out[0] = l;
|
||||
out[1] = v;
|
||||
if (t != t_base)
|
||||
out[2] = t;
|
||||
*out_len = o;
|
||||
return 1;
|
||||
}
|
||||
|
||||
static uint32_t
|
||||
hangul_composition(const uint32_t *in, size_t in_len)
|
||||
{
|
||||
if (in_len < 2)
|
||||
return 0;
|
||||
if (in[0] >= l_base && in[0] < l_base + l_count) {
|
||||
unsigned l_index = in[0] - l_base;
|
||||
unsigned v_index;
|
||||
|
||||
if (in[1] < v_base || in[1] >= v_base + v_count)
|
||||
return 0;
|
||||
v_index = in[1] - v_base;
|
||||
return (l_index * v_count + v_index) * t_count + s_base;
|
||||
} else if (in[0] >= s_base && in[0] < s_base + s_count) {
|
||||
unsigned s_index = in[0] - s_base;
|
||||
unsigned t_index;
|
||||
|
||||
if (s_index % t_count != 0)
|
||||
return 0;
|
||||
if (in[1] < t_base || in[1] >= t_base + t_count)
|
||||
return 0;
|
||||
t_index = in[1] - t_base;
|
||||
return in[0] + t_index;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
compat_decomp(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned o = 0;
|
||||
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
struct translation ts = {in[i]};
|
||||
size_t sub_len = *out_len - o;
|
||||
int ret;
|
||||
|
||||
ret = hangul_decomp(in + i, in_len - i,
|
||||
out + o, &sub_len);
|
||||
if (ret) {
|
||||
if (ret == -1)
|
||||
return ret;
|
||||
o += sub_len;
|
||||
} else {
|
||||
void *s = bsearch(&ts,
|
||||
_wind_normalize_table,
|
||||
_wind_normalize_table_size,
|
||||
sizeof(_wind_normalize_table[0]),
|
||||
translation_cmp);
|
||||
if (s != NULL) {
|
||||
const struct translation *t = (const struct translation *)s;
|
||||
|
||||
ret = compat_decomp(_wind_normalize_val_table + t->val_offset,
|
||||
t->val_len,
|
||||
out + o, &sub_len);
|
||||
if (ret)
|
||||
return ret;
|
||||
o += sub_len;
|
||||
} else {
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = in[i];
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
*out_len = o;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Order two code points by combining class.
 *
 * `a' and `b' point at uint32_t code points.  The result is the difference
 * of their _wind_combining_class() values: negative, zero or positive when
 * the class of `*a' is lower than, equal to or higher than that of `*b'.
 */
static int
cc_cmp(const void *a, const void *b)
{
    const uint32_t lhs = *(const uint32_t *)a;
    const uint32_t rhs = *(const uint32_t *)b;

    return _wind_combining_class(lhs) - _wind_combining_class(rhs);
}
|
||||
|
||||
/*
 * Put the code points of `tmp' into canonical order, in place.
 *
 * Every maximal run of code points with a non-zero combining class is
 * sorted by ascending combining class.  Code points with combining class
 * zero are left where they are and delimit the runs.
 */
static void
canonical_reorder(uint32_t *tmp, size_t tmp_len)
{
    size_t i;

    for (i = 0; i < tmp_len; ++i) {
        size_t end;
        size_t k;

        if (_wind_combining_class(tmp[i]) == 0)
            continue;

        /* find the end of the run of non-zero combining classes */
        for (end = i + 1;
             end < tmp_len && _wind_combining_class(tmp[end]);
             ++end)
            ;

        /*
         * Insertion sort of tmp[i .. end).  The sort has to be stable:
         * code points with equal combining class must keep their relative
         * order, which qsort() does not guarantee.  Elements are only
         * moved past strictly greater ones.
         */
        for (k = i + 1; k < end; ++k) {
            uint32_t c = tmp[k];
            size_t m = k;

            while (m > i && cc_cmp(&tmp[m - 1], &c) > 0) {
                tmp[m] = tmp[m - 1];
                --m;
            }
            tmp[m] = c;
        }

        /* tmp[end], if any, has class zero; the loop increment skips it */
        i = end;
    }
}
|
||||
|
||||
static uint32_t
|
||||
find_composition(const uint32_t *in, unsigned in_len)
|
||||
{
|
||||
unsigned short canon_index = 0;
|
||||
uint32_t cur;
|
||||
unsigned n = 0;
|
||||
|
||||
cur = hangul_composition(in, in_len);
|
||||
if (cur)
|
||||
return cur;
|
||||
|
||||
do {
|
||||
const struct canon_node *c = &_wind_canon_table[canon_index];
|
||||
if (n % 5 == 0) {
|
||||
cur = *in++;
|
||||
if (in_len-- == 0)
|
||||
return c->val;
|
||||
}
|
||||
unsigned i = cur >> 16;
|
||||
if (i < c->next_start || i >= c->next_end)
|
||||
canon_index = 0;
|
||||
else
|
||||
canon_index =
|
||||
_wind_canon_next_table[c->next_offset + i - c->next_start];
|
||||
if (canon_index != 0) {
|
||||
cur = (cur << 4) & 0xFFFFF;
|
||||
++n;
|
||||
}
|
||||
} while (canon_index != 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
combine(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len)
|
||||
{
|
||||
unsigned i;
|
||||
int ostarter;
|
||||
unsigned o = 0;
|
||||
int old_cc;
|
||||
int cc;
|
||||
|
||||
for (i = 0; i < in_len;) {
|
||||
while (i < in_len && (cc = _wind_combining_class(in[i])) != 0) {
|
||||
out[o++] = in[i++];
|
||||
}
|
||||
if (i < in_len) {
|
||||
ostarter = o;
|
||||
out[o++] = in[i++];
|
||||
old_cc = -1;
|
||||
|
||||
while (i < in_len) {
|
||||
uint32_t v[2] = {out[ostarter], in[i]};
|
||||
uint32_t comb;
|
||||
cc = _wind_combining_class(in[i]);
|
||||
if (old_cc != cc && (comb = find_composition(v, 2))) {
|
||||
out[ostarter] = comb;
|
||||
} else if (cc == 0) {
|
||||
break;
|
||||
} else {
|
||||
out[o++] = in[i];
|
||||
old_cc = cc;
|
||||
}
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
*out_len = o;
|
||||
}
|
||||
|
||||
int
|
||||
_wind_stringprep_normalize(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len)
|
||||
{
|
||||
size_t tmp_len;
|
||||
uint32_t *tmp;
|
||||
int ret;
|
||||
|
||||
tmp_len = in_len * 4;
|
||||
if (tmp_len < MAX_LENGTH)
|
||||
tmp_len = MAX_LENGTH;
|
||||
tmp = malloc(tmp_len * sizeof(uint32_t));
|
||||
if (tmp == NULL)
|
||||
return -1;
|
||||
|
||||
ret = compat_decomp(in, in_len, tmp, &tmp_len);
|
||||
if (ret) {
|
||||
free(tmp);
|
||||
return ret;
|
||||
}
|
||||
canonical_reorder(tmp, tmp_len);
|
||||
combine(tmp, tmp_len, out, out_len);
|
||||
free(tmp);
|
||||
return 0;
|
||||
}
|
146
lib/wind/punycode.c
Normal file
146
lib/wind/punycode.c
Normal file
@@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/*
 * Parameters of the encoder below (named as in RFC 3492).
 */
static const unsigned base         = 36;  /* number of digits, see digit() */
static const unsigned t_min        = 1;   /* lower clamp of the threshold t */
static const unsigned t_max        = 26;  /* upper clamp of the threshold t */
static const unsigned skew         = 38;  /* used by adapt() */
static const unsigned damp         = 700; /* first-time divisor in adapt() */
static const unsigned initial_n    = 128; /* first code point to be encoded */
static const unsigned initial_bias = 72;  /* bias before the first adapt() */
|
||||
|
||||
/*
 * Return the character that represents the digit value `n':
 * 0..25 map to 'a'..'z' and 26..35 map to '0'..'9'.
 */
static unsigned
digit(unsigned n)
{
    static const char alphabet[] = "abcdefghijklmnopqrstuvwxyz0123456789";

    return alphabet[n];
}
|
||||
|
||||
static unsigned
|
||||
adapt(unsigned delta, unsigned numpoints, int first)
|
||||
{
|
||||
unsigned k;
|
||||
|
||||
if (first)
|
||||
delta = delta / damp;
|
||||
else
|
||||
delta /= 2;
|
||||
delta += delta / numpoints;
|
||||
k = 0;
|
||||
while (delta > ((base - t_min) * t_max) / 2) {
|
||||
delta /= base - t_min;
|
||||
k += base;
|
||||
}
|
||||
return k + (((base - t_min + 1) * delta) / (delta + skew));
|
||||
}
|
||||
|
||||
int
|
||||
wind_punycode_toascii(const uint32_t *in, size_t in_len,
|
||||
char *out, size_t *out_len)
|
||||
{
|
||||
unsigned n = initial_n;
|
||||
unsigned delta = 0;
|
||||
unsigned bias = initial_bias;
|
||||
unsigned h = 0;
|
||||
unsigned b;
|
||||
unsigned i;
|
||||
unsigned o = 0;
|
||||
unsigned m;
|
||||
int ret = 0;
|
||||
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
if (in[i] < 0x80) {
|
||||
++h;
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = in[i];
|
||||
}
|
||||
}
|
||||
b = h;
|
||||
if (b > 0) {
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = 0x2D;
|
||||
}
|
||||
while (h < in_len) {
|
||||
ret = 1;
|
||||
m = (unsigned)-1;
|
||||
for (i = 0; i < in_len; ++i)
|
||||
if(in[i] < m && in[i] >= n)
|
||||
m = in[i];
|
||||
|
||||
delta += (m - n) * (h + 1);
|
||||
n = m;
|
||||
for (i = 0; i < in_len; ++i) {
|
||||
if (in[i] < n) {
|
||||
++delta;
|
||||
} else if (in[i] == n) {
|
||||
unsigned q = delta;
|
||||
unsigned k;
|
||||
for (k = base; ; k += base) {
|
||||
unsigned t;
|
||||
if (k <= bias)
|
||||
t = t_min;
|
||||
else if (k >= bias + t_max)
|
||||
t = t_max;
|
||||
else
|
||||
t = k - bias;
|
||||
if (q < t)
|
||||
break;
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = digit(t + ((q - t) % (base - t)));
|
||||
q = (q - t) / (base - t);
|
||||
}
|
||||
if (o >= *out_len)
|
||||
return -1;
|
||||
out[o++] = digit(q);
|
||||
/* output */
|
||||
bias = adapt(delta, h + 1, h == b);
|
||||
delta = 0;
|
||||
++h;
|
||||
}
|
||||
}
|
||||
++delta;
|
||||
++n;
|
||||
}
|
||||
|
||||
*out_len = o;
|
||||
return ret;
|
||||
}
|
60
lib/wind/rfc3454.py
Normal file
60
lib/wind/rfc3454.py
Normal file
@@ -0,0 +1,60 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
def read(filename):
    """Return a dict of the tables found in the text file `filename'.

    A table starts after a line of the form
    "----- Start Table NAME -----" and ends at the matching
    "----- End Table NAME -----" line; both may be surrounded by spaces.
    The dict maps the NAME taken from the End line to the list of lines
    between the two markers, each with its line terminator kept.
    Lines outside of tables are ignored.
    """
    start_re = re.compile(r'^ *----- Start Table ([A-Z0-9\.]+) ----- *$')
    end_re = re.compile(r'^ *----- End Table ([A-Z0-9\.]+) ----- *$')

    ret = {}
    inTable = False
    t = None
    f = open(filename, 'r')
    # make sure the file is closed even if reading fails
    try:
        for l in f:
            if inTable:
                m = end_re.search(l)
                if m:
                    ret[m.group(1)] = t
                    inTable = False
                else:
                    t.append(l)
            if start_re.search(l):
                inTable = True
                t = []
    finally:
        f.close()
    return ret
|
5099
lib/wind/rfc3454.txt
Normal file
5099
lib/wind/rfc3454.txt
Normal file
File diff suppressed because it is too large
Load Diff
1235
lib/wind/rfc3490.txt
Normal file
1235
lib/wind/rfc3490.txt
Normal file
File diff suppressed because it is too large
Load Diff
395
lib/wind/rfc3491.txt
Normal file
395
lib/wind/rfc3491.txt
Normal file
@@ -0,0 +1,395 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Network Working Group P. Hoffman
|
||||
Request for Comments: 3491 IMC & VPNC
|
||||
Category: Standards Track M. Blanchet
|
||||
Viagenie
|
||||
March 2003
|
||||
|
||||
|
||||
Nameprep: A Stringprep Profile for
|
||||
Internationalized Domain Names (IDN)
|
||||
|
||||
Status of this Memo
|
||||
|
||||
This document specifies an Internet standards track protocol for the
|
||||
Internet community, and requests discussion and suggestions for
|
||||
improvements. Please refer to the current edition of the "Internet
|
||||
Official Protocol Standards" (STD 1) for the standardization state
|
||||
and status of this protocol. Distribution of this memo is unlimited.
|
||||
|
||||
Copyright Notice
|
||||
|
||||
Copyright (C) The Internet Society (2003). All Rights Reserved.
|
||||
|
||||
Abstract
|
||||
|
||||
This document describes how to prepare internationalized domain name
|
||||
(IDN) labels in order to increase the likelihood that name input and
|
||||
name comparison work in ways that make sense for typical users
|
||||
throughout the world. This profile of the stringprep protocol is
|
||||
used as part of a suite of on-the-wire protocols for
|
||||
internationalizing the Domain Name System (DNS).
|
||||
|
||||
1. Introduction
|
||||
|
||||
This document specifies processing rules that will allow users to
|
||||
enter internationalized domain names (IDNs) into applications and
|
||||
have the highest chance of getting the content of the strings
|
||||
correct. It is a profile of stringprep [STRINGPREP]. These
|
||||
processing rules are only intended for internationalized domain
|
||||
names, not for arbitrary text.
|
||||
|
||||
This profile defines the following, as required by [STRINGPREP].
|
||||
|
||||
- The intended applicability of the profile: internationalized
|
||||
domain names processed by IDNA.
|
||||
|
||||
- The character repertoire that is the input and output to
|
||||
stringprep: Unicode 3.2, specified in section 2.
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 1]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
- The mappings used: specified in section 3.
|
||||
|
||||
- The Unicode normalization used: specified in section 4.
|
||||
|
||||
- The characters that are prohibited as output: specified in section
|
||||
5.
|
||||
|
||||
- Bidirectional character handling: specified in section 6.
|
||||
|
||||
1.1 Interaction of protocol parts
|
||||
|
||||
Nameprep is used by the IDNA [IDNA] protocol for preparing domain
|
||||
names; it is not designed for any other purpose. It is explicitly
|
||||
not designed for processing arbitrary free text and SHOULD NOT be
|
||||
used for that purpose. Nameprep is a profile of Stringprep
|
||||
[STRINGPREP]. Implementations of Nameprep MUST fully implement
|
||||
Stringprep.
|
||||
|
||||
Nameprep is used to process domain name labels, not domain names.
|
||||
IDNA calls nameprep for each label in a domain name, not for the
|
||||
whole domain name.
|
||||
|
||||
1.2 Terminology
|
||||
|
||||
The key words "MUST", "MUST NOT", "SHOULD", "SHOULD NOT", and "MAY"
|
||||
in this document are to be interpreted as described in BCP 14, RFC
|
||||
2119 [RFC2119].
|
||||
|
||||
2. Character Repertoire
|
||||
|
||||
This profile uses Unicode 3.2, as defined in [STRINGPREP] Appendix A.
|
||||
|
||||
3. Mapping
|
||||
|
||||
This profile specifies mapping using the following tables from
|
||||
[STRINGPREP]:
|
||||
|
||||
Table B.1
|
||||
Table B.2
|
||||
|
||||
4. Normalization
|
||||
|
||||
This profile specifies using Unicode normalization form KC, as
|
||||
described in [STRINGPREP].
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 2]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
5. Prohibited Output
|
||||
|
||||
This profile specifies prohibiting using the following tables from
|
||||
[STRINGPREP]:
|
||||
|
||||
Table C.1.2
|
||||
Table C.2.2
|
||||
Table C.3
|
||||
Table C.4
|
||||
Table C.5
|
||||
Table C.6
|
||||
Table C.7
|
||||
Table C.8
|
||||
Table C.9
|
||||
|
||||
IMPORTANT NOTE: This profile MUST be used with the IDNA protocol.
|
||||
The IDNA protocol has additional prohibitions that are checked
|
||||
outside of this profile.
|
||||
|
||||
6. Bidirectional characters
|
||||
|
||||
This profile specifies checking bidirectional strings as described in
|
||||
[STRINGPREP] section 6.
|
||||
|
||||
7. Unassigned Code Points in Internationalized Domain Names
|
||||
|
||||
If the processing in [IDNA] specifies that a list of unassigned code
|
||||
points be used, the system uses table A.1 from [STRINGPREP] as its
|
||||
list of unassigned code points.
|
||||
|
||||
8. References
|
||||
|
||||
8.1 Normative References
|
||||
|
||||
[RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
|
||||
Requirement Levels", BCP 14, RFC 2119, March 1997.
|
||||
|
||||
[STRINGPREP] Hoffman, P. and M. Blanchet, "Preparation of
|
||||
Internationalized Strings ("stringprep")", RFC 3454,
|
||||
December 2002.
|
||||
|
||||
[IDNA] Faltstrom, P., Hoffman, P. and A. Costello,
|
||||
"Internationalizing Domain Names in Applications
|
||||
(IDNA)", RFC 3490, March 2003.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 3]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
8.2 Informative references
|
||||
|
||||
[STD13] Mockapetris, P., "Domain names - concepts and
|
||||
facilities", STD 13, RFC 1034, and "Domain names -
|
||||
implementation and specification", STD 13, RFC 1035,
|
||||
November 1987.
|
||||
|
||||
9. Security Considerations
|
||||
|
||||
The Unicode and ISO/IEC 10646 repertoires have many characters that
|
||||
look similar. In many cases, users of security protocols might do
|
||||
visual matching, such as when comparing the names of trusted third
|
||||
parties. Because it is impossible to map similar-looking characters
|
||||
without a great deal of context such as knowing the fonts used,
|
||||
stringprep does nothing to map similar-looking characters together
|
||||
nor to prohibit some characters because they look like others.
|
||||
|
||||
Security on the Internet partly relies on the DNS. Thus, any change
|
||||
to the characteristics of the DNS can change the security of much of
|
||||
the Internet.
|
||||
|
||||
Domain names are used by users to connect to Internet servers. The
|
||||
security of the Internet would be compromised if a user entering a
|
||||
single internationalized name could be connected to different servers
|
||||
based on different interpretations of the internationalized domain
|
||||
name.
|
||||
|
||||
Current applications might assume that the characters allowed in
|
||||
domain names will always be the same as they are in [STD13]. This
|
||||
document vastly increases the number of characters available in
|
||||
domain names. Every program that uses "special" characters in
|
||||
conjunction with domain names may be vulnerable to attack based on
|
||||
the new characters allowed by this specification.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 4]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
10. IANA Considerations
|
||||
|
||||
This is a profile of stringprep. It has been registered by the IANA
|
||||
in the stringprep profile registry
|
||||
(www.iana.org/assignments/stringprep-profiles).
|
||||
|
||||
Name of this profile:
|
||||
Nameprep
|
||||
|
||||
RFC in which the profile is defined:
|
||||
This document.
|
||||
|
||||
Indicator whether or not this is the newest version of the
|
||||
profile:
|
||||
This is the first version of Nameprep.
|
||||
|
||||
11. Acknowledgements
|
||||
|
||||
Many people from the IETF IDN Working Group and the Unicode Technical
|
||||
Committee contributed ideas that went into this document.
|
||||
|
||||
The IDN Nameprep design team made many useful changes to the
|
||||
document. That team and its advisors include:
|
||||
|
||||
Asmus Freytag
|
||||
Cathy Wissink
|
||||
Francois Yergeau
|
||||
James Seng
|
||||
Marc Blanchet
|
||||
Mark Davis
|
||||
Martin Duerst
|
||||
Patrik Faltstrom
|
||||
Paul Hoffman
|
||||
|
||||
Additional significant improvements were proposed by:
|
||||
|
||||
Jonathan Rosenne
|
||||
Kent Karlsson
|
||||
Scott Hollenbeck
|
||||
Dave Crocker
|
||||
Erik Nordmark
|
||||
Matitiahu Allouche
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 5]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
12. Authors' Addresses
|
||||
|
||||
Paul Hoffman
|
||||
Internet Mail Consortium and VPN Consortium
|
||||
127 Segre Place
|
||||
Santa Cruz, CA 95060 USA
|
||||
|
||||
EMail: paul.hoffman@imc.org and paul.hoffman@vpnc.org
|
||||
|
||||
|
||||
Marc Blanchet
|
||||
Viagenie inc.
|
||||
2875 boul. Laurier, bur. 300
|
||||
Ste-Foy, Quebec, Canada, G1V 2M2
|
||||
|
||||
EMail: Marc.Blanchet@viagenie.qc.ca
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 6]
|
||||
|
||||
RFC 3491 IDN Nameprep March 2003
|
||||
|
||||
|
||||
13. Full Copyright Statement
|
||||
|
||||
Copyright (C) The Internet Society (2003). All Rights Reserved.
|
||||
|
||||
This document and translations of it may be copied and furnished to
|
||||
others, and derivative works that comment on or otherwise explain it
|
||||
or assist in its implementation may be prepared, copied, published
|
||||
and distributed, in whole or in part, without restriction of any
|
||||
kind, provided that the above copyright notice and this paragraph are
|
||||
included on all such copies and derivative works. However, this
|
||||
document itself may not be modified in any way, such as by removing
|
||||
the copyright notice or references to the Internet Society or other
|
||||
Internet organizations, except as needed for the purpose of
|
||||
developing Internet standards in which case the procedures for
|
||||
copyrights defined in the Internet Standards process must be
|
||||
followed, or as required to translate it into languages other than
|
||||
English.
|
||||
|
||||
The limited permissions granted above are perpetual and will not be
|
||||
revoked by the Internet Society or its successors or assigns.
|
||||
|
||||
This document and the information contained herein is provided on an
|
||||
"AS IS" basis and THE INTERNET SOCIETY AND THE INTERNET ENGINEERING
|
||||
TASK FORCE DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, INCLUDING
|
||||
BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE INFORMATION
|
||||
HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED WARRANTIES OF
|
||||
MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
Acknowledgement
|
||||
|
||||
Funding for the RFC Editor function is currently provided by the
|
||||
Internet Society.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Hoffman & Blanchet Standards Track [Page 7]
|
||||
|
1963
lib/wind/rfc3492.txt
Normal file
1963
lib/wind/rfc3492.txt
Normal file
File diff suppressed because it is too large
Load Diff
339
lib/wind/rfc4013.txt
Normal file
339
lib/wind/rfc4013.txt
Normal file
@@ -0,0 +1,339 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Network Working Group K. Zeilenga
|
||||
Request for Comments: 4013 OpenLDAP Foundation
|
||||
Category: Standards Track February 2005
|
||||
|
||||
|
||||
SASLprep: Stringprep Profile for User Names and Passwords
|
||||
|
||||
Status of This Memo
|
||||
|
||||
This document specifies an Internet standards track protocol for the
|
||||
Internet community, and requests discussion and suggestions for
|
||||
improvements. Please refer to the current edition of the "Internet
|
||||
Official Protocol Standards" (STD 1) for the standardization state
|
||||
and status of this protocol. Distribution of this memo is unlimited.
|
||||
|
||||
Copyright Notice
|
||||
|
||||
Copyright (C) The Internet Society (2005).
|
||||
|
||||
Abstract
|
||||
|
||||
This document describes how to prepare Unicode strings representing
|
||||
user names and passwords for comparison. The document defines the
|
||||
"SASLprep" profile of the "stringprep" algorithm to be used for both
|
||||
user names and passwords. This profile is intended to be used by
|
||||
Simple Authentication and Security Layer (SASL) mechanisms (such as
|
||||
PLAIN, CRAM-MD5, and DIGEST-MD5), as well as other protocols
|
||||
exchanging simple user names and/or passwords.
|
||||
|
||||
1. Introduction
|
||||
|
||||
The use of simple user names and passwords in authentication and
|
||||
authorization is pervasive on the Internet. To increase the
|
||||
likelihood that user name and password input and comparison work in
|
||||
ways that make sense for typical users throughout the world, this
|
||||
document defines rules for preparing internationalized user names and
|
||||
passwords for comparison. For simplicity and implementation ease, a
|
||||
single algorithm is defined for both user names and passwords.
|
||||
|
||||
The algorithm assumes all strings are comprised of characters from
|
||||
the Unicode [Unicode] character set.
|
||||
|
||||
This document defines the "SASLprep" profile of the "stringprep"
|
||||
algorithm [StringPrep].
|
||||
|
||||
The profile is designed for use in Simple Authentication and Security
|
||||
Layer ([SASL]) mechanisms, such as [PLAIN], [CRAM-MD5], and
|
||||
[DIGEST-MD5]. It may be applicable where simple user names and
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 1]
|
||||
|
||||
RFC 4013 SASLprep February 2005
|
||||
|
||||
|
||||
passwords are used. This profile is not intended for use in
|
||||
preparing identity strings that are not simple user names (e.g.,
|
||||
email addresses, domain names, distinguished names), or where
|
||||
identity or password strings are not character data or require
|
||||
different handling (e.g., case folding).
|
||||
|
||||
This document does not alter the technical specification of any
|
||||
existing protocols. Any specification that wishes to use the
|
||||
algorithm described in this document needs to explicitly incorporate
|
||||
this document and provide precise details as to where and how this
|
||||
algorithm is used by implementations of that specification.
|
||||
|
||||
2. The SASLprep Profile
|
||||
|
||||
This section defines the "SASLprep" profile of the "stringprep"
|
||||
algorithm [StringPrep]. This profile is intended for use in
|
||||
preparing strings representing simple user names and passwords.
|
||||
|
||||
This profile uses Unicode 3.2 [Unicode].
|
||||
|
||||
Character names in this document use the notation for code points and
|
||||
names from the Unicode Standard [Unicode]. For example, the letter
|
||||
"a" may be represented as either <U+0061> or <LATIN SMALL LETTER A>.
|
||||
In the lists of mappings and the prohibited characters, the "U+" is
|
||||
left off to make the lists easier to read. The comments for
|
||||
character ranges are shown in square brackets (such as "[CONTROL
|
||||
CHARACTERS]") and do not come from the standard.
|
||||
|
||||
Note: A glossary of terms used in Unicode can be found in [Glossary].
|
||||
Information on the Unicode character encoding model can be found in
|
||||
[CharModel].
|
||||
|
||||
2.1. Mapping
|
||||
|
||||
This profile specifies:
|
||||
|
||||
- non-ASCII space characters [StringPrep, C.1.2] that can be
|
||||
mapped to SPACE (U+0020), and
|
||||
|
||||
- the "commonly mapped to nothing" characters [StringPrep, B.1]
|
||||
that can be mapped to nothing.
|
||||
|
||||
2.2. Normalization
|
||||
|
||||
This profile specifies using Unicode normalization form KC, as
|
||||
described in Section 4 of [StringPrep].
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 2]
|
||||
|
||||
RFC 4013 SASLprep February 2005
|
||||
|
||||
|
||||
2.3. Prohibited Output
|
||||
|
||||
This profile specifies the following characters as prohibited input:
|
||||
|
||||
- Non-ASCII space characters [StringPrep, C.1.2]
|
||||
- ASCII control characters [StringPrep, C.2.1]
|
||||
- Non-ASCII control characters [StringPrep, C.2.2]
|
||||
- Private Use characters [StringPrep, C.3]
|
||||
- Non-character code points [StringPrep, C.4]
|
||||
- Surrogate code points [StringPrep, C.5]
|
||||
- Inappropriate for plain text characters [StringPrep, C.6]
|
||||
- Inappropriate for canonical representation characters
|
||||
[StringPrep, C.7]
|
||||
- Change display properties or deprecated characters
|
||||
[StringPrep, C.8]
|
||||
- Tagging characters [StringPrep, C.9]
|
||||
|
||||
2.4. Bidirectional Characters
|
||||
|
||||
This profile specifies checking bidirectional strings as described in
|
||||
[StringPrep, Section 6].
|
||||
|
||||
2.5. Unassigned Code Points
|
||||
|
||||
This profile specifies the [StringPrep, A.1] table as its list of
|
||||
unassigned code points.
|
||||
|
||||
3. Examples
|
||||
|
||||
The following table provides examples of how various character data
|
||||
is transformed by the SASLprep string preparation algorithm
|
||||
|
||||
# Input Output Comments
|
||||
- ----- ------ --------
|
||||
1 I<U+00AD>X IX SOFT HYPHEN mapped to nothing
|
||||
2 user user no transformation
|
||||
3 USER USER case preserved, will not match #2
|
||||
4 <U+00AA> a output is NFKC, input in ISO 8859-1
|
||||
5 <U+2168> IX output is NFKC, will match #1
|
||||
6 <U+0007> Error - prohibited character
|
||||
7 <U+0627><U+0031> Error - bidirectional check
|
||||
|
||||
4. Security Considerations
|
||||
|
||||
This profile is intended to prepare simple user name and password
|
||||
strings for comparison or use in cryptographic functions (e.g.,
|
||||
message digests). The preparation algorithm was specifically
|
||||
designed such that its output is canonical, and it is well-formed.
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 3]
|
||||
|
||||
RFC 4013 SASLprep February 2005
|
||||
|
||||
|
||||
However, due to an anomaly [PR29] in the specification of Unicode
|
||||
normalization, canonical equivalence is not guaranteed for a select
|
||||
few character sequences. These sequences, however, do not appear in
|
||||
well-formed text. This specification was published despite this
|
||||
known technical problem. It is expected that this specification will
|
||||
be revised before further progression on the Standards Track (after
|
||||
[Unicode] and/or [StringPrep] specifications have been updated to
|
||||
address this problem).
|
||||
|
||||
It is not intended for preparing identity strings that are not simple
|
||||
user names (e.g., distinguished names, domain names), nor is the
|
||||
profile intended for use of simple user names that require different
|
||||
handling (such as case folding). Protocols (or applications of those
|
||||
protocols) that have application-specific identity forms and/or
|
||||
comparison algorithms should use mechanisms specifically designed for
|
||||
these forms and algorithms.
|
||||
|
||||
Application of string preparation may have an impact upon the
|
||||
feasibility of brute force and dictionary attacks. While the number
|
||||
of possible prepared strings is less than the number of possible
|
||||
Unicode strings, the number of usable names and passwords is greater
|
||||
than if only ASCII were used. Though SASLprep eliminates some
|
||||
Unicode code point sequences as possible prepared strings, that
|
||||
elimination generally makes the (canonical) output forms practicable
|
||||
and prohibits nonsensical inputs.
|
||||
|
||||
User names and passwords should be protected from eavesdropping.
|
||||
|
||||
General "stringprep" and Unicode security considerations apply. Both
|
||||
are discussed in [StringPrep].
|
||||
|
||||
5. IANA Considerations
|
||||
|
||||
This document details the "SASLprep" profile of the [StringPrep]
|
||||
protocol. This profile has been registered in the stringprep profile
|
||||
registry.
|
||||
|
||||
Name of this profile: SASLprep
|
||||
RFC in which the profile is defined: RFC 4013
|
||||
Indicator whether or not this is the newest version of the
|
||||
profile: This is the first version of the SASLprep profile.
|
||||
|
||||
6. Acknowledgement
|
||||
|
||||
This document borrows text from "Preparation of Internationalized
|
||||
Strings ('stringprep')" and "Nameprep: A Stringprep Profile for
|
||||
Internationalized Domain Names", both by Paul Hoffman and Marc
|
||||
Blanchet. This document is a product of the IETF SASL WG.
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 4]
|
||||
|
||||
RFC 4013 SASLprep February 2005
|
||||
|
||||
|
||||
7. Normative References
|
||||
|
||||
[StringPrep] Hoffman, P. and M. Blanchet, "Preparation of
|
||||
Internationalized Strings ("stringprep")", RFC 3454,
|
||||
December 2002.
|
||||
|
||||
[Unicode] The Unicode Consortium, "The Unicode Standard, Version
|
||||
3.2.0" is defined by "The Unicode Standard, Version
|
||||
3.0" (Reading, MA, Addison-Wesley, 2000. ISBN 0-201-
|
||||
61633-5), as amended by the "Unicode Standard Annex
|
||||
#27: Unicode 3.1"
|
||||
(http://www.unicode.org/reports/tr27/) and by the
|
||||
"Unicode Standard Annex #28: Unicode 3.2"
|
||||
(http://www.unicode.org/reports/tr28/).
|
||||
|
||||
8. Informative References
|
||||
|
||||
[Glossary] The Unicode Consortium, "Unicode Glossary",
|
||||
<http://www.unicode.org/glossary/>.
|
||||
|
||||
[CharModel] Whistler, K. and M. Davis, "Unicode Technical Report
|
||||
#17, Character Encoding Model", UTR17,
|
||||
<http://www.unicode.org/unicode/reports/tr17/>, August
|
||||
2000.
|
||||
|
||||
[SASL] Melnikov, A., Ed., "Simple Authentication and Security
|
||||
Layer (SASL)", Work in Progress.
|
||||
|
||||
[CRAM-MD5] Nerenberg, L., "The CRAM-MD5 SASL Mechanism", Work in
|
||||
Progress.
|
||||
|
||||
[DIGEST-MD5] Leach, P., Newman, C., and A. Melnikov, "Using Digest
|
||||
Authentication as a SASL Mechanism", Work in Progress.
|
||||
|
||||
[PLAIN] Zeilenga, K., Ed., "The Plain SASL Mechanism", Work in
|
||||
Progress.
|
||||
|
||||
[PR29] "Public Review Issue #29: Normalization Issue",
|
||||
<http://www.unicode.org/review/pr-29.html>, February
|
||||
2004.
|
||||
|
||||
Author's Address
|
||||
|
||||
Kurt D. Zeilenga
|
||||
OpenLDAP Foundation
|
||||
|
||||
EMail: Kurt@OpenLDAP.org
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 5]
|
||||
|
||||
RFC 4013 SASLprep February 2005
|
||||
|
||||
|
||||
Full Copyright Statement
|
||||
|
||||
Copyright (C) The Internet Society (2005).
|
||||
|
||||
This document is subject to the rights, licenses and restrictions
|
||||
contained in BCP 78, and except as set forth therein, the authors
|
||||
retain all their rights.
|
||||
|
||||
This document and the information contained herein are provided on an
|
||||
"AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
|
||||
OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
|
||||
ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
|
||||
INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
Intellectual Property
|
||||
|
||||
The IETF takes no position regarding the validity or scope of any
|
||||
Intellectual Property Rights or other rights that might be claimed to
|
||||
pertain to the implementation or use of the technology described in
|
||||
this document or the extent to which any license under such rights
|
||||
might or might not be available; nor does it represent that it has
|
||||
made any independent effort to identify any such rights. Information
|
||||
on the IETF's procedures with respect to rights in IETF Documents can
|
||||
be found in BCP 78 and BCP 79.
|
||||
|
||||
Copies of IPR disclosures made to the IETF Secretariat and any
|
||||
assurances of licenses to be made available, or the result of an
|
||||
attempt made to obtain a general license or permission for the use of
|
||||
such proprietary rights by implementers or users of this
|
||||
specification can be obtained from the IETF on-line IPR repository at
|
||||
http://www.ietf.org/ipr.
|
||||
|
||||
The IETF invites any interested party to bring to its attention any
|
||||
copyrights, patents or patent applications, or other proprietary
|
||||
rights that may cover technology that may be required to implement
|
||||
this standard. Please address the information to the IETF at ietf-
|
||||
ipr@ietf.org.
|
||||
|
||||
|
||||
Acknowledgement
|
||||
|
||||
Funding for the RFC Editor function is currently provided by the
|
||||
Internet Society.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 6]
|
||||
|
150
lib/wind/rfc4518.py
Normal file
150
lib/wind/rfc4518.py
Normal file
@@ -0,0 +1,150 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004, 2008 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
def read():
    """Return a dict of tables from RFC 4518.

    Keys and values:

    "rfc4518-map"   -- list of strings for the section 2.2 "Map" step,
                       each naming a code point or code point range
                       and what it is mapped to (nothing or SPACE).
    "rfc4518-error" -- list of strings for the section 2.4 "Prohibit"
                       step; only REPLACEMENT CHARACTER (U+FFFD) is
                       listed here.

    A new dict with new lists is built on every call.
    """

    ret = {}

    # 2.2. Map
    #
    # SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
    # points are mapped to nothing.  COMBINING GRAPHEME JOINER (U+034F) and
    # VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
    # mapped to nothing.  The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
    # mapped to nothing.
    #
    # The RFC text prints the second variation selector range as
    # "FF00-FE0F"; the entries below cover FE00-FE0F.

    mapping = [
        " 00AD; ; Map to nothing",
        " 1806; ; Map to nothing",
        " 034F; ; Map to nothing",

        " 180B; ; Map to nothing",
        " 180C; ; Map to nothing",
        " 180D; ; Map to nothing",
    ]

    # One entry per code point for FE00 through FE0F, in ascending order.
    for code_point in range(0xFE00, 0xFE10):
        mapping.append(" %04X; ; Map to nothing" % code_point)

    mapping.append(" FFFC; ; Map to nothing")

    # CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
    # TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
    # (U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).

    mapping.extend([
        " 0009; 0020 ; Map to SPACE",
        " 000A; 0020 ; Map to SPACE",
        " 000B; 0020 ; Map to SPACE",
        " 000C; 0020 ; Map to SPACE",
        " 000D; 0020 ; Map to SPACE",
        " 0085; 0020 ; Map to SPACE",
    ])

    # All other control code (e.g., Cc) points or code points with a
    # control function (e.g., Cf) are mapped to nothing.  The following is
    # a complete list of these code points: U+0000-0008, 000E-001F, 007F-
    # 0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
    # 206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.

    mapping.extend([
        " 0000-0008; ; Map to nothing",
        " 000E-001F; ; Map to nothing",
        " 007F-0084; ; Map to nothing",
        " 0086-009F; ; Map to nothing",
        " 06DD; ; Map to nothing",
        " 070F; ; Map to nothing",
        " 180E; ; Map to nothing",
        " 200C-200F; ; Map to nothing",
        " 202A-202E; ; Map to nothing",
        " 2060-2063; ; Map to nothing",
        " 206A-206F; ; Map to nothing",
        " FEFF; ; Map to nothing",
        " FFF9-FFFB; ; Map to nothing",
        " 1D173-1D17A; ; Map to nothing",
        " E0001; ; Map to nothing",
        " E0020-E007F; ; Map to nothing",
    ])

    # ZERO WIDTH SPACE (U+200B) is mapped to nothing.  All other code
    # points with Separator (space, line, or paragraph) property (e.g., Zs,
    # Zl, or Zp) are mapped to SPACE (U+0020).  The following is a complete
    # list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
    # 202F, 205F, 3000.

    mapping.extend([
        " 200B; ; Map to nothing",
        " 0020; 0020; Map to SPACE",
        " 00A0; 0020; Map to SPACE",
        " 1680; 0020; Map to SPACE",
        " 2000-200A; 0020; Map to SPACE",
        " 2028-2029; 0020; Map to SPACE",
        " 202F; 0020; Map to SPACE",
        " 205F; 0020; Map to SPACE",
        " 3000; 0020; Map to SPACE",
    ])

    ret["rfc4518-map"] = mapping

    # For case ignore, numeric, and stored prefix string matching rules,
    # characters are case folded per B.2 of [RFC3454].  No table is built
    # for that step in this function.

    # 2.4. Prohibit
    #
    # The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.

    ret["rfc4518-error"] = [" FFFD;"]

    return ret
|
787
lib/wind/rfc4518.txt
Normal file
787
lib/wind/rfc4518.txt
Normal file
@@ -0,0 +1,787 @@
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Network Working Group K. Zeilenga
|
||||
Request for Comments: 4518 OpenLDAP Foundation
|
||||
Category: Standards Track June 2006
|
||||
|
||||
|
||||
Lightweight Directory Access Protocol (LDAP):
|
||||
Internationalized String Preparation
|
||||
|
||||
Status of This Memo
|
||||
|
||||
This document specifies an Internet standards track protocol for the
|
||||
Internet community, and requests discussion and suggestions for
|
||||
improvements. Please refer to the current edition of the "Internet
|
||||
Official Protocol Standards" (STD 1) for the standardization state
|
||||
and status of this protocol. Distribution of this memo is unlimited.
|
||||
|
||||
Copyright Notice
|
||||
|
||||
Copyright (C) The Internet Society (2006).
|
||||
|
||||
Abstract
|
||||
|
||||
The previous Lightweight Directory Access Protocol (LDAP) technical
|
||||
specifications did not precisely define how character string matching
|
||||
is to be performed. This led to a number of usability and
|
||||
interoperability problems. This document defines string preparation
|
||||
algorithms for character-based matching rules defined for use in
|
||||
LDAP.
|
||||
|
||||
1. Introduction
|
||||
|
||||
1.1. Background
|
||||
|
||||
A Lightweight Directory Access Protocol (LDAP) [RFC4510] matching
|
||||
rule [RFC4517] defines an algorithm for determining whether a
|
||||
presented value matches an attribute value in accordance with the
|
||||
criteria defined for the rule. The proposition may be evaluated to
|
||||
True, False, or Undefined.
|
||||
|
||||
True - the attribute contains a matching value,
|
||||
|
||||
False - the attribute contains no matching value,
|
||||
|
||||
Undefined - it cannot be determined whether the attribute contains
|
||||
a matching value.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 1]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
For instance, the caseIgnoreMatch matching rule may be used to
|
||||
compare whether the commonName attribute contains a particular value
|
||||
without regard for case and insignificant spaces.
|
||||
|
||||
1.2. X.500 String Matching Rules
|
||||
|
||||
"X.520: Selected attribute types" [X.520] provides (among other
|
||||
things) value syntaxes and matching rules for comparing values
|
||||
commonly used in the directory [X.500]. These specifications are
|
||||
inadequate for strings composed of Unicode [Unicode] characters.
|
||||
|
||||
The caseIgnoreMatch matching rule [X.520], for example, is simply
|
||||
defined as being a case-insensitive comparison where insignificant
|
||||
spaces are ignored. For printableString, there is only one space
|
||||
character and case mapping is bijective, hence this definition is
|
||||
sufficient. However, for Unicode string types such as
|
||||
universalString, this is not sufficient. For example, a case-
|
||||
insensitive matching implementation that folded lowercase characters
|
||||
to uppercase would yield different results than an implementation
|
||||
that used uppercase to lowercase folding. Or one implementation may
|
||||
view space as referring to only SPACE (U+0020), a second
|
||||
implementation may view any character with the space separator (Zs)
|
||||
property as a space, and another implementation may view any
|
||||
character with the whitespace (WS) category as a space.
|
||||
|
||||
The lack of precise specification for character string matching has
|
||||
led to significant interoperability problems. When used in
|
||||
certificate chain validation, security vulnerabilities can arise. To
|
||||
address these problems, this document defines precise algorithms for
|
||||
preparing character strings for matching.
|
||||
|
||||
1.3. Relationship to "stringprep"
|
||||
|
||||
The character string preparation algorithms described in this
|
||||
document are based upon the "stringprep" approach [RFC3454]. In
|
||||
"stringprep", presented and stored values are first prepared for
|
||||
comparison so that a character-by-character comparison yields the
|
||||
"correct" result.
|
||||
|
||||
The approach used here is a refinement of the "stringprep" [RFC3454]
|
||||
approach. Each algorithm involves two additional preparation steps.
|
||||
|
||||
a) Prior to applying the Unicode string preparation steps outlined in
|
||||
"stringprep", the string is transcoded to Unicode.
|
||||
|
||||
b) After applying the Unicode string preparation steps outlined in
|
||||
"stringprep", the string is modified to appropriately handle
|
||||
characters insignificant to the matching rule.
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 2]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
Hence, preparation of character strings for X.500 [X.500] matching
|
||||
[X.501] involves the following steps:
|
||||
|
||||
1) Transcode
|
||||
2) Map
|
||||
3) Normalize
|
||||
4) Prohibit
|
||||
5) Check Bidi (Bidirectional)
|
||||
6) Insignificant Character Handling
|
||||
|
||||
These steps are described in Section 2.
|
||||
|
||||
It is noted that while various tables of Unicode characters included
|
||||
or referenced by this specification are derived from Unicode
|
||||
[Unicode] data, these tables are to be considered definitive for the
|
||||
purpose of implementing this specification.
|
||||
|
||||
1.4. Relationship to the LDAP Technical Specification
|
||||
|
||||
This document is an integral part of the LDAP technical specification
|
||||
[RFC4510], which obsoletes the previously defined LDAP technical
|
||||
specification [RFC3377] in its entirety.
|
||||
|
||||
This document details new LDAP internationalized character string
|
||||
preparation algorithms used by [RFC4517] and possibly other technical
|
||||
specifications defining LDAP syntaxes and/or matching rules.
|
||||
|
||||
1.5. Relationship to X.500
|
||||
|
||||
LDAP is defined [RFC4510] in X.500 terms as an X.500 access
|
||||
mechanism. As such, there is a strong desire for alignment between
|
||||
LDAP and X.500 syntax and semantics. The character string
|
||||
preparation algorithms described in this document are based upon
|
||||
"Internationalized String Matching Rules for X.500" [XMATCH] proposal
|
||||
to ITU/ISO Joint Study Group 2.
|
||||
|
||||
1.6. Conventions and Terms
|
||||
|
||||
The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT",
|
||||
"SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this
|
||||
document are to be interpreted as described in BCP 14 [RFC2119].
|
||||
|
||||
Character names in this document use the notation for code points and
|
||||
names from the Unicode Standard [Unicode]. For example, the letter
|
||||
"a" may be represented as either <U+0061> or <LATIN SMALL LETTER A>.
|
||||
In the lists of mappings and the prohibited characters, the "U+" is
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 3]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
left off to make the lists easier to read. The comments for
|
||||
character ranges are shown in square brackets (such as "[CONTROL
|
||||
CHARACTERS]") and do not come from the standard.
|
||||
|
||||
Note: a glossary of terms used in Unicode can be found in [Glossary].
|
||||
Information on the Unicode character encoding model can be found in
|
||||
[CharModel].
|
||||
|
||||
The term "combining mark", as used in this specification, refers to
|
||||
any Unicode [Unicode] code point that has a mark property (Mn, Mc,
|
||||
Me). Appendix A provides a definitive list of combining marks.
|
||||
|
||||
2. String Preparation
|
||||
|
||||
The following six-step process SHALL be applied to each presented and
|
||||
attribute value in preparation for character string matching rule
|
||||
evaluation.
|
||||
|
||||
1) Transcode
|
||||
2) Map
|
||||
3) Normalize
|
||||
4) Prohibit
|
||||
5) Check bidi
|
||||
6) Insignificant Character Handling
|
||||
|
||||
Failure in any step causes the assertion to evaluate to Undefined.
|
||||
|
||||
The character repertoire of this process is Unicode 3.2 [Unicode].
|
||||
|
||||
Note that this six-step process specification is intended to describe
|
||||
expected matching behavior. Implementations are free to use
|
||||
alternative processes so long as the matching rule evaluation
|
||||
behavior provided is consistent with the behavior described by this
|
||||
specification.
|
||||
|
||||
2.1. Transcode
|
||||
|
||||
Each non-Unicode string value is transcoded to Unicode.
|
||||
|
||||
PrintableString [X.680] values are transcoded directly to Unicode.
|
||||
|
||||
UniversalString, UTF8String, and bmpString [X.680] values need not be
|
||||
transcoded as they are Unicode-based strings (in the case of
|
||||
bmpString, a subset of Unicode).
|
||||
|
||||
TeletexString [X.680] values are transcoded to Unicode. As there is
|
||||
no standard for mapping TeletexString values to Unicode, the mapping
|
||||
is left a local matter.
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 4]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
For these and other reasons, use of TeletexString is NOT RECOMMENDED.
|
||||
|
||||
The output is the transcoded string.
|
||||
|
||||
2.2. Map
|
||||
|
||||
SOFT HYPHEN (U+00AD) and MONGOLIAN TODO SOFT HYPHEN (U+1806) code
|
||||
points are mapped to nothing. COMBINING GRAPHEME JOINER (U+034F) and
|
||||
VARIATION SELECTORs (U+180B-180D, FE00-FE0F) code points are also
|
||||
mapped to nothing. The OBJECT REPLACEMENT CHARACTER (U+FFFC) is
|
||||
mapped to nothing.
|
||||
|
||||
CHARACTER TABULATION (U+0009), LINE FEED (LF) (U+000A), LINE
|
||||
TABULATION (U+000B), FORM FEED (FF) (U+000C), CARRIAGE RETURN (CR)
|
||||
(U+000D), and NEXT LINE (NEL) (U+0085) are mapped to SPACE (U+0020).
|
||||
|
||||
All other control code (e.g., Cc) points or code points with a
|
||||
control function (e.g., Cf) are mapped to nothing. The following is
|
||||
a complete list of these code points: U+0000-0008, 000E-001F, 007F-
|
||||
0084, 0086-009F, 06DD, 070F, 180E, 200C-200F, 202A-202E, 2060-2063,
|
||||
206A-206F, FEFF, FFF9-FFFB, 1D173-1D17A, E0001, E0020-E007F.
|
||||
|
||||
ZERO WIDTH SPACE (U+200B) is mapped to nothing. All other code
|
||||
points with Separator (space, line, or paragraph) property (e.g., Zs,
|
||||
Zl, or Zp) are mapped to SPACE (U+0020). The following is a complete
|
||||
list of these code points: U+0020, 00A0, 1680, 2000-200A, 2028-2029,
|
||||
202F, 205F, 3000.
|
||||
|
||||
For case ignore, numeric, and stored prefix string matching rules,
|
||||
characters are case folded per B.2 of [RFC3454].
|
||||
|
||||
The output is the mapped string.
|
||||
|
||||
2.3. Normalize
|
||||
|
||||
The input string is to be normalized to Unicode Form KC
|
||||
(compatibility composed) as described in [UAX15]. The output is the
|
||||
normalized string.
|
||||
|
||||
2.4. Prohibit
|
||||
|
||||
All Unassigned code points are prohibited. Unassigned code points
|
||||
are listed in Table A.1 of [RFC3454].
|
||||
|
||||
Characters that, per Section 5.8 of [RFC3454], change display
|
||||
properties or are deprecated are prohibited. These characters are
|
||||
listed in Table C.8 of [RFC3454].
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 5]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
Private Use code points are prohibited. These characters are listed
|
||||
in Table C.3 of [RFC3454].
|
||||
|
||||
All non-character code points are prohibited. These code points are
|
||||
listed in Table C.4 of [RFC3454].
|
||||
|
||||
Surrogate codes are prohibited. These characters are listed in Table
|
||||
C.5 of [RFC3454].
|
||||
|
||||
The REPLACEMENT CHARACTER (U+FFFD) code point is prohibited.
|
||||
|
||||
The step fails if the input string contains any prohibited code
|
||||
point. Otherwise, the output is the input string.
|
||||
|
||||
2.5. Check bidi
|
||||
|
||||
Bidirectional characters are ignored.
|
||||
|
||||
2.6. Insignificant Character Handling
|
||||
|
||||
In this step, the string is modified to ensure proper handling of
|
||||
characters insignificant to the matching rule. This modification
|
||||
differs from matching rule to matching rule.
|
||||
|
||||
Section 2.6.1 applies to case ignore and exact string matching.
|
||||
Section 2.6.2 applies to numericString matching.
|
||||
Section 2.6.3 applies to telephoneNumber matching.
|
||||
|
||||
2.6.1. Insignificant Space Handling
|
||||
|
||||
For the purposes of this section, a space is defined to be the SPACE
|
||||
(U+0020) code point followed by no combining marks.
|
||||
|
||||
NOTE - The previous steps ensure that the string cannot contain
|
||||
any code points in the separator class, other than SPACE
|
||||
(U+0020).
|
||||
|
||||
For input strings that are attribute values or non-substring
|
||||
assertion values: If the input string contains no non-space
|
||||
character, then the output is exactly two SPACEs. Otherwise (the
|
||||
input string contains at least one non-space character), the string
|
||||
is modified such that the string starts with exactly one space
|
||||
character, ends with exactly one SPACE character, and any inner
|
||||
(non-empty) sequence of space characters is replaced with exactly two
|
||||
SPACE characters. For instance, the input string
|
||||
"foo<SPACE>bar<SPACE><SPACE>" results in the output
|
||||
"<SPACE>foo<SPACE><SPACE>bar<SPACE>".
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 6]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
For input strings that are substring assertion values: If the string
|
||||
being prepared contains no non-space characters, then the output
|
||||
string is exactly one SPACE. Otherwise, the following steps are
|
||||
taken:
|
||||
|
||||
- If the input string is an initial substring, it is modified to
|
||||
start with exactly one SPACE character;
|
||||
|
||||
- If the input string is an initial or an any substring that ends in
|
||||
one or more space characters, it is modified to end with exactly
|
||||
one SPACE character;
|
||||
|
||||
- If the input string is an any or a final substring that starts in
|
||||
one or more space characters, it is modified to start with exactly
|
||||
one SPACE character; and
|
||||
|
||||
- If the input string is a final substring, it is modified to end
|
||||
with exactly one SPACE character.
|
||||
|
||||
For instance, for the input string "foo<SPACE>bar<SPACE><SPACE>" as
|
||||
an initial substring, the output would be
|
||||
"<SPACE>foo<SPACE><SPACE>bar<SPACE>". As an any or final substring,
|
||||
the same input would result in "foo<SPACE>bar<SPACE>".
|
||||
|
||||
Appendix B discusses the rationale for the behavior.
|
||||
|
||||
2.6.2. numericString Insignificant Character Handling
|
||||
|
||||
For the purposes of this section, a space is defined to be the SPACE
|
||||
(U+0020) code point followed by no combining marks.
|
||||
|
||||
All spaces are regarded as insignificant and are to be removed.
|
||||
|
||||
For example, removal of spaces from the Form KC string:
|
||||
"<SPACE><SPACE>123<SPACE><SPACE>456<SPACE><SPACE>"
|
||||
would result in the output string:
|
||||
"123456"
|
||||
and the Form KC string:
|
||||
"<SPACE><SPACE><SPACE>"
|
||||
would result in the output string:
|
||||
"" (an empty string).
|
||||
|
||||
2.6.3. telephoneNumber Insignificant Character Handling
|
||||
|
||||
For the purposes of this section, a hyphen is defined to be a
|
||||
HYPHEN-MINUS (U+002D), ARMENIAN HYPHEN (U+058A), HYPHEN (U+2010),
|
||||
NON-BREAKING HYPHEN (U+2011), MINUS SIGN (U+2212), SMALL HYPHEN-MINUS
|
||||
(U+FE63), or FULLWIDTH HYPHEN-MINUS (U+FF0D) code point followed by
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 7]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
no combining marks and a space is defined to be the SPACE (U+0020)
|
||||
code point followed by no combining marks.
|
||||
|
||||
All hyphens and spaces are considered insignificant and are to be
|
||||
removed.
|
||||
|
||||
For example, removal of hyphens and spaces from the Form KC string:
|
||||
"<SPACE><HYPHEN>123<SPACE><SPACE>456<SPACE><HYPHEN>"
|
||||
would result in the output string:
|
||||
"123456"
|
||||
and the Form KC string:
|
||||
"<HYPHEN><HYPHEN><HYPHEN>"
|
||||
would result in the (empty) output string:
|
||||
"".
|
||||
|
||||
3. Security Considerations
|
||||
|
||||
"Preparation of Internationalized Strings ("stringprep")" [RFC3454]
|
||||
security considerations generally apply to the algorithms described
|
||||
here.
|
||||
|
||||
4. Acknowledgements
|
||||
|
||||
The approach used in this document is based upon design principles
|
||||
and algorithms described in "Preparation of Internationalized Strings
|
||||
('stringprep')" [RFC3454] by Paul Hoffman and Marc Blanchet. Some
|
||||
additional guidance was drawn from Unicode Technical Standards,
|
||||
Technical Reports, and Notes.
|
||||
|
||||
This document is a product of the IETF LDAP Revision (LDAPBIS)
|
||||
Working Group.
|
||||
|
||||
5. References
|
||||
|
||||
5.1. Normative References
|
||||
|
||||
[RFC2119] Bradner, S., "Key words for use in RFCs to Indicate
|
||||
Requirement Levels", BCP 14, RFC 2119, March 1997.
|
||||
|
||||
[RFC3454] Hoffman, P. and M. Blanchet, "Preparation of
|
||||
Internationalized Strings ("stringprep")", RFC 3454,
|
||||
December 2002.
|
||||
|
||||
[RFC4510] Zeilenga, K., "Lightweight Directory Access Protocol
|
||||
(LDAP): Technical Specification Road Map", RFC 4510,
|
||||
June 2006.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 8]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
[RFC4517] Legg, S., Ed., "Lightweight Directory Access Protocol
|
||||
(LDAP): Syntaxes and Matching Rules", RFC 4517, June
|
||||
2006.
|
||||
|
||||
[Unicode] The Unicode Consortium, "The Unicode Standard, Version
|
||||
3.2.0" is defined by "The Unicode Standard, Version
|
||||
3.0" (Reading, MA, Addison-Wesley, 2000. ISBN 0-201-
|
||||
61633-5), as amended by the "Unicode Standard Annex
|
||||
#27: Unicode 3.1"
|
||||
(http://www.unicode.org/reports/tr27/) and by the
|
||||
"Unicode Standard Annex #28: Unicode 3.2"
|
||||
(http://www.unicode.org/reports/tr28/).
|
||||
|
||||
[UAX15] Davis, M. and M. Duerst, "Unicode Standard Annex #15:
|
||||
Unicode Normalization Forms, Version 3.2.0".
|
||||
<http://www.unicode.org/unicode/reports/tr15/tr15-
|
||||
22.html>, March 2002.
|
||||
|
||||
[X.680] International Telecommunication Union -
|
||||
Telecommunication Standardization Sector, "Abstract
|
||||
Syntax Notation One (ASN.1) - Specification of Basic
|
||||
Notation", X.680(2002) (also ISO/IEC 8824-1:2002).
|
||||
|
||||
5.2. Informative References
|
||||
|
||||
[X.500] International Telecommunication Union -
|
||||
Telecommunication Standardization Sector, "The
|
||||
Directory -- Overview of concepts, models and
|
||||
services," X.500(1993) (also ISO/IEC 9594-1:1994).
|
||||
|
||||
[X.501] International Telecommunication Union -
|
||||
Telecommunication Standardization Sector, "The
|
||||
Directory -- Models," X.501(1993) (also ISO/IEC 9594-
|
||||
2:1994).
|
||||
|
||||
[X.520] International Telecommunication Union -
|
||||
Telecommunication Standardization Sector, "The
|
||||
Directory: Selected Attribute Types", X.520(1993) (also
|
||||
ISO/IEC 9594-6:1994).
|
||||
|
||||
[Glossary] The Unicode Consortium, "Unicode Glossary",
|
||||
<http://www.unicode.org/glossary/>.
|
||||
|
||||
[CharModel] Whistler, K. and M. Davis, "Unicode Technical Report
|
||||
#17, Character Encoding Model", UTR17,
|
||||
<http://www.unicode.org/unicode/reports/tr17/>, August
|
||||
2000.
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 9]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
[RFC3377] Hodges, J. and R. Morgan, "Lightweight Directory Access
|
||||
Protocol (v3): Technical Specification", RFC 3377,
|
||||
September 2002.
|
||||
|
||||
[RFC4515] Smith, M., Ed. and T. Howes, "Lightweight Directory
|
||||
Access Protocol (LDAP): String Representation of Search
|
||||
Filters", RFC 4515, June 2006.
|
||||
|
||||
[XMATCH] Zeilenga, K., "Internationalized String Matching Rules
|
||||
for X.500", Work in Progress.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 10]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
Appendix A. Combining Marks
|
||||
|
||||
This appendix is normative.
|
||||
|
||||
This table was derived from Unicode [Unicode] data files; it lists
|
||||
all code points with the Mn, Mc, or Me properties. This table is to
|
||||
be considered definitive for the purposes of implementation of this
|
||||
specification.
|
||||
|
||||
0300-034F 0360-036F 0483-0486 0488-0489 0591-05A1
|
||||
05A3-05B9 05BB-05BC 05BF 05C1-05C2 05C4 064B-0655 0670
|
||||
06D6-06DC 06DE-06E4 06E7-06E8 06EA-06ED 0711 0730-074A
|
||||
07A6-07B0 0901-0903 093C 093E-094F 0951-0954 0962-0963
|
||||
0981-0983 09BC 09BE-09C4 09C7-09C8 09CB-09CD 09D7
|
||||
09E2-09E3 0A02 0A3C 0A3E-0A42 0A47-0A48 0A4B-0A4D
|
||||
0A70-0A71 0A81-0A83 0ABC 0ABE-0AC5 0AC7-0AC9 0ACB-0ACD
|
||||
0B01-0B03 0B3C 0B3E-0B43 0B47-0B48 0B4B-0B4D 0B56-0B57
|
||||
0B82 0BBE-0BC2 0BC6-0BC8 0BCA-0BCD 0BD7 0C01-0C03
|
||||
0C3E-0C44 0C46-0C48 0C4A-0C4D 0C55-0C56 0C82-0C83
|
||||
0CBE-0CC4 0CC6-0CC8 0CCA-0CCD 0CD5-0CD6 0D02-0D03
|
||||
0D3E-0D43 0D46-0D48 0D4A-0D4D 0D57 0D82-0D83 0DCA
|
||||
0DCF-0DD4 0DD6 0DD8-0DDF 0DF2-0DF3 0E31 0E34-0E3A
|
||||
0E47-0E4E 0EB1 0EB4-0EB9 0EBB-0EBC 0EC8-0ECD 0F18-0F19
|
||||
0F35 0F37 0F39 0F3E-0F3F 0F71-0F84 0F86-0F87 0F90-0F97
|
||||
0F99-0FBC 0FC6 102C-1032 1036-1039 1056-1059 1712-1714
|
||||
1732-1734 1752-1753 1772-1773 17B4-17D3 180B-180D 18A9
|
||||
20D0-20EA 302A-302F 3099-309A FB1E FE00-FE0F FE20-FE23
|
||||
1D165-1D169 1D16D-1D172 1D17B-1D182 1D185-1D18B
|
||||
1D1AA-1D1AD
|
||||
|
||||
Appendix B. Substrings Matching
|
||||
|
||||
This appendix is non-normative.
|
||||
|
||||
In the absence of substrings matching, the insignificant space
|
||||
handling for case ignore/exact matching could be simplified.
|
||||
Specifically, the handling could be to require that all sequences of
|
||||
one or more spaces be replaced with one space and, if the string
|
||||
contains non-space characters, removal of all leading spaces and
|
||||
trailing spaces.
|
||||
|
||||
In the presence of substrings matching, this simplified space
|
||||
handling would lead to unexpected and undesirable matching behavior.
|
||||
For instance:
|
||||
|
||||
1) (CN=foo\20*\20bar) would match the CN value "foobar";
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 11]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
2) (CN=*\20foobar\20*) would match "foobar", but
|
||||
(CN=*\20*foobar*\20*) would not.
|
||||
|
||||
Note to readers not familiar with LDAP substrings matching: the LDAP
|
||||
filter [RFC4515] assertion (CN=A*B*C) says to "match any value (of
|
||||
the attribute CN) that begins with A, contains B after A, ends with C
|
||||
where C is also after B."
|
||||
|
||||
The first case illustrates that this simplified space handling would
|
||||
cause leading and trailing spaces in substrings of the string to be
|
||||
regarded as insignificant. However, only leading and trailing (as
|
||||
well as multiple consecutive spaces) of the string (as a whole) are
|
||||
insignificant.
|
||||
|
||||
The second case illustrates that this simplified space handling would
|
||||
cause sub-partitioning failures. That is, if a prepared any
|
||||
substring matches a partition of the attribute value, then an
|
||||
assertion constructed by subdividing that substring into multiple
|
||||
substrings should also match.
|
||||
|
||||
In designing an appropriate approach for space handling for
|
||||
substrings matching, one must study key aspects of X.500 case
|
||||
exact/ignore matching. X.520 [X.520] says:
|
||||
|
||||
The [substrings] rule returns TRUE if there is a partitioning of
|
||||
the attribute value (into portions) such that:
|
||||
|
||||
- the specified substrings (initial, any, final) match
|
||||
different portions of the value in the order of the strings
|
||||
sequence;
|
||||
|
||||
- initial, if present, matches the first portion of the value;
|
||||
|
||||
- final, if present, matches the last portion of the value;
|
||||
|
||||
- any, if present, matches some arbitrary portion of the
|
||||
value.
|
||||
|
||||
That is, the substrings assertion (CN=foo\20*\20bar) matches the
|
||||
attribute value "foo<SPACE><SPACE>bar" as the value can be
|
||||
partitioned into the portions "foo<SPACE>" and "<SPACE>bar" meeting
|
||||
the above requirements.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 12]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
X.520 also says:
|
||||
|
||||
[T]he following spaces are regarded as not significant:
|
||||
|
||||
- leading spaces (i.e., those preceding the first character
|
||||
that is not a space);
|
||||
|
||||
- trailing spaces (i.e., those following the last character
|
||||
that is not a space);
|
||||
|
||||
- multiple consecutive spaces (these are taken as equivalent
|
||||
to a single space character).
|
||||
|
||||
This statement applies to the assertion values and attribute values
|
||||
as whole strings, and not individually to substrings of an assertion
|
||||
value. In particular, the statements should be taken to mean that if
|
||||
an assertion value and attribute value match without any
|
||||
consideration to insignificant characters, then that assertion value
|
||||
should also match any attribute value that differs only by inclusion
|
||||
or removal of insignificant characters.
|
||||
|
||||
Hence the assertion (CN=foo\20*\20bar) matches
|
||||
"foo<SPACE><SPACE><SPACE>bar" and "foo<SPACE>bar" as these values
|
||||
only differ from "foo<SPACE><SPACE>bar" by the inclusion or removal
|
||||
of insignificant spaces.
|
||||
|
||||
Astute readers of this text will also note that there are special
|
||||
cases where the specified space handling does not ignore spaces that
|
||||
could be considered insignificant. For instance, the assertion
|
||||
(CN=\20*\20*\20) does not match "<SPACE><SPACE><SPACE>"
|
||||
(insignificant spaces present in value) or " " (insignificant spaces
|
||||
not present in value). However, as these cases have no practical
|
||||
application that cannot be met by simple assertions, e.g., (cn=\20),
|
||||
and this minor anomaly can only be fully addressed by a preparation
|
||||
algorithm to be used in conjunction with character-by-character
|
||||
partitioning and matching, the anomaly is considered acceptable.
|
||||
|
||||
Author's Address
|
||||
|
||||
Kurt D. Zeilenga
|
||||
OpenLDAP Foundation
|
||||
|
||||
EMail: Kurt@OpenLDAP.org
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 13]
|
||||
|
||||
RFC 4518 LDAP: Internationalized String Preparation June 2006
|
||||
|
||||
|
||||
Full Copyright Statement
|
||||
|
||||
Copyright (C) The Internet Society (2006).
|
||||
|
||||
This document is subject to the rights, licenses and restrictions
|
||||
contained in BCP 78, and except as set forth therein, the authors
|
||||
retain all their rights.
|
||||
|
||||
This document and the information contained herein are provided on an
|
||||
"AS IS" basis and THE CONTRIBUTOR, THE ORGANIZATION HE/SHE REPRESENTS
|
||||
OR IS SPONSORED BY (IF ANY), THE INTERNET SOCIETY AND THE INTERNET
|
||||
ENGINEERING TASK FORCE DISCLAIM ALL WARRANTIES, EXPRESS OR IMPLIED,
|
||||
INCLUDING BUT NOT LIMITED TO ANY WARRANTY THAT THE USE OF THE
|
||||
INFORMATION HEREIN WILL NOT INFRINGE ANY RIGHTS OR ANY IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE.
|
||||
|
||||
Intellectual Property
|
||||
|
||||
The IETF takes no position regarding the validity or scope of any
|
||||
Intellectual Property Rights or other rights that might be claimed to
|
||||
pertain to the implementation or use of the technology described in
|
||||
this document or the extent to which any license under such rights
|
||||
might or might not be available; nor does it represent that it has
|
||||
made any independent effort to identify any such rights. Information
|
||||
on the procedures with respect to rights in RFC documents can be
|
||||
found in BCP 78 and BCP 79.
|
||||
|
||||
Copies of IPR disclosures made to the IETF Secretariat and any
|
||||
assurances of licenses to be made available, or the result of an
|
||||
attempt made to obtain a general license or permission for the use of
|
||||
such proprietary rights by implementers or users of this
|
||||
specification can be obtained from the IETF on-line IPR repository at
|
||||
http://www.ietf.org/ipr.
|
||||
|
||||
The IETF invites any interested party to bring to its attention any
|
||||
copyrights, patents or patent applications, or other proprietary
|
||||
rights that may cover technology that may be required to implement
|
||||
this standard. Please address the information to the IETF at
|
||||
ietf-ipr@ietf.org.
|
||||
|
||||
Acknowledgement
|
||||
|
||||
Funding for the RFC Editor function is provided by the IETF
|
||||
Administrative Support Activity (IASA).
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Zeilenga Standards Track [Page 14]
|
||||
|
117
lib/wind/stringprep.c
Normal file
117
lib/wind/stringprep.c
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
* Copyright (c) 2004, 2006, 2008 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include "windlocl.h"
|
||||
#include <stdlib.h>
|
||||
#include <strings.h>
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/**
|
||||
* Process a input UCS4 string according a string-prep profile.
|
||||
*
|
||||
* @param in input UCS4 string to process
|
||||
* @param in_len length of the input string
|
||||
* @param out output UCS4 string
|
||||
* @param out_len length of the output string.
|
||||
* @param flags stringprep profile.
|
||||
*
|
||||
* @return returns 0 on success, an wind error code otherwise
|
||||
* @ingroup wind
|
||||
*/
|
||||
|
||||
int
|
||||
wind_stringprep(const uint32_t *in, size_t in_len,
|
||||
uint32_t *out, size_t *out_len,
|
||||
wind_profile_flags flags)
|
||||
{
|
||||
size_t tmp_len = in_len * 3;
|
||||
uint32_t *tmp = malloc(tmp_len * sizeof(uint32_t));
|
||||
int ret;
|
||||
|
||||
if (tmp == NULL)
|
||||
return -1;
|
||||
|
||||
ret = _wind_stringprep_map(in, in_len, tmp, &tmp_len, flags);
|
||||
if (ret) {
|
||||
free(tmp);
|
||||
return ret;
|
||||
}
|
||||
ret = _wind_stringprep_normalize(tmp, tmp_len, out, out_len);
|
||||
free(tmp);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = _wind_stringprep_prohibited(out, *out_len, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
ret = _wind_stringprep_testbidi(out, *out_len, flags);
|
||||
if (ret)
|
||||
return ret;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct {
|
||||
const char *name;
|
||||
wind_profile_flags flags;
|
||||
} profiles[] = {
|
||||
{ "nameprep", WIND_PROFILE_NAME },
|
||||
{ "saslprep", WIND_PROFILE_SASL },
|
||||
{ "ldapprep", WIND_PROFILE_LDAP }
|
||||
};
|
||||
|
||||
/**
|
||||
* Try to find the profile given a name.
|
||||
*
|
||||
* @param name name of the profile.
|
||||
* @param flags the resulting profile.
|
||||
*
|
||||
* @return returns 0 on success, an wind error code otherwise
|
||||
* @ingroup wind
|
||||
*/
|
||||
|
||||
int
|
||||
wind_profile(const char *name, wind_profile_flags *flags)
|
||||
{
|
||||
unsigned int i;
|
||||
|
||||
for (i = 0; i < sizeof(profiles)/sizeof(profiles[0]); i++) {
|
||||
if (strcasecmp(profiles[i].name, name) == 0) {
|
||||
*flags = profiles[i].flags;
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
90
lib/wind/stringprep.py
Normal file
90
lib/wind/stringprep.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2008 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
import re
|
||||
import string
|
||||
|
||||
def _merge_table(res, source):
|
||||
for table in source.keys():
|
||||
res[table] = res.get(table, []) + source.get(table, [])
|
||||
|
||||
# Table labels consumed by get_errorlist(), one list per profile
# ('name', 'ldap', 'sasl').
name_error = [
    'C.1.2', 'C.2.2', 'C.3', 'C.4', 'C.5', 'C.6', 'C.7', 'C.8', 'C.9',
]
ldap_error = [
    'A.1', 'C.3', 'C.4', 'C.5', 'C.8', 'rfc4518-error',
]
sasl_error = [
    'C.1.2', 'C.2.1', 'C.2.2', 'C.3', 'C.4', 'C.5', 'C.6', 'C.7', 'C.8',
    'C.9',
]

# Table labels consumed by get_maplist(), one list per profile.
name_map = [
    'B.1', 'B.2',
]
ldap_map = [
    'rfc4518-map', 'B.2',
]
sasl_map = [
    'C.1.2', 'B.1',
]
|
||||
|
||||
def symbols(tabledict, tables):
    """Return the CPP symbols to use for the given tables.

    tabledict maps a table label to a list of profile names; tables is
    the sequence of labels to look up.  Labels missing from tabledict
    contribute nothing.  Every collected profile name is turned into
    WIND_PROFILE_<NAME> (upper-cased) and the symbols are joined with
    "|" in lookup order, duplicates included.  Returns "" when no
    profile name was collected.
    """
    profiles = []
    for table in tables:
        profiles.extend(tabledict.get(table, []))
    # str.upper() instead of string.upper(): the module-level function
    # no longer exists in Python 3, the method works on both 2 and 3.
    return "|".join(["WIND_PROFILE_%s" % name.upper() for name in profiles])
|
||||
|
||||
def get_errorlist():
    """Return a dict mapping each error table label to its profiles.

    The value for a label is the list of profile names ('name', 'ldap',
    'sasl', in that order) whose *_error list contains the label.
    """
    merged = dict()
    per_profile = (
        ('name', name_error),
        ('ldap', ldap_error),
        ('sasl', sasl_error),
    )
    for profile, tables in per_profile:
        _merge_table(merged, dict([(table, [profile]) for table in tables]))
    return merged
|
||||
|
||||
def get_maplist():
    """Return a dict mapping each mapping table label to its profiles.

    The value for a label is the list of profile names ('name', 'ldap',
    'sasl', in that order) whose *_map list contains the label.
    """
    merged = dict()
    per_profile = (
        ('name', name_map),
        ('ldap', ldap_map),
        ('sasl', sasl_map),
    )
    for profile, tables in per_profile:
        _merge_table(merged, dict([(table, [profile]) for table in tables]))
    return merged
|
||||
|
||||
def sort_merge_trans(trans):
    """Sort trans in place and merge neighbours with the same first field.

    Each entry is indexed as a 4-element sequence.  After sorting, a run
    of entries whose element 0 is equal is collapsed into one tuple that
    keeps elements 0-2 of the first entry of the run and concatenates
    (with +) element 3 of every entry in the run.  Returns the merged
    entries as a new list.
    """
    trans.sort()
    merged = []
    pending = 0  # falsy until the first entry has been seen
    for entry in trans:
        if pending and pending[0] == entry[0]:
            # same key as the entry being accumulated: fold it in
            pending = (pending[0], pending[1], pending[2],
                       pending[3] + entry[3])
            continue
        if pending:
            merged.append(pending)
        pending = entry
    if pending:
        merged.append(pending)
    return merged
|
83
lib/wind/test-bidi.c
Normal file
83
lib/wind/test-bidi.c
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/* Upper bound on the number of code points in a single test string. */
#define MAX_LENGTH 10

/* One bidi test input: len code points stored at the start of vals. */
struct test {
    unsigned len;
    uint32_t vals[MAX_LENGTH];
};

/* Inputs for which _wind_stringprep_testbidi() is expected to return 0. */
static const struct test passing_cases[] = {
    /* {0} rather than {}: an empty initializer is not valid before C23 */
    {0, {0}},
    {1, {0x0041}},
    {1, {0x05be}},
};

/* Inputs for which _wind_stringprep_testbidi() is expected to return non-zero. */
static const struct test failing_cases[] = {
    {2, {0x05be, 0x0041}},
    {3, {0x05be, 0x0041, 0x05be}},
};
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned failures = 0;
|
||||
|
||||
for (i = 0; i < sizeof(passing_cases)/sizeof(passing_cases[0]); ++i) {
|
||||
const struct test *t = &passing_cases[i];
|
||||
if (_wind_stringprep_testbidi(t->vals, t->len, WIND_PROFILE_NAME)) {
|
||||
printf ("passing case %u failed\n", i);
|
||||
++failures;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(failing_cases)/sizeof(failing_cases[0]); ++i) {
|
||||
const struct test *t = &failing_cases[i];
|
||||
if (!_wind_stringprep_testbidi(t->vals, t->len, WIND_PROFILE_NAME)) {
|
||||
printf ("failing case %u passed\n", i);
|
||||
++failures;
|
||||
}
|
||||
}
|
||||
|
||||
return failures != 0;
|
||||
}
|
99
lib/wind/test-map.c
Normal file
99
lib/wind/test-map.c
Normal file
@@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/* Upper bound on the number of code points in an example string. */
#define MAX_LENGTH 2

/* One mapping example: the input string and the output expected for it. */
struct example {
    uint32_t in[MAX_LENGTH];
    size_t in_len;
    uint32_t out[MAX_LENGTH];
    size_t out_len;
};

/* Examples passed to try(), which maps them with WIND_PROFILE_NAME. */
static const struct example cases[] = {
    /* {0} rather than {}: an empty initializer is not valid before C23 */
    {{0}, 0, {0}, 0},
    {{0x0041}, 1, {0x0061}, 1},
    {{0x0061}, 1, {0x0061}, 1},
    {{0x00AD}, 1, {0}, 0},
    {{0x00DF}, 1, {0x0073, 0x0073}, 2}
};
|
||||
|
||||
static int
|
||||
try(const struct example *c)
|
||||
{
|
||||
int ret;
|
||||
size_t out_len = c->out_len;
|
||||
uint32_t *tmp = malloc(out_len * sizeof(uint32_t));
|
||||
if (tmp == NULL)
|
||||
err(1, "malloc");
|
||||
ret = _wind_stringprep_map(c->in, c->in_len, tmp, &out_len, WIND_PROFILE_NAME);
|
||||
if (ret) {
|
||||
printf("wind_stringprep_map failed\n");
|
||||
return 1;
|
||||
}
|
||||
if (out_len != c->out_len) {
|
||||
printf("wrong out len\n");
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
if (memcmp(c->out, tmp, out_len * sizeof(uint32_t)) != 0) {
|
||||
printf("wrong out data\n");
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
free(tmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned failures = 0;
|
||||
|
||||
for (i = 0; i < sizeof(cases)/sizeof(cases[0]); ++i)
|
||||
failures += try(&cases[i]);
|
||||
return failures != 0;
|
||||
}
|
161
lib/wind/test-normalize.c
Normal file
161
lib/wind/test-normalize.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
#define MAX_LENGTH 20

/*
 * Split `buf' at spaces and parse each piece as a hexadecimal number,
 * storing the values in `v' in order.  `buf' is modified in place by
 * strtok_r().
 *
 * Returns the number of values stored.  Terminates the program through
 * errx() if a piece cannot be parsed or if more than MAX_LENGTH values
 * are present.
 */
static size_t
parse_vector(char *buf, uint32_t *v)
{
    char *state;
    const char *tok;
    unsigned count = 0;

    tok = strtok_r(buf, " ", &state);
    while (tok != NULL) {
        unsigned value;

        if (count >= MAX_LENGTH)
            errx(1, "increase MAX_LENGTH");
        if (sscanf(tok, "%x", &value) != 1)
            errx(1, "failed to parse hex: %s", tok);

        v[count] = value;
        count++;

        tok = strtok_r(NULL, " ", &state);
    }

    return count;
}
|
||||
|
||||
static int
|
||||
test(char *buf)
|
||||
{
|
||||
char *last;
|
||||
char *c;
|
||||
uint32_t in[MAX_LENGTH];
|
||||
size_t in_len;
|
||||
uint32_t out[MAX_LENGTH];
|
||||
size_t out_len;
|
||||
uint32_t *tmp;
|
||||
size_t norm_len;
|
||||
int ret;
|
||||
|
||||
c = strtok_r(buf, ";", &last);
|
||||
if (c == NULL)
|
||||
return 0;
|
||||
|
||||
in_len = parse_vector(c, in);
|
||||
if (strtok_r(NULL, ";", &last) == NULL)
|
||||
return 0;
|
||||
if (strtok_r(NULL, ";", &last) == NULL)
|
||||
return 0;
|
||||
c = strtok_r(NULL, ";", &last);
|
||||
if (c == NULL)
|
||||
return 0;
|
||||
out_len = parse_vector(c, out);
|
||||
if (strtok_r(NULL, ";", &last) == NULL)
|
||||
return 0;
|
||||
c = strtok_r(NULL, ";", &last);
|
||||
if (c == NULL)
|
||||
return 0;
|
||||
|
||||
norm_len = MAX_LENGTH;
|
||||
tmp = malloc(norm_len * sizeof(size_t));
|
||||
if (tmp == NULL)
|
||||
err(1, "malloc");
|
||||
ret = _wind_stringprep_normalize(in, in_len, tmp, &norm_len);
|
||||
if (ret) {
|
||||
printf("wind_stringprep_normalize %s failed\n", c);
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
if (out_len != norm_len) {
|
||||
printf("wrong out len (%s)\n", c);
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
if (memcmp(out, tmp, out_len * sizeof(uint32_t)) != 0) {
|
||||
printf("wrong out data (%s)\n", c);
|
||||
free(tmp);
|
||||
return 1;
|
||||
}
|
||||
free(tmp);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Feed every data line of the test file to test().
 *
 * The file defaults to "NormalizationTest.txt" and may be overridden by
 * the single optional command line argument.  If it cannot be opened as
 * given and the environment variable `srcdir' is set, "$srcdir/<file>"
 * is tried as well.  Lines starting with '#' or '@' are skipped.
 *
 * Exit status is 0 when no line fails and 1 otherwise; usage and open
 * errors terminate through errx()/err().
 */
int
main(int argc, char **argv)
{
    FILE *f;
    char buf[1024];
    /* Point at the name instead of copying it: no length limit, no strlcpy(). */
    const char *filename = "NormalizationTest.txt";
    unsigned failures = 0;

    if (argc > 2)
        errx(1, "usage: %s [file]", argv[0]);
    else if (argc == 2)
        filename = argv[1];

    f = fopen(filename, "r");
    if (f == NULL) {
        const char *srcdir = getenv("srcdir");

        if (srcdir != NULL) {
            char longname[1024];
            int n = snprintf(longname, sizeof(longname), "%s/%s",
                             srcdir, filename);

            /* Only try the combined path if it was not truncated. */
            if (n >= 0 && (size_t)n < sizeof(longname))
                f = fopen(longname, "r");
        }
        if (f == NULL)
            err(1, "open %s", filename);
    }

    while (fgets(buf, sizeof(buf), f) != NULL) {
        if (buf[0] == '#' || buf[0] == '@')
            continue;
        failures += test(buf);
    }
    fclose(f);

    return failures != 0;
}
|
140
lib/wind/test-prohibited.c
Normal file
140
lib/wind/test-prohibited.c
Normal file
@@ -0,0 +1,140 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/*
 * Code points for which main() expects _wind_stringprep_error() to
 * report "prohibited" under WIND_PROFILE_NAME.  Several values appear
 * more than once; the list looks like range end points of several
 * prohibition tables (presumably RFC 3454 appendix C -- confirm).
 */
static uint32_t positives[] = {
    0x00A0, 0x3000,

    0x0080, 0x009F, 0x206A, 0x206F, 0xFEFF,
    0xFFF9, 0xFFFD, 0xFFFE, 0xFFFF,
    0x1D173, 0x1D17A,

    0xE000, 0xF8FF, 0xF0000, 0xFFFFD,
    0x100000, 0x10FFFD,

    0xFDD0, 0xFDEF,

    /* last two code points of each plane, 0 through 16 */
    0xFFFE, 0xFFFF, 0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF,
    0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
    0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF,
    0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF,
    0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xEFFFE, 0xEFFFF,
    0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF,

    0xD800, 0xDFFF,

    0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC,

    0x2FF0, 0x2FFB,

    0x0340, 0x0341,
    0x200E, 0x200F,
    0x202A, 0x202B, 0x202C, 0x202D, 0x202E,
    0x206A, 0x206B, 0x206C, 0x206D, 0x206E, 0x206F,

    0xE0001, 0xE0020, 0xE007F,
};
|
||||
|
||||
/*
 * Code points for which main() expects _wind_stringprep_error() NOT to
 * report "prohibited" under WIND_PROFILE_NAME.  Most values sit right
 * next to an entry of the positives[] list.
 */
static uint32_t negatives[] = {
    0x0000, 0x001F, 0x007F,
    0x0020, 0x2069, 0x2070, 0x0FFF8,
    0x1D172, 0x1D17B,
    0xF900,
    0xFDCF, 0xFDF0,
    0x10000,

    /* xFFFD of one plane followed by the first code point of the next */
    0x1FFFD, 0x20000, 0x2FFFD, 0x30000, 0x3FFFD, 0x40000,
    0x4FFFD, 0x50000, 0x5FFFD, 0x60000, 0x6FFFD, 0x70000,
    0x7FFFD, 0x80000, 0x8FFFD, 0x90000, 0x9FFFD, 0xA0000,
    0xAFFFD, 0xB0000, 0xBFFFD, 0xC0000, 0xCFFFD, 0xD0000,
    0xDFFFD, 0xE0000,
    0xEFFFD,

    0x110000,
    0xD7FF,
    0xFFF8,
    0x2FEF, 0x2FFC,
};
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned failures = 0;
|
||||
|
||||
for (i = 0; i < sizeof(positives)/sizeof(positives[0]); ++i)
|
||||
if (!_wind_stringprep_error(positives[i], WIND_PROFILE_NAME)) {
|
||||
printf ("code-point 0x%x not marked as prohibited\n",
|
||||
positives[i]);
|
||||
++failures;
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(negatives)/sizeof(negatives[0]); ++i)
|
||||
if (_wind_stringprep_error(negatives[i], WIND_PROFILE_NAME)) {
|
||||
printf ("code-point 0x%x not marked as non-prohibited\n",
|
||||
negatives[i]);
|
||||
++failures;
|
||||
}
|
||||
return failures != 0;
|
||||
}
|
80
lib/wind/test-punycode.c
Normal file
80
lib/wind/test-punycode.c
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "windlocl.h"
|
||||
#include "punycode_examples.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned i;
|
||||
unsigned failures = 0;
|
||||
|
||||
for (i = 0; i < punycode_examples_size; ++i) {
|
||||
char buf[256];
|
||||
int ret;
|
||||
const struct punycode_example *e = &punycode_examples[i];
|
||||
size_t len;
|
||||
|
||||
len = sizeof(buf);
|
||||
ret = wind_punycode_toascii(e->val, e->len, buf, &len);
|
||||
if (ret < 0) {
|
||||
printf("punycode %u (%s) failed: %d\n", i, e->description, ret);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
if (len != strlen(e->pc)) {
|
||||
printf("punycode %u (%s) wrong len, actual: %u, expected: %u\n",
|
||||
i, e->description,
|
||||
(unsigned int)len, (unsigned int)strlen(e->pc));
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
if (strncasecmp(buf, e->pc, len) != 0) {
|
||||
printf("punycode %u (%s) wrong contents, "
|
||||
"actual: \"%.*s\", expected: \"%s\"\n",
|
||||
i, e->description, (unsigned int)len, buf, e->pc);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return failures != 0;
|
||||
}
|
161
lib/wind/test-utf8.c
Normal file
161
lib/wind/test-utf8.c
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <err.h>
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/*
 * NULL-terminated list of byte strings that main() expects
 * wind_utf8ucs4() to reject.
 */
static const char *failing_testcases[] = {
    /* single bytes with the high bit set */
    "\x80", "\xFF",
    "\xC0", "\xDF",
    "\xE0", "\xEF",
    "\xF0", "\xF7",

    /* strings starting with 0xC0 */
    "\xC0\x01", "\xC0\x7F", "\xC0\xFF",
    "\xC0\x80\x80",

    /* strings starting with 0xE0 */
    "\xE0\x01", "\xE0\x7F", "\xE0\x80", "\xE0\xFF",
    "\xE0\x80\x20", "\xE0\x80\xFF",
    "\xE0\x80\x80\x80",

    /* strings starting with 0xF0 */
    "\xF0\x01", "\xF0\x80",
    "\xF0\x80\x01", "\xF0\x80\x80",
    "\xF0\x80\x80\x01", "\xF0\x80\x80\xFF",

    NULL
};
|
||||
|
||||
/* Upper bound on the number of code points in any vector below. */
#define MAX_LENGTH 10

/*
 * One decoding test vector.
 *   utf8_str      input byte string
 *   len           number of code points the decoder must produce
 *   u             the expected code points (first `len' entries)
 *   invalid_ucs2  when non-zero, main() does not try to encode `u' back
 */
struct testcase {
    const char *utf8_str;
    size_t len;
    uint32_t u[MAX_LENGTH];
    int invalid_ucs2;
};

/*
 * Note that over-long forms such as "\xC0\x80" are listed as inputs
 * that must decode successfully.  Empty vectors are written {0}: the
 * empty initializer `{}' is a compiler extension before C23.
 */
static const struct testcase testcases[] = {
    {"", 0, {0}},
    {"\x01", 1, {1}},
    {"\x7F", 1, {0x7F}},
    {"\x01\x7F", 2, {0x01, 0x7F}},
    {"\xC0\x80", 1, {0}},
    {"\xC0\x81", 1, {1}},
    {"\xC1\x80", 1, {0x40}},
    {"\xDF\xBF", 1, {0x7FF}},
    {"\xE0\x80\x80", 1, {0}},
    {"\xE0\x80\x81", 1, {1}},
    {"\xE0\x81\x80", 1, {0x40}},
    {"\xE1\x80\x80", 1, {0x1000}},
    {"\xEF\xBF\xBF", 1, {0xFFFF}},
    {"\xF0\x80\x80\x80", 1, {0}},
    {"\xF0\x80\x80\x81", 1, {1}},
    {"\xF0\x80\x81\x80", 1, {0x40}},
    {"\xF0\x81\x80\x80", 1, {0x1000}},
    {"\xF1\x80\x80\x80", 1, {0x40000}},
    {"\xF7\xBF\xBF\xBF", 1, {0x1FFFFF}, 1},
};
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
unsigned failures = 0;
|
||||
unsigned i;
|
||||
const char **s;
|
||||
int ret;
|
||||
size_t len, len2;
|
||||
uint32_t u[MAX_LENGTH];
|
||||
char str[MAX_LENGTH * 4];
|
||||
|
||||
for (s = failing_testcases; *s != NULL; ++s) {
|
||||
len = MAX_LENGTH;
|
||||
ret = wind_utf8ucs4(*s, u, &len);
|
||||
if (ret != -1) {
|
||||
printf("utf8 decode of \"%s\" should have failed\n", *s);
|
||||
++failures;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(testcases)/sizeof(testcases[0]); ++i) {
|
||||
const struct testcase *t = &testcases[i];
|
||||
|
||||
len = MAX_LENGTH,
|
||||
ret = wind_utf8ucs4(t->utf8_str, u, &len);
|
||||
if (ret) {
|
||||
printf("utf8 decode of \"%s\" should have succeeded\n",
|
||||
t->utf8_str);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
if (len != t->len) {
|
||||
printf("utf8 decode of \"%s\" has wrong length: "
|
||||
"expected: %u, actual: %u\n",
|
||||
t->utf8_str, (unsigned int)t->len, (unsigned int)len);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
if (memcmp(t->u, u, len * sizeof(uint32_t)) != 0) {
|
||||
printf("utf8 decode of \"%s\" has wrong data\n",
|
||||
t->utf8_str);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
if (t->invalid_ucs2 == 0) {
|
||||
len2 = sizeof(str);
|
||||
ret = wind_ucs4utf8(u, len, str, &len2);
|
||||
if (ret) {
|
||||
printf("ucs4 decode of \"%s\" should have succeeded\n",
|
||||
t->utf8_str);
|
||||
++failures;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return failures != 0;
|
||||
}
|
338
lib/wind/utf8.c
Normal file
338
lib/wind/utf8.c
Normal file
@@ -0,0 +1,338 @@
|
||||
/*
|
||||
* Copyright (c) 2004, 2006, 2007, 2008 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include <config.h>
|
||||
#endif
|
||||
#include "windlocl.h"
|
||||
|
||||
RCSID("$Id$");
|
||||
|
||||
/**
 * Convert a UTF-8 string to a UCS4 string.
 *
 * Sequences of one to four bytes are accepted.  Only the bit patterns
 * of the lead and continuation bytes are checked, so over-long
 * encodings (for example "\xC0\x80" for 0) are decoded rather than
 * rejected.
 *
 * @param in a NUL-terminated UTF-8 string to convert.
 * @param out the resulting UCS4 string.  If out is NULL nothing is
 * stored and the function only calculates the number of code points
 * (just like wind_utf8ucs4_length()).
 * @param out_len when out is not NULL, must hold the number of elements
 * available in out before the call.  On success it is set to the number
 * of code points in the input.
 *
 * @return returns 0 on success, -1 if the input is malformed or if out
 * is too small.
 * @ingroup wind
 */

int
wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len)
{
    const unsigned char *p;
    size_t o = 0;

    for (p = (const unsigned char *)in; *p != '\0'; ++p) {
        const unsigned c = *p;
        unsigned trail;         /* continuation bytes still expected */
        uint32_t u;

        if ((c & 0x80) == 0) {
            u = c;
            trail = 0;
        } else if ((c & 0xE0) == 0xC0) {
            u = c & 0x1F;
            trail = 1;
        } else if ((c & 0xF0) == 0xE0) {
            u = c & 0x0F;
            trail = 2;
        } else if ((c & 0xF8) == 0xF0) {
            u = c & 0x07;
            trail = 3;
        } else {
            return -1;
        }

        while (trail-- > 0) {
            const unsigned cn = *++p;

            /*
             * A terminating NUL fails this test too, so a truncated
             * sequence never makes us read past the end of the string.
             */
            if ((cn & 0xC0) != 0x80)
                return -1;
            u = (u << 6) | (cn & 0x3F);
        }

        if (out) {
            if (o >= *out_len)
                return -1;
            out[o] = u;
        }
        /* Count the code point even when only the length is wanted. */
        ++o;
    }
    *out_len = o;
    return 0;
}
|
||||
|
||||
/**
 * Calculate the length of the UCS4 string that converting a UTF-8
 * string would produce.  This is wind_utf8ucs4() called without an
 * output buffer.
 *
 * @param in a UTF-8 string to measure.
 * @param out_len receives the length reported by wind_utf8ucs4().
 *
 * @return returns 0 on success, a wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs4_length(const char *in, size_t *out_len)
{
    /* No output buffer: nothing is stored, only *out_len is updated. */
    const int status = wind_utf8ucs4(in, NULL, out_len);

    return status;
}
|
||||
|
||||
/*
 * Marker bits of the first byte of a UTF-8 sequence, indexed by
 * (sequence length - 1).  Unsigned so that values above 0x7F are
 * representable without relying on the signedness of plain char.
 */
static const unsigned char first_char[4] =
    { 0x00, 0xC0, 0xE0, 0xF0 };

/**
 * Convert a UCS4 string to a UTF-8 string.
 *
 * @param in a UCS4 string to convert.
 * @param in_len the number of elements in the input array.
 * @param out the resulting UTF-8 string, which is NUL terminated.  It
 * must have room for wind_ucs4utf8_length() bytes plus the terminating
 * NUL.  If out is NULL, the function will only calculate the needed
 * space (just like wind_ucs4utf8_length()).
 * @param out_len when out is not NULL, must hold the size of out in
 * bytes before the call.  On success it is set to the length of the
 * UTF-8 string, not counting the terminating NUL.
 *
 * @return returns 0 on success, -1 if a code point is above 0x10FFFF
 * or if out is too small.
 * @ingroup wind
 */

int
wind_ucs4utf8(const uint32_t *in, size_t in_len, char *out, size_t *out_len)
{
    uint32_t ch;
    size_t i, len, o;

    for (o = 0, i = 0; i < in_len; i++) {
        ch = in[i];

        if (ch < 0x80) {
            len = 1;
        } else if (ch < 0x800) {
            len = 2;
        } else if (ch < 0x10000) {
            len = 3;
        } else if (ch <= 0x10FFFF) {
            len = 4;
        } else
            return -1;

        o += len;

        if (out) {
            /* ">=" rather than ">" keeps one byte free for the NUL. */
            if (o >= *out_len)
                return -1;

            /*
             * Fill the sequence from its last byte towards the first,
             * consuming six payload bits per continuation byte.
             */
            switch (len) {
            case 4:
                out[3] = (char)(0x80 | (ch & 0x3F));
                ch >>= 6;
                /* FALLTHROUGH */
            case 3:
                out[2] = (char)(0x80 | (ch & 0x3F));
                ch >>= 6;
                /* FALLTHROUGH */
            case 2:
                out[1] = (char)(0x80 | (ch & 0x3F));
                ch >>= 6;
                /* FALLTHROUGH */
            case 1:
                out[0] = (char)(ch | first_char[len - 1]);
                break;
            default:
                break;
            }
            /* Only advance a real buffer; out stays NULL when measuring. */
            out += len;
        }
    }
    if (out) {
        if (o >= *out_len)
            return -1;
        *out = '\0';
    }
    *out_len = o;
    return 0;
}
|
||||
|
||||
/**
 * Calculate the length of the UTF-8 string that converting a UCS4
 * string would produce.  This is wind_ucs4utf8() called without an
 * output buffer.
 *
 * @param in a UCS4 string to measure.
 * @param in_len the number of elements in the UCS4 string.
 * @param out_len receives the length reported by wind_ucs4utf8().
 *
 * @return returns 0 on success, a wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len)
{
    /* No output buffer: nothing is stored, only *out_len is updated. */
    const int status = wind_ucs4utf8(in, in_len, NULL, out_len);

    return status;
}
|
||||
|
||||
/**
 * Read UCS2 characters from a byte buffer.
 *
 * The input is currently always interpreted as little-endian; byte
 * order mark detection is not implemented.
 *
 * @param ptr The input buffer to read from
 * @param len the length of the input buffer in bytes, must be an even
 * number.
 * @param out the output UCS2 array, must have room for at least len/2
 * elements.
 *
 * @return returns the number of UCS2 characters stored in out, or -1
 * if len is odd.
 * @ingroup wind
 */

ssize_t
_wind_ucs2read(void *ptr, size_t len, uint16_t *out)
{
    const unsigned char *p = ptr;
    const int little = 1;       /* TODO: check for BOM */
    const size_t count = len / 2;
    size_t i;

    if (len & 1)
        return -1;

    /*
     * Bounded by the element count: the previous loop tested `len'
     * without ever decreasing it and therefore never terminated.
     */
    for (i = 0; i < count; i++, p += 2) {
        if (little)
            out[i] = (uint16_t)((p[1] << 8) + p[0]);
        else
            out[i] = (uint16_t)((p[0] << 8) + p[1]);
    }
    return (ssize_t)count;
}
|
||||
|
||||
/**
 * Convert a UCS2 string to a UTF-8 string.
 *
 * Every input element is encoded on its own as one to three bytes;
 * surrogate pairs are not combined.
 *
 * @param in a UCS2 string to convert.
 * @param in_len the number of elements in the UCS2 string.
 * @param out the resulting UTF-8 string, which is NUL terminated.  It
 * must have room for wind_ucs2utf8_length() bytes plus the terminating
 * NUL.  If out is NULL, the function will only calculate the needed
 * space (just like wind_ucs2utf8_length()).
 * @param out_len when out is not NULL, must hold the size of out in
 * bytes before the call.  On success it is set to the length of the
 * UTF-8 string, not counting the terminating NUL.
 *
 * @return returns 0 on success, -1 if out is too small.
 * @ingroup wind
 */

int
wind_ucs2utf8(const uint16_t *in, size_t in_len, char *out, size_t *out_len)
{
    /* Marker bits of the first byte, indexed by (sequence length - 1). */
    static const unsigned char lead[3] = { 0x00, 0xC0, 0xE0 };
    size_t i, len, o;

    for (o = 0, i = 0; i < in_len; i++) {
        unsigned ch = in[i];

        if (ch < 0x80) {
            len = 1;
        } else if (ch < 0x800) {
            len = 2;
        } else
            len = 3;

        o += len;

        if (out) {
            /* ">=" rather than ">" keeps one byte free for the NUL. */
            if (o >= *out_len)
                return -1;

            /*
             * Fill the sequence from its last byte towards the first,
             * consuming six payload bits per continuation byte.
             */
            switch (len) {
            case 3:
                out[2] = (char)(0x80 | (ch & 0x3F));
                ch >>= 6;
                /* FALLTHROUGH */
            case 2:
                out[1] = (char)(0x80 | (ch & 0x3F));
                ch >>= 6;
                /* FALLTHROUGH */
            case 1:
                out[0] = (char)(ch | lead[len - 1]);
                break;
            default:
                break;
            }
            /* Only advance a real buffer; out stays NULL when measuring. */
            out += len;
        }
    }
    if (out) {
        if (o >= *out_len)
            return -1;
        *out = '\0';
    }
    *out_len = o;
    return 0;
}
|
||||
|
||||
/**
 * Calculate the length of the UTF-8 string that converting a UCS2
 * string would produce.  This is wind_ucs2utf8() called without an
 * output buffer.
 *
 * @param in a UCS2 string to measure.
 * @param in_len the number of elements in the UCS2 string.
 * @param out_len receives the length reported by wind_ucs2utf8().
 *
 * @return returns 0 on success, a wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs2utf8_length(const uint16_t *in, size_t in_len, size_t *out_len)
{
    /* No output buffer: nothing is stored, only *out_len is updated. */
    const int status = wind_ucs2utf8(in, in_len, NULL, out_len);

    return status;
}
|
48
lib/wind/util.py
Normal file
48
lib/wind/util.py
Normal file
@@ -0,0 +1,48 @@
|
||||
#!/usr/local/bin/python
|
||||
# -*- coding: iso-8859-1 -*-
|
||||
|
||||
# $Id$
|
||||
|
||||
# Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
# (Royal Institute of Technology, Stockholm, Sweden).
|
||||
# All rights reserved.
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions
|
||||
# are met:
|
||||
#
|
||||
# 1. Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
#
|
||||
# 2. Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
#
|
||||
# 3. Neither the name of the Institute nor the names of its contributors
|
||||
# may be used to endorse or promote products derived from this software
|
||||
# without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
# SUCH DAMAGE.
|
||||
|
||||
def subList(l, sl):
    """Return the lowest index at which sl occurs as a contiguous run
    of elements in l, or None if it does not occur.

    An empty sl is found at index 0.
    """
    width = len(sl)
    for start in range(len(l) - width + 1):
        # compare element by element so l and sl may be different
        # sequence types
        if all(l[start + k] == sl[k] for k in range(width)):
            return start
    return None
|
||||
|
24
lib/wind/version-script.map
Normal file
24
lib/wind/version-script.map
Normal file
@@ -0,0 +1,24 @@
|
||||
# $Id$
|
||||
|
||||
HEIMDAL_WIND_1.0 {
|
||||
global:
|
||||
wind_stringprep;
|
||||
wind_profile;
|
||||
wind_punycode_toascii;
|
||||
wind_utf8ucs4;
|
||||
wind_utf8ucs4_length;
|
||||
wind_ucs4utf8;
|
||||
wind_ucs4utf8_length;
|
||||
wind_ucs2utf8;
|
||||
wind_ucs2utf8_length;
|
||||
# testing
|
||||
_wind_combining_class;
|
||||
_wind_stringprep_testbidi;
|
||||
_wind_stringprep_error;
|
||||
_wind_stringprep_prohibited;
|
||||
_wind_stringprep_map;
|
||||
_wind_stringprep_normalize;
|
||||
_wind_ucs2read;
|
||||
local:
|
||||
*;
|
||||
};
|
65
lib/wind/wind.h
Normal file
65
lib/wind/wind.h
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef _WIND_H_
#define _WIND_H_

#include <stddef.h>
#include <stdint.h>

/* Holds the WIND_PROFILE_* values defined below. */
typedef unsigned int wind_profile_flags;

/* Each profile is a distinct single bit (1, 2, 4). */
#define WIND_PROFILE_NAME	1
#define WIND_PROFILE_LDAP	2
#define WIND_PROFILE_SASL	4

/*
 * stringprep entry points.
 *
 * NOTE(review): wind_stringprep takes `unsigned' buffers while the
 * rest of this header uses uint32_t; left as declared.
 */
int wind_stringprep(const unsigned *in, size_t in_len,
		    unsigned *out, size_t *out_len,
		    wind_profile_flags flags);

int wind_profile(const char *name, wind_profile_flags *flags);

/* punycode */
int wind_punycode_toascii(const uint32_t *in, size_t in_len,
			  char *out, size_t *out_len);

/*
 * UTF-8 / UCS-4 / UCS-2 conversion.
 *
 * NOTE(review): the parameter names below were added to follow the
 * in / in_len / out / out_len pattern of wind_punycode_toascii; they
 * are presumed, not taken from the definitions -- confirm there.
 */
int wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len);
int wind_utf8ucs4_length(const char *in, size_t *out_len);

int wind_ucs4utf8(const uint32_t *in, size_t in_len,
		  char *out, size_t *out_len);
int wind_ucs4utf8_length(const uint32_t *in, size_t in_len,
			 size_t *out_len);

int wind_ucs2utf8(const uint16_t *in, size_t in_len,
		  char *out, size_t *out_len);
int wind_ucs2utf8_length(const uint16_t *in, size_t in_len,
			 size_t *out_len);

#endif /* _WIND_H_ */
|
14
lib/wind/wind_err.et
Normal file
14
lib/wind/wind_err.et
Normal file
@@ -0,0 +1,14 @@
|
||||
#
|
||||
# Error messages for the wind library
|
||||
#
|
||||
# This might look like a com_err file, but is not
|
||||
#
|
||||
id "$Id$"
|
||||
|
||||
error_table wind
|
||||
|
||||
prefix WIND
|
||||
error_code NONE, "No error"
|
||||
error_code FOO, "Random error"
|
||||
|
||||
end
|
62
lib/wind/windlocl.h
Normal file
62
lib/wind/windlocl.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2004 Kungliga Tekniska Högskolan
|
||||
* (Royal Institute of Technology, Stockholm, Sweden).
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the Institute nor the names of its contributors
|
||||
* may be used to endorse or promote products derived from this software
|
||||
* without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* $Id$ */
|
||||
|
||||
#ifndef _WINDLOCL_H_
#define _WINDLOCL_H_

/*
 * Library-internal declarations for libwind.  Pulls in the public
 * API from "wind.h" and adds the underscore-prefixed helpers.
 */

#ifdef HAVE_CONFIG_H
#include <config.h>
#endif

/*
 * ssize_t (the return type of _wind_ucs2read) is not provided by
 * <stddef.h> or <stdint.h>; include <sys/types.h> so that this
 * header compiles regardless of what the includer pulled in first.
 */
#include <sys/types.h>

#include "wind.h"

int _wind_combining_class(uint32_t);

int _wind_stringprep_testbidi(const uint32_t *, size_t, wind_profile_flags);

int _wind_stringprep_error(const uint32_t, wind_profile_flags);

int _wind_stringprep_prohibited(const uint32_t *, size_t, wind_profile_flags);

int _wind_stringprep_map(const uint32_t *, size_t,
			 uint32_t *, size_t *,
			 wind_profile_flags);

int _wind_stringprep_normalize(const uint32_t *in, size_t in_len,
			       uint32_t *out, size_t *out_len);

/* The only declaration here that returns ssize_t rather than int. */
ssize_t _wind_ucs2read(void *, size_t, uint16_t *);

#endif /* _WINDLOCL_H_ */
|
Reference in New Issue
Block a user