diff --git a/kanji/09af7.svg b/kanji/09af7.svg index be4cbc3d4..0f1758312 100644 --- a/kanji/09af7.svg +++ b/kanji/09af7.svg @@ -73,7 +73,7 @@ kvg:type CDATA #IMPLIED > - + diff --git a/kanjivg.py b/kanjivg.py index 1b0a9e541..39c7f126a 100644 --- a/kanjivg.py +++ b/kanjivg.py @@ -42,7 +42,12 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details.""" def isKanji(v): return (v >= 0x4E00 and v <= 0x9FC3) or (v >= 0x3400 and v <= 0x4DBF) or (v >= 0xF900 and v <= 0xFAD9) or (v >= 0x2E80 and v <= 0x2EFF) or (v >= 0x20000 and v <= 0x2A6DF) -# Returns the unicode of a character in a unicode string, taking surrogate pairs into account +# Returns the unicode of a character in a unicode string, taking +# surrogate pairs into account + +# Why do we need to worry about surrogate pairs? This doesn't occur in +# KanjiVG. + def realord(s, pos = 0): if s == None: return None code = ord(s[pos]) @@ -311,7 +316,7 @@ class KanjisHandler(BasicHandler): # The group must not exist else: if (group.element + str(group.number)) in self.compCpt: - print("%s: Duplicate numbered group" % (self.kanji.kId())) + print("%s: Duplicate numbered group %d" % (self.kanji.kId(), group.number)) self.compCpt[group.element + str(group.number)] = group.part # No number, just a part - groups restart with part 1, otherwise must # increase correctly @@ -321,7 +326,7 @@ class KanjisHandler(BasicHandler): if group.element not in self.compCpt: print("%s: Incorrectly started multi-part group" % (self.kanji.kId())) elif self.compCpt[group.element] != group.part - 1: - print("%s: Incorrectly splitted multi-part group" % (self.kanji.kId())) + print("%s: Incorrectly split multi-part group for %s - %d" % (self.kanji.kId(),group.element,group.part)) self.compCpt[group.element] = group.part def handle_end_g(self): @@ -395,7 +400,7 @@ class SVGHandler(BasicHandler): # The group must not exist else: if self.compCpt.has_key(group.element + str(group.number)): - print("%s: Duplicate numbered group" % (self.currentKanji.kId())) + print("%s: Duplicate numbered group %d" % (self.currentKanji.kId(), group.number)) self.compCpt[group.element + str(group.number)] = group.part # No number, just a part - groups restart with part 1, otherwise must # increase correctly diff --git a/kvg-lookup.py b/kvg-lookup.py index 84c6facc8..f21d0bbd8 100755 --- a/kvg-lookup.py +++ b/kvg-lookup.py @@ -31,9 +31,9 @@ helpString = """Usage: %s [...elementN] Recognized commands: find-svg Find and view summary of an SVG file for the given - element in ./kanji/ directory. + element in the /kanji directory. find-xml Find and view summary of a entry for - the given element from ./kanjivg.xml file. + the given element from the file kanjivg.xml Parameters: element May either be the singular character, e.g. 並 or its @@ -130,6 +130,12 @@ if __name__ == "__main__": print(helpString) sys.exit(0) + + if sys.argv[1] == "find-xml": + if not os.path.exists("./kanjivg.xml"): + print("Re-run \"./kvg.py release\" to regenerate kanjivg.xml") + sys.exit(1) + action = actions[sys.argv[1]][0] args = sys.argv[2:] diff --git a/utils.py b/utils.py index 8b7b9aaf2..75b9f0b65 100644 --- a/utils.py +++ b/utils.py @@ -11,6 +11,11 @@ else: def unicode(s): return s +# Given a string "id", either turn it into its numerical Unicode +# value, or if it appears to be a hexadecimal string, convert that +# into a number, then convert the number into a five digit hexadecimal +# for use as a file name. + def canonicalId(id): if isinstance(id, str): idLen = len(id) @@ -61,10 +66,14 @@ class SvgFileInfo: raise Exception("File does not contain 1 kanji entry. (%s)" % (self.path)) return parsed[0] +# Parse kanjivg.xml + def parseXmlFile(path, handler): from xml.sax import parse parse(path, handler) +# Make a list of the SVG files in kanji + def listSvgFiles(dir): list = [] for f in os.listdir(dir): @@ -73,6 +82,8 @@ def listSvgFiles(dir): list.append(sfi) return list +# Read in the file kanjivg.xml + def readXmlFile(path, KanjisHandler=None): if KanjisHandler is None: from kanjivg import KanjisHandler