diff --git a/kanji/09af7.svg b/kanji/09af7.svg
index be4cbc3d4..0f1758312 100644
--- a/kanji/09af7.svg
+++ b/kanji/09af7.svg
@@ -73,7 +73,7 @@ kvg:type CDATA #IMPLIED >
-
+
diff --git a/kanjivg.py b/kanjivg.py
index 1b0a9e541..39c7f126a 100644
--- a/kanjivg.py
+++ b/kanjivg.py
@@ -42,7 +42,12 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details."""
def isKanji(v):
return (v >= 0x4E00 and v <= 0x9FC3) or (v >= 0x3400 and v <= 0x4DBF) or (v >= 0xF900 and v <= 0xFAD9) or (v >= 0x2E80 and v <= 0x2EFF) or (v >= 0x20000 and v <= 0x2A6DF)
-# Returns the unicode of a character in a unicode string, taking surrogate pairs into account
+# Returns the Unicode code point of the character at index "pos" of a
+# unicode string, taking surrogate pairs into account
+
+# Why do we need to worry about surrogate pairs? They do not seem to occur in
+# KanjiVG (but note that isKanji accepts 0x20000-0x2A6DF, outside the BMP).
+
def realord(s, pos = 0):
if s == None: return None
code = ord(s[pos])
@@ -311,7 +316,7 @@ class KanjisHandler(BasicHandler):
# The group must not exist
else:
if (group.element + str(group.number)) in self.compCpt:
- print("%s: Duplicate numbered group" % (self.kanji.kId()))
+ print("%s: Duplicate numbered group %d" % (self.kanji.kId(), group.number))
self.compCpt[group.element + str(group.number)] = group.part
# No number, just a part - groups restart with part 1, otherwise must
# increase correctly
@@ -321,7 +326,7 @@ class KanjisHandler(BasicHandler):
if group.element not in self.compCpt:
print("%s: Incorrectly started multi-part group" % (self.kanji.kId()))
elif self.compCpt[group.element] != group.part - 1:
- print("%s: Incorrectly splitted multi-part group" % (self.kanji.kId()))
+ print("%s: Incorrectly split multi-part group for %s - %d" % (self.kanji.kId(),group.element,group.part))
self.compCpt[group.element] = group.part
def handle_end_g(self):
@@ -395,7 +400,7 @@ class SVGHandler(BasicHandler):
# The group must not exist
else:
if self.compCpt.has_key(group.element + str(group.number)):
- print("%s: Duplicate numbered group" % (self.currentKanji.kId()))
+ print("%s: Duplicate numbered group %d" % (self.currentKanji.kId(), group.number))
self.compCpt[group.element + str(group.number)] = group.part
# No number, just a part - groups restart with part 1, otherwise must
# increase correctly
diff --git a/kvg-lookup.py b/kvg-lookup.py
index 84c6facc8..f21d0bbd8 100755
--- a/kvg-lookup.py
+++ b/kvg-lookup.py
@@ -31,9 +31,9 @@ helpString = """Usage: %s [...elementN]
Recognized commands:
find-svg Find and view summary of an SVG file for the given
- element in ./kanji/ directory.
+ element in the ./kanji directory.
find-xml Find and view summary of a entry for
- the given element from ./kanjivg.xml file.
+ the given element from the file kanjivg.xml
Parameters:
element May either be the singular character, e.g. 並 or its
@@ -130,6 +130,12 @@ if __name__ == "__main__":
print(helpString)
sys.exit(0)
+
+ if sys.argv[1] == "find-xml":
+ if not os.path.exists("./kanjivg.xml"):
+ print("Re-run \"./kvg.py release\" to regenerate kanjivg.xml")
+ sys.exit(1)
+
action = actions[sys.argv[1]][0]
args = sys.argv[2:]
diff --git a/utils.py b/utils.py
index 8b7b9aaf2..75b9f0b65 100644
--- a/utils.py
+++ b/utils.py
@@ -11,6 +11,11 @@ else:
def unicode(s):
return s
+# Given a string "id", convert it to a number: either the numerical
+# Unicode value of the character or, if "id" appears to be a hexadecimal
+# string, the value of that string. Then format the number as a
+# five-digit hexadecimal string for use as a file name.
+
def canonicalId(id):
if isinstance(id, str):
idLen = len(id)
@@ -61,10 +66,14 @@ class SvgFileInfo:
raise Exception("File does not contain 1 kanji entry. (%s)" % (self.path))
return parsed[0]
+# Parse the XML file at "path" (e.g. kanjivg.xml) with the given SAX handler
+
def parseXmlFile(path, handler):
from xml.sax import parse
parse(path, handler)
+# Make a list of the SVG files in the directory "dir" (e.g. kanji)
+
def listSvgFiles(dir):
list = []
for f in os.listdir(dir):
@@ -73,6 +82,8 @@ def listSvgFiles(dir):
list.append(sfi)
return list
+# Read in the XML file at "path" (e.g. kanjivg.xml); the handler defaults to kanjivg.KanjisHandler
+
def readXmlFile(path, KanjisHandler=None):
if KanjisHandler is None:
from kanjivg import KanjisHandler