mirror of
https://github.com/KanjiVG/kanjivg.git
synced 2026-04-24 22:36:11 +02:00
First version of new format finalized
This commit is contained in:
+2
-1
@@ -2,4 +2,5 @@
|
||||
kanjivg.xml
|
||||
*.pyc
|
||||
kanjivg-????????.xml.gz
|
||||
output
|
||||
kanjivg
|
||||
kanjivgMismatch
|
||||
|
||||
+22
-89
@@ -55,13 +55,13 @@ def realchr(i):
|
||||
else: return unichr(((i - 0x10000) >> 10) + 0xD800) + unichr(0xDC00 + (i & 0x3ff))
|
||||
|
||||
class Kanji:
|
||||
"""Describes a kanji. The root stroke group is accessible from the root member."""
|
||||
"""Describes a kanji. The root stroke group is accessible from the strokes member."""
|
||||
def __init__(self, code, variant):
|
||||
# Unicode of char being represented (int)
|
||||
self.code = code
|
||||
# Variant of the character, if any
|
||||
self.variant = variant
|
||||
self.root = None
|
||||
self.strokes = None
|
||||
|
||||
# String identifier used to uniquely identify the kanji
|
||||
def kId(self):
|
||||
@@ -69,14 +69,21 @@ class Kanji:
|
||||
if self.variant: ret += "-%s" % (self.variant,)
|
||||
return ret
|
||||
|
||||
def toSVG(self, out, indent = 0):
|
||||
self.root.toSVG(out, self.kId(), [0], [1])
|
||||
def outputStrokesNumbers(self, out, indent = 0):
|
||||
strokes = self.getStrokes()
|
||||
cpt = 1
|
||||
for stroke in strokes:
|
||||
stroke.numberToSVG(out, cpt, indent + 1)
|
||||
cpt += 1
|
||||
|
||||
def outputStrokes(self, out, indent = 0):
|
||||
self.strokes.toSVG(out, self.kId(), [0], [1])
|
||||
|
||||
def simplify(self):
|
||||
self.root.simplify()
|
||||
self.strokes.simplify()
|
||||
|
||||
def getStrokes(self):
|
||||
return self.root.getStrokes()
|
||||
return self.strokes.getStrokes()
|
||||
|
||||
|
||||
class StrokeGr:
|
||||
@@ -200,6 +207,11 @@ class Stroke:
|
||||
def __init__(self, parent):
|
||||
self.stype = None
|
||||
self.svg = None
|
||||
self.numberPos = None
|
||||
|
||||
def numberToSVG(self, out, number, indent = 0):
|
||||
if self.numberPos:
|
||||
out.write("\t" * indent + '<text transform="matrix(1 0 0 1 %.2f %.2f)">%d</text>\n' % (self.numberPos[0], self.numberPos[1], number))
|
||||
|
||||
def toSVG(self, out, rootId, groupCpt, strCpt, indent = 0):
|
||||
pid = rootId + "-s" + str(strCpt[0])
|
||||
@@ -207,85 +219,6 @@ class Stroke:
|
||||
if not self.svg: out.write("\t" * indent + '<path id="%s" d="" kanjivg:type="%s"/>\n' % (pid, self.stype))
|
||||
else: out.write("\t" * indent + '<path id="%s" kanjivg:type="%s" d="%s"/>\n' % (pid, self.stype, self.svg))
|
||||
|
||||
|
||||
class StructuredKanji:
|
||||
"""A more structured format for the kanji, where all the parts of groups are grouped together."""
|
||||
def __init__(self, kanji):
|
||||
self.components = []
|
||||
self.strokes = []
|
||||
|
||||
stk = []
|
||||
self.__buildStructure(kanji.root, stk, None)
|
||||
|
||||
def __mostCommonAncestor(self, np, npp):
|
||||
# Update the parent to the most common parent of all parts
|
||||
npSave = np
|
||||
if np != None:
|
||||
while np != npp:
|
||||
np = np.parent
|
||||
if np == None:
|
||||
npp = npp.parent
|
||||
np = npSave
|
||||
return np
|
||||
|
||||
def __buildStructure(self, group, stk, parent):
|
||||
# Find the component if it exists already, or create it as needed
|
||||
# Number exists and part is > 1, we must find a component which number matches.
|
||||
newParent = None
|
||||
if group.number > 0 and group.part > 1:
|
||||
for component in self.components:
|
||||
if component.element == group.element and component.number == group.number:
|
||||
newParent = component
|
||||
component.parent = self.__mostCommonAncestor(component.parent, parent)
|
||||
break
|
||||
# Should never happen
|
||||
if not newParent: raise Exception("Unable to find component!")
|
||||
# No number but a part, we need the latest component which element matches
|
||||
elif group.part > 1:
|
||||
for component in self.components:
|
||||
if component.element == group.element:
|
||||
newParent = component
|
||||
component.parent = self.__mostCommonAncestor(component.parent, parent)
|
||||
break
|
||||
if not newParent: raise Exception("Unable to find component!")
|
||||
# Either a single part component or a first part - we need to create the component
|
||||
else:
|
||||
# Only do that if the current group has an element
|
||||
if group.element:
|
||||
newParent = StructuredStrokeGroup(parent, group.element, group.original, group.number)
|
||||
self.components.append(newParent)
|
||||
# Else keep the same parent
|
||||
else: newParent = parent
|
||||
|
||||
if newParent != parent: stk.append(newParent)
|
||||
|
||||
# Add the found group as a child of its parent
|
||||
if parent: parent.childs.append(newParent)
|
||||
|
||||
# Now parse the childs of the group
|
||||
for child in group.childs:
|
||||
# Another group - we need to call ourselves recursively to build it
|
||||
if isinstance(child, StrokeGr):
|
||||
self.__buildStructure(child, stk, newParent)
|
||||
# A stroke - just add it to our list as well as
|
||||
# to the list of all the parents on the stack
|
||||
elif isinstance(child, Stroke):
|
||||
self.strokes.append(child)
|
||||
for pGroup in stk: pGroup.strokes.append(child)
|
||||
# Set the direct parent of the child
|
||||
child.parent = newParent
|
||||
|
||||
if newParent != parent: stk.pop()
|
||||
|
||||
class StructuredStrokeGroup:
|
||||
def __init__(self, parent, element, original, number):
|
||||
self.parent = parent
|
||||
self.element = element
|
||||
self.original = original
|
||||
self.number = number
|
||||
self.childs = []
|
||||
self.strokes = []
|
||||
|
||||
class KanjisHandler(BasicHandler):
|
||||
"""XML handler for parsing kanji files. It can handle single-kanji files or aggregation files. After parsing, the kanjis are accessible through the kanjis member, indexed by their svg file name."""
|
||||
def __init__(self, code, variant):
|
||||
@@ -353,9 +286,9 @@ class KanjisHandler(BasicHandler):
|
||||
def handle_end_strokegr(self):
|
||||
group = self.groups.pop()
|
||||
if len(self.groups) == 0:
|
||||
if self.kanji.root:
|
||||
if self.kanji.strokes:
|
||||
print "WARNING: overwriting root of kanji!"
|
||||
self.kanji.root = group
|
||||
self.kanji.strokes = group
|
||||
|
||||
def handle_start_stroke(self, attrs):
|
||||
if len(self.groups) == 0: parent = None
|
||||
@@ -366,7 +299,7 @@ class KanjisHandler(BasicHandler):
|
||||
self.groups[-1].childs.append(stroke)
|
||||
|
||||
class SVGHandler(BasicHandler):
|
||||
"""SVG handler for parsing final kanji files. It can handle single-kanji files or aggregation files. After parsing, the kanjis are accessible through the kanjis member, indexed by their svg file name."""
|
||||
"""SVG handler for parsing final kanji files. It can handle single-kanji files or aggregation files. After parsing, the kanji are accessible through the kanjis member, indexed by their svg file name."""
|
||||
def __init__(self):
|
||||
BasicHandler.__init__(self)
|
||||
self.kanjis = {}
|
||||
@@ -431,7 +364,7 @@ class SVGHandler(BasicHandler):
|
||||
group = self.groups.pop()
|
||||
# End of kanji?
|
||||
if len(self.groups) == 0:
|
||||
self.currentKanji.root = group
|
||||
self.currentKanji.strokes = group
|
||||
self.currentKanji = None
|
||||
self.groups = []
|
||||
|
||||
|
||||
+41
-18
@@ -43,7 +43,7 @@ kanjivg:phon CDATA #IMPLIED >
|
||||
xmlns:kanjivg CDATA #FIXED "http://kanjivg.tagaini.net"
|
||||
kanjivg:type CDATA #IMPLIED >
|
||||
]>
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109" style="fill:none;stroke:#000000;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">
|
||||
<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109">
|
||||
""")
|
||||
#<defs>
|
||||
#<marker id="Triangle"
|
||||
@@ -54,30 +54,48 @@ kanjivg:type CDATA #IMPLIED >
|
||||
#<path d="M 0 0 L 10 5 L 0 10 z" />
|
||||
#</marker>
|
||||
#</defs>
|
||||
kanji.toSVG(out)
|
||||
out.write("""<g id="StrokePaths" style="fill:none;stroke:#000000;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">\n""")
|
||||
kanji.outputStrokes(out)
|
||||
out.write("</g>\n");
|
||||
out.write("""<g id="StrokeNumbers" style="font-size:8;fill:#808080">\n""")
|
||||
kanji.outputStrokesNumbers(out)
|
||||
out.write("</g>\n")
|
||||
out.write("</svg>\n")
|
||||
|
||||
# Basic handler to extract the information we need from former SVG files
|
||||
class KanjiStrokeHandler(BasicHandler):
|
||||
def __init__(self):
|
||||
BasicHandler.__init__(self)
|
||||
self.strokes = []
|
||||
self.active = False
|
||||
self.strokesNumbers = []
|
||||
# 0 -> do nothing, 1 -> parse numbers, 2 -> parse strokes
|
||||
self.step = 0
|
||||
|
||||
# Extract position of number
|
||||
def handle_start_text(self, attrs):
|
||||
if self.step != 1: return
|
||||
if not attrs.has_key("transform"): return
|
||||
transformData = attrs["transform"]
|
||||
match = re.match("matrix\(.+ .+ .+ .+ (.+) (.+)\)", transformData)
|
||||
if not match: return
|
||||
self.strokesNumbers.append((float(match.group(1)), float(match.group(2))))
|
||||
|
||||
def handle_start_path(self, attrs):
|
||||
if self.step != 2: return
|
||||
strokeData = attrs["d"]
|
||||
# Replace spaces between digits by the comma separator
|
||||
strokeData = re.sub('(\d) (\d)', '\\1,\\2', strokeData)
|
||||
strokeData = re.sub("[\n\t ]+", "", strokeData)
|
||||
|
||||
self.strokes.append(strokeData)
|
||||
|
||||
def handle_start_g(self, attrs):
|
||||
if attrs.has_key("id") and attrs["id"] == "Vektorbild": self.active = True
|
||||
if attrs.has_key("id"):
|
||||
if attrs["id"] == "StrokeNumbers": self.step = 1
|
||||
elif attrs["id"] == "StrokePaths": self.step = 2
|
||||
|
||||
if __name__ == "__main__":
|
||||
os.mkdir("output")
|
||||
os.mkdir("output/SVG")
|
||||
os.mkdir("output/SVGMismatch")
|
||||
os.mkdir("kanjivg")
|
||||
os.mkdir("kanjivgMismatch")
|
||||
files = os.listdir("XML")
|
||||
handled = set()
|
||||
metComponents = set()
|
||||
@@ -108,23 +126,27 @@ if __name__ == "__main__":
|
||||
kanji = descHandler.kanji
|
||||
desc = kanji.getStrokes()
|
||||
svg = svgHandler.strokes
|
||||
numbers = svgHandler.strokesNumbers
|
||||
if len(svg) != len(numbers):
|
||||
print "Warning: kanji %s has %d strokes but %d numbers!" % (kId, len(svg), len(numbers))
|
||||
|
||||
# Copy SVG into kanji desc
|
||||
for i in range(min(len(desc), len(svg))):
|
||||
desc[i].svg = svg[i]
|
||||
if i < len(numbers): desc[i].numberPos = numbers[i]
|
||||
|
||||
# Add dummy strokes for SVG orphans
|
||||
for i in range(len(desc), len(svg)):
|
||||
s = Stroke(kanji.root)
|
||||
s = Stroke(kanji.strokes)
|
||||
s.stype = "Missing stroke"
|
||||
s.svg = svg[i]
|
||||
kanji.root.childs.append(s)
|
||||
kanji.strokes.childs.append(s)
|
||||
|
||||
if len(desc) != len(svg): dst = "SVGMismatch"
|
||||
else: dst = "SVG"
|
||||
out = codecs.open("output/%s/%s.svg" % (dst, kanji.kId()), "w", "utf-8")
|
||||
if len(desc) != len(svg): dst = "kanjivgMismatch"
|
||||
else: dst = "kanjivg"
|
||||
out = codecs.open("%s/%s.svg" % (dst, kanji.kId()), "w", "utf-8")
|
||||
createSVG(out, kanji)
|
||||
|
||||
|
||||
# Now parse orphan SVGs (probably just kana and romaji)
|
||||
files = os.listdir("SVG")
|
||||
for f in files:
|
||||
@@ -143,9 +165,10 @@ if __name__ == "__main__":
|
||||
parser.parse(os.path.join("SVG", f))
|
||||
|
||||
kanji = Kanji(int(code, 16), variant)
|
||||
kanji.root = StrokeGr(None)
|
||||
kanji.strokes = StrokeGr(None)
|
||||
for s in svgHandler.strokes:
|
||||
stroke = Stroke(kanji.root)
|
||||
stroke = Stroke(kanji.strokes)
|
||||
stroke.svg = s
|
||||
kanji.root.childs.append(stroke)
|
||||
# TODO merge with upper part - kana and romaji should not be considered mismatched
|
||||
kanji.strokes.childs.append(stroke)
|
||||
out = codecs.open("kanjivg/%s.svg" % (kanji.kId(),), "w", "utf-8")
|
||||
createSVG(out, kanji)
|
||||
|
||||
Regular → Executable
+1
-1
@@ -22,7 +22,7 @@
|
||||
import os, datetime, re
|
||||
from kanjivg import licenseString
|
||||
|
||||
__datadir = "output/SVG"
|
||||
__datadir = "kanjivg"
|
||||
|
||||
if __name__ == "__main__":
|
||||
allfiles = os.listdir(__datadir)
|
||||
|
||||
Reference in New Issue
Block a user