From 74a79ace9beba3e9192fe3e27f8105dea0d25968 Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Mon, 27 Dec 2010 15:13:21 +0000 Subject: [PATCH] Continued implementation of new format - viewer now uses it --- createsvgfiles.py | 21 +++---- kanjivg.py | 137 ++++++++++++++++++++++++++++++---------------- mergexml.py | 12 ++-- viewer.py | 65 ++-------------------- 4 files changed, 110 insertions(+), 125 deletions(-) diff --git a/createsvgfiles.py b/createsvgfiles.py index 74deb9af6..418838813 100755 --- a/createsvgfiles.py +++ b/createsvgfiles.py @@ -18,6 +18,7 @@ kanjivg:variant CDATA #IMPLIED kanjivg:partial CDATA #IMPLIED kanjivg:original CDATA #IMPLIED kanjivg:part CDATA #IMPLIED +kanjivg:number CDATA #IMPLIED kanjivg:tradForm CDATA #IMPLIED kanjivg:radicalForm CDATA #IMPLIED kanjivg:position CDATA #IMPLIED @@ -28,16 +29,16 @@ xmlns:kanjivg CDATA #FIXED "http://kanjivg.tagaini.net" kanjivg:type CDATA #IMPLIED > ]> - - - - - """) +# + # + # + # +# kanji.toSVG(out) out.write("\n") @@ -47,6 +48,6 @@ if __name__ == "__main__": kanjis = handler.kanjis.values() for kanji in kanjis: - out = codecs.open("currentdata/SVG/" + str(kanji.id) + ".svg", "w", "utf-8") + out = codecs.open("data/" + str(kanji.id) + ".svg", "w", "utf-8") createSVG(out, kanji) diff --git a/kanjivg.py b/kanjivg.py index b1c14e0ad..255b7a2d8 100644 --- a/kanjivg.py +++ b/kanjivg.py @@ -62,7 +62,7 @@ class Kanji: self.root = None def toSVG(self, out, indent = 0): - self.root.toSVG(out, self.id, [0]) + self.root.toSVG(out, self.id, [0], [1]) def toXML(self, out, indent = 0): out.write("\t" * indent + '\n' % (self.midashi, self.id)) @@ -97,17 +97,10 @@ class StrokeGr: self.childs = [] - def toSVG(self, out, idRoot, idCpt = [0], indent = 0): - if idCpt[0] == 0: - gid = "strokes-" + idRoot - else: - if (self.element): elt = self.element - else: elt = self.original - gid = "c" + idRoot - #if elt: gid += "-" + hex(realord(elt))[2:] - #else: gid += "-xxxx" - gid += "-" + str(idCpt[0]) - idCpt[0] += 1 + def toSVG(self, out, rootId, groupCpt = [0], strCpt = [1], indent = 0): + gid = rootId + "-g" + str(groupCpt[0]) + groupCpt[0] += 1 + idString = ' id="%s"' % (gid) eltString = "" if self.element: eltString = ' kanjivg:element="%s"' % (self.element) @@ -134,38 +127,10 @@ class StrokeGr: out.write("\t" * indent + '\n' % (idString, eltString, partString, numberString, variantString, origString, partialString, tradFormString, radicalFormString, posString, radString, phonString)) for child in self.childs: - child.toSVG(out, idRoot, idCpt, indent + 1) + child.toSVG(out, rootId, groupCpt, strCpt, indent + 1) out.write("\t" * indent + '\n') - def toXML(self, out, indent = 0): - eltString = "" - if self.element: eltString = ' element="%s"' % (self.element) - variantString = "" - if self.variant: variantString = ' variant="true"' - partialString = "" - if self.partial: partialString = ' partial="true"' - origString = "" - if self.original: origString = ' original="%s"' % (self.original) - partString = "" - if self.part: partString = ' part="%d"' % (self.part) - numberString = "" - if self.number: numberString = ' number="%d"' % (self.number) - tradFormString = "" - if self.tradForm: tradFormString = ' tradForm="true"' - radicalFormString = "" - if self.radicalForm: radicalFormString = ' radicalForm="true"' - posString = "" - if self.position: posString = ' position="%s"' % (self.position) - radString = "" - if self.radical: radString = ' radical="%s"' % (self.radical) - phonString = "" - if self.phon: phonString = ' phon="%s"' % (self.phon) - out.write("\t" * indent + '\n' % (eltString, partString, numberString, variantString, origString, partialString, tradFormString, radicalFormString, posString, radString, phonString)) - - for child in self.childs: child.toXML(out, indent + 1) - - out.write("\t" * indent + '\n') def components(self, simplified = True, recursive = False, level = 0): ret = [] @@ -232,15 +197,12 @@ class Stroke: self.stype = None self.svg = None - def toSVG(self, out, idRoot, idCpt, indent = 0): - pid = "s" + idRoot + "-" + str(idCpt[0]) - idCpt[0] += 1 + def toSVG(self, out, rootId, groupCpt, strCpt, indent = 0): + pid = rootId + "-s" + str(strCpt[0]) + strCpt[0] += 1 if not self.svg: out.write("\t" * indent + '\n' % (pid, self.stype)) - else: out.write("\t" * indent + '\n' % (pid, self.svg, self.stype)) + else: out.write("\t" * indent + '\n' % (pid, self.stype, self.svg)) - def toXML(self, out, indent = 0): - if not self.svg: out.write("\t" * indent + '\n' % (self.stype)) - else: out.write("\t" * indent + '\n' % (self.stype, self.svg)) class StructuredKanji: """A more structured format for the kanji, where all the parts of groups are grouped together.""" @@ -408,3 +370,82 @@ class KanjisHandler(BasicHandler): stroke.stype = unicode(attrs["type"]) if attrs.has_key("path"): stroke.svg = unicode(attrs["path"]) self.groups[-1].childs.append(stroke) + +class SVGHandler(BasicHandler): + """SVG handler for parsing final kanji files. It can handle single-kanji files or aggregation files. After parsing, the kanjis are accessible through the kanjis member, indexed by their svg file name.""" + def __init__(self): + BasicHandler.__init__(self) + self.kanjis = {} + self.currentKanji = None + self.groups = [] + self.metComponents = set() + + def handle_start_g(self, attrs): + # Special case for handling the root + if len(self.groups) == 0: + id = hex(realord(attrs["kanjivg:element"]))[2:] + self.currentKanji = Kanji(id) + self.kanjis[id] = self.currentKanji + self.compCpt = {} + parent = None + else: parent = self.groups[-1] + + group = StrokeGr(parent) + # Now parse group attributes + if attrs.has_key("kanjivg:element"): group.element = unicode(attrs["kanjivg:element"]) + if attrs.has_key("kanjivg:variant"): group.variant = str(attrs["kanjivg:variant"]) + if attrs.has_key("kanjivg:partial"): group.partial = str(attrs["kanjivg:partial"]) + if attrs.has_key("kanjivg:original"): group.original = unicode(attrs["kanjivg:original"]) + if attrs.has_key("kanjivg:part"): group.part = int(attrs["kanjivg:part"]) + if attrs.has_key("kanjivg:number"): group.number = int(attrs["kanjivg:number"]) + if attrs.has_key("kanjivg:tradForm") and str(attrs["kanjivg:tradForm"]) == "true": group.tradForm = True + if attrs.has_key("kanjivg:radicalForm") and str(attrs["kanjivg:radicalForm"]) == "true": group.radicalForm = True + if attrs.has_key("kanjivg:position"): group.position = unicode(attrs["kanjivg:position"]) + if attrs.has_key("kanjivg:radical"): group.radical = unicode(attrs["kanjivg:radical"]) + if attrs.has_key("kanjivg:phon"): group.phon = unicode(attrs["kanjivg:phon"]) + + self.groups.append(group) + + if group.element: self.metComponents.add(group.element) + if group.original: self.metComponents.add(group.original) + + if group.number: + if not group.part: print "%s: Number specified, but part missing" % (self.currentKanji.id) + # The group must exist already + if group.part > 1: + if not self.compCpt.has_key(group.element + str(group.number)): + print "%s: Missing numbered group" % (self.currentKanji.id) + elif self.compCpt[group.element + str(group.number)] != group.part - 1: + print "%s: Incorrectly numbered group" % (self.currentKanji.id) + # The group must not exist + else: + if self.compCpt.has_key(group.element + str(group.number)): + print "%s: Duplicate numbered group" % (self.currentKanji.id) + self.compCpt[group.element + str(group.number)] = group.part + # No number, just a part - groups restart with part 1, otherwise must + # increase correctly + elif group.part: + # The group must exist already + if group.part > 1: + if not self.compCpt.has_key(group.element): + print "%s: Incorrectly started multi-part group" % (self.currentKanji.id) + elif self.compCpt[group.element] != group.part - 1: + print "%s: Incorrectly splitted multi-part group" % (self.currentKanji.id) + self.compCpt[group.element] = group.part + + def handle_end_g(self): + group = self.groups.pop() + # End of kanji? + if len(self.groups) == 0: + self.currentKanji.root = group + self.currentKanji = None + self.groups = [] + + + def handle_start_path(self, attrs): + if len(self.groups) == 0: parent = None + else: parent = self.groups[-1] + stroke = Stroke(parent) + stroke.stype = unicode(attrs["kanjivg:type"]) + if attrs.has_key("d"): stroke.svg = unicode(attrs["d"]) + self.groups[-1].childs.append(stroke) diff --git a/mergexml.py b/mergexml.py index fb785bac6..988805519 100755 --- a/mergexml.py +++ b/mergexml.py @@ -43,10 +43,10 @@ if __name__ == "__main__": handled = set() metComponents = set() for f in files: - # Let's skip the variations out of the process for now... - if len(f) > 10: continue - + # Let's keep the variations out of the process for now... + if '-' in f: continue if not f.endswith(".xml"): continue + descHandler = KanjisHandler() xml.sax.parse(os.path.join("XML", f), descHandler) handled.add(realchr(int(f[:-4], 16))) @@ -79,10 +79,10 @@ if __name__ == "__main__": # Now parse orphan SVGs (probably just kana and romaji) files = os.listdir("SVG") for f in files: - # Let's skip the variations out of the process for now... - if len(f) > 10: continue - + # Let's keep the variations out of the process for now... + if '-' in f: continue if not f.endswith(".svg"): continue + if realchr(int(f[:-4], 16)) in handled: continue parser = xml.sax.make_parser() svgHandler = KanjiStrokeHandler() diff --git a/viewer.py b/viewer.py index 094c7bca0..abd86a91d 100755 --- a/viewer.py +++ b/viewer.py @@ -20,47 +20,16 @@ import sys, os, xml.sax, re, codecs, datetime from PyQt4 import QtGui, QtCore from kanjivg import * -class KanjiStrokeHandler(BasicHandler): - def __init__(self): - BasicHandler.__init__(self) - self.strokes = [] - self.active = False - - def handle_start_path(self, attrs): - strokeData = attrs["d"] - # Replace spaces between digits by the comma separator - strokeData = re.sub('(\d) (\d)', '\\1,\\2', strokeData) - strokeData = re.sub("[\n\t ]+", "", strokeData) - - self.strokes.append(strokeData) - - def handle_start_g(self, attrs): - if attrs.has_key("id") and attrs["id"] == "Vektorbild": self.active = True - def loadKanji(code): f = str(code) - descHandler = KanjisHandler() - xml.sax.parse(os.path.join("XML", f + ".xml"), descHandler) - + svgHandler = SVGHandler() parser = xml.sax.make_parser() - svgHandler = KanjiStrokeHandler() parser.setContentHandler(svgHandler) parser.setFeature(xml.sax.handler.feature_external_ges, False) parser.setFeature(xml.sax.handler.feature_external_pes, False) - svgFile = os.path.join("SVG", f + ".svg") - if os.path.exists(svgFile): - parser.parse(svgFile) - - kanji = descHandler.kanjis.values()[0] - desc = kanji.getStrokes() - svg = svgHandler.strokes - if len(desc) != len(svg): - print("Stroke count mismatch!") - sys.exit(1) - - for stroke, path in zip(desc, svg): - stroke.svg = path + parser.parse(os.path.join("data", f + ".svg")) + kanji = svgHandler.kanjis.values()[0] return kanji from PyQt4.QtCore import QPointF @@ -366,33 +335,7 @@ class MainWindow(QtGui.QWidget): self.canvas.update() -def createSVG(out, kanji): - out.write('\n') - out.write("\n\n") - out.write(""" - -]> - -""") - kanji.toSVG(out) - out.write("\n") +from createsvgfiles import createSVG if __name__ == "__main__": if len(sys.argv) != 2: