mirror of
https://github.com/KanjiVG/kanjivg.git
synced 2026-04-21 13:00:43 +02:00
Remove old Python scripts
This commit is contained in:
103
harmonize-svg.py
103
harmonize-svg.py
@@ -1,103 +0,0 @@
|
||||
#!/usr/bin/python2
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2009 Alexandre Courbot
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
from xml.etree.ElementTree import XMLID, tostring
|
||||
import re, codecs, os, string, kanjivg, os.path, sys
|
||||
|
||||
def findText(elt):
    """Return the first non-empty text found by following first children.

    Starting at *elt*, return its ``text`` if it is non-empty; otherwise
    descend into the first child and repeat.  Only the first child of each
    level is ever inspected.

    Returns None when the chain of first children ends without any text.
    """
    node = elt
    while True:
        if node.text:
            return node.text
        # Element.getchildren() was removed in Python 3.9; iterating the
        # element yields the same direct children on every version.
        children = list(node)
        if not children:
            return None
        node = children[0]
|
||||
|
||||
class Parser:
    """Minimal template expander for ``$$name`` placeholders.

    Each ``$$name`` found in ``content`` is replaced by the return value of
    the method ``callback_name()`` if the instance defines one, and removed
    otherwise.  Subclasses provide the ``callback_*`` methods.
    """

    # "$$" followed by a (possibly empty) run of word characters.
    _PLACEHOLDER = re.compile(r'\$\$(\w*)')

    def __init__(self, content):
        # Template text; rewritten in place by parse().
        self.content = content

    def parse(self):
        """Expand every placeholder in ``self.content``.

        The text is rescanned from the start after each substitution, so a
        placeholder produced by a callback is expanded as well.
        """
        while True:
            match = self._PLACEHOLDER.search(self.content)
            if not match:
                break
            callback = getattr(self, 'callback_' + match.group(1), None)
            # Unknown placeholders are dropped from the output.
            replacement = callback() if callback is not None else ''
            self.content = (self.content[:match.start(0)]
                            + replacement
                            + self.content[match.end(0):])
|
||||
|
||||
class TemplateParser(Parser):
    """Parser that fills an SVG template with the data of one kanji file.

    Placeholders handled: ``$$kanji``, ``$$strokenumbers`` and
    ``$$strokepaths``.

    content  -- template text containing the placeholders
    kanji    -- the kanji character the file describes
    document -- root element of the parsed SVG file
    groups   -- mapping from ``id`` attribute to element (as built by XMLID)
    """

    _SVG_NS = "{http://www.w3.org/2000/svg}"

    def __init__(self, content, kanji, document, groups):
        Parser.__init__(self, content)
        self.kanji = kanji
        self.document = document
        self.groups = groups

    def _group(self, name):
        """Return the element registered under *name*, or None after reporting it missing."""
        if name not in self.groups:
            print("Error - no %s group for kanji %s (%s)" % (name, self.kanji, hex(kanjivg.realord(self.kanji))))
            return None
        return self.groups[name]

    def callback_kanji(self):
        """Return the kanji character itself."""
        return self.kanji

    def callback_strokenumbers(self):
        """Return one normalized ``<text>`` element per stroke number.

        Only the ``transform``, ``x`` and ``y`` attributes are kept, in that
        order.  Returns an empty string when the StrokeNumbers group is missing.
        """
        numbers = self._group("StrokeNumbers")
        if numbers is None:
            return ""
        strs = []
        for elt in numbers.findall(".//" + self._SVG_NS + "text"):
            attrs = ''.join(' %s="%s"' % (name, elt.attrib[name])
                            for name in ("transform", "x", "y")
                            if name in elt.attrib)
            strs.append('<text%s>%s</text>' % (attrs, findText(elt)))
        return "\n\t\t".join(strs)

    def callback_strokepaths(self):
        """Return one normalized ``<path>`` element per stroke.

        Path data is compacted: a space separating two digits becomes a comma
        and all remaining whitespace is dropped.  Returns an empty string when
        the StrokePaths group is missing.
        """
        paths = self._group("StrokePaths")
        if paths is None:
            return ""
        strs = []
        for elt in paths.findall(".//" + self._SVG_NS + "path"):
            d = elt.attrib["d"]
            # The lookahead keeps the second digit available for the next
            # match.  Consuming it (as '(\d) (\d)' did) left every other gap
            # as a bare space, which the whitespace strip below then removed,
            # fusing two numbers into one ("30 40 50" -> "30,4050").
            d = re.sub(r'(\d) (?=\d)', r'\1,', d)
            d = re.sub("[\n\t ]+", "", d)
            strs.append('<path d="%s"/>' % (d,))
        return "\n\t\t".join(strs)
|
||||
|
||||
if __name__ == "__main__":
    # Only process the files given as arguments...
    if len(sys.argv) > 1:
        filesToProceed = sys.argv[1:]
    # ...or the whole SVG directory if no argument is given.
    else:
        filesToProceed = [os.path.join("SVG", f)
                          for f in os.listdir("SVG")
                          if f.endswith(".svg")]

    for f in filesToProceed:
        fname = os.path.basename(f)
        # The file name starts with the code point in hex: five digits when
        # the fifth character is a hex digit, four otherwise.
        hexLen = 5 if len(fname) > 4 and fname[4] in "0123456789abcdef" else 4
        kanji = kanjivg.realchr(int(fname[:hexLen], 16))

        # Read everything before the file is reopened for writing, and close
        # each handle deterministically so the rewritten file is flushed.
        with open(f) as src:
            document, groups = XMLID(src.read())
        with open("template.svg") as tpl:
            template = tpl.read()

        tpp = TemplateParser(template, kanji, document, groups)
        tpp.parse()

        with codecs.open(f, "w", "utf-8") as out:
            out.write(tpp.content)
|
||||
381
kanjivg.py
381
kanjivg.py
@@ -1,381 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2009-2013 Alexandre Courbot
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
from xmlhandler import *
|
||||
|
||||
# Sample licence header
|
||||
licenseString = """Copyright (C) 2009-2013 Ulrich Apel.
|
||||
This work is distributed under the conditions of the Creative Commons
|
||||
Attribution-Share Alike 3.0 Licence. This means you are free:
|
||||
* to Share - to copy, distribute and transmit the work
|
||||
* to Remix - to adapt the work
|
||||
|
||||
Under the following conditions:
|
||||
* Attribution. You must attribute the work by stating your use of KanjiVG in
|
||||
your own copyright header and linking to KanjiVG's website
|
||||
(http://kanjivg.tagaini.net)
|
||||
* Share Alike. If you alter, transform, or build upon this work, you may
|
||||
distribute the resulting work only under the same or similar license to this
|
||||
one.
|
||||
|
||||
See http://creativecommons.org/licenses/by-sa/3.0/ for more details."""
|
||||
|
||||
def isKanji(v):
    """Tell whether the code point *v* (an int) lies in one of the kanji ranges.

    The accepted inclusive ranges are 0x4E00-0x9FC3, 0x3400-0x4DBF,
    0xF900-0xFAD9, 0x2E80-0x2EFF and 0x20000-0x2A6DF.
    """
    ranges = (
        (0x4E00, 0x9FC3),
        (0x3400, 0x4DBF),
        (0xF900, 0xFAD9),
        (0x2E80, 0x2EFF),
        (0x20000, 0x2A6DF),
    )
    for low, high in ranges:
        if low <= v <= high:
            return True
    return False
|
||||
|
||||
# Returns the unicode of a character in a unicode string, taking surrogate pairs into account
|
||||
def realord(s, pos=0):
    """Return the code point of the character at ``s[pos]``.

    If ``s[pos]`` is a high surrogate followed by a low surrogate, the two
    are combined into the single code point they encode.

    Returns None when *s* is None, and 0 (after printing a warning) when a
    high surrogate is the last character of the string.
    """
    if s is None:
        return None
    code = ord(s[pos])
    if 0xD800 <= code < 0xDC00:
        if len(s) <= pos + 1:
            print("realord warning: missing surrogate character")
            return 0
        code2 = ord(s[pos + 1])
        # The upper bound must be checked on the *second* unit; testing the
        # first one (always true here) accepted any value >= 0xDC00 as a low
        # surrogate.
        if 0xDC00 <= code2 < 0xE000:
            code = 0x10000 + ((code - 0xD800) << 10) + (code2 - 0xDC00)
    return code
|
||||
|
||||
def realchr(i):
    """Return the string for code point *i* (the inverse of realord).

    On Python 2, code points of 0x10000 and above are returned as a
    surrogate pair.  On Python 3, where ``unichr`` does not exist, ``chr``
    is used for every code point.
    """
    try:
        to_char = unichr
    except NameError:
        # Python 3: chr() covers the whole Unicode range directly.
        return chr(i)
    if i < 0x10000:
        return to_char(i)
    high = ((i - 0x10000) >> 10) + 0xD800
    low = 0xDC00 + (i & 0x3ff)
    return to_char(high) + to_char(low)
|
||||
|
||||
class Kanji:
    """Describes a kanji. The root stroke group is accessible from the strokes member."""

    def __init__(self, code, variant=None):
        # Unicode of char being represented (int)
        self.code = code
        # Variant of the character, if any.  Optional, so a kanji without a
        # variant can be built from its code alone.
        self.variant = variant
        # Root stroke group; None until one is assigned.
        self.strokes = None

    def kId(self):
        """Return the string identifier that uniquely identifies the kanji.

        This is the code as at least five lowercase hex digits, followed by
        ``-<variant>`` when a variant is set.
        """
        ret = "%05x" % (self.code,)
        if self.variant:
            ret += "-%s" % (self.variant,)
        return ret

    def outputStrokesNumbers(self, out, indent=0):
        """Write the number label of every stroke to *out*.

        Strokes are numbered from 1 in the order returned by getStrokes()
        and written one indentation level deeper than *indent*.
        """
        for cpt, stroke in enumerate(self.getStrokes(), 1):
            stroke.numberToSVG(out, cpt, indent + 1)

    def outputStrokes(self, out, indent=0):
        """Write the whole stroke group tree to *out* as SVG.

        NOTE(review): *indent* is accepted but not forwarded, so output
        always starts at indentation 0; kept as is to leave output unchanged.
        """
        self.strokes.toSVG(out, self.kId(), [0], [1])

    def simplify(self):
        """Simplify the stroke group tree in place."""
        self.strokes.simplify()

    def getStrokes(self):
        """Return the flat list of strokes of the root stroke group."""
        return self.strokes.getStrokes()
|
||||
|
||||
|
||||
class StrokeGr:
    """Describes a stroke group belonging to a kanji as closely as possible to the XML format.

    Sub-stroke groups or strokes are available in the childs member. They can
    either be of class StrokeGr or Stroke so their type should be checked.
    """

    # Attributes that block a merge in simplify() when parent and child both
    # set them to different values.
    _CONFLICTING = ("element", "original", "part", "variant", "partial",
                    "tradForm", "radicalForm", "radical", "phon")
    # Attributes copied from a merged child when the child sets them.
    _MERGED = ("element", "original", "part", "variant", "partial",
               "tradForm", "radicalForm", "position", "radical", "phon")

    def __init__(self, parent):
        self.parent = parent
        if parent:
            parent.childs.append(self)
        # Element of strokegr
        self.element = None
        # A more common, safer element this one derives of
        self.original = None
        self.part = None
        self.number = None
        self.variant = False
        self.partial = False
        self.tradForm = False
        self.radicalForm = False
        self.position = None
        self.radical = None
        self.phon = None

        self.childs = []

    def toSVG(self, out, rootId, groupCpt=None, strCpt=None, indent=0):
        """Write this group and everything below it to *out* as SVG.

        out      -- object with a write() method
        rootId   -- identifier prefix used for all generated ids
        groupCpt -- one-element list holding the next group number; shared
                    with and updated by the recursive calls
        strCpt   -- one-element list holding the next stroke number
        indent   -- number of tabs to put before each line
        """
        # Fresh counters per top-level call.  Mutable defaults would be
        # shared between calls, making numbering continue from the last one.
        if groupCpt is None:
            groupCpt = [0]
        if strCpt is None:
            strCpt = [1]

        gid = rootId
        if groupCpt[0] != 0:
            gid += "-g" + str(groupCpt[0])
        groupCpt[0] += 1

        # Attribute order is part of the output format.
        attrs = [' id="kvg:%s"' % (gid,)]
        if self.element:
            attrs.append(' kvg:element="%s"' % (self.element,))
        if self.part:
            attrs.append(' kvg:part="%d"' % (self.part,))
        if self.number:
            attrs.append(' kvg:number="%d"' % (self.number,))
        if self.variant:
            attrs.append(' kvg:variant="true"')
        if self.original:
            attrs.append(' kvg:original="%s"' % (self.original,))
        if self.partial:
            attrs.append(' kvg:partial="true"')
        if self.tradForm:
            attrs.append(' kvg:tradForm="true"')
        if self.radicalForm:
            attrs.append(' kvg:radicalForm="true"')
        if self.position:
            attrs.append(' kvg:position="%s"' % (self.position,))
        if self.radical:
            attrs.append(' kvg:radical="%s"' % (self.radical,))
        if self.phon:
            attrs.append(' kvg:phon="%s"' % (self.phon,))
        out.write("\t" * indent + '<g%s>\n' % (''.join(attrs),))

        for child in self.childs:
            child.toSVG(out, rootId, groupCpt, strCpt, indent + 1)

        out.write("\t" * indent + '</g>\n')

    def components(self, simplified=True, recursive=False, level=0):
        """Return the components named by the direct sub-groups.

        For each child group, its ``original`` (when *simplified* is true and
        it is set) or else its ``element`` is collected.  With *recursive*,
        and only if at least one component was found at this level, the
        result is ``[level] + found + components of the sub-groups``.

        NOTE(review): components gathered from sub-groups are only included
        in that recursive case; otherwise they are computed and discarded.
        Behaviour kept as found -- confirm whether that is intended.
        """
        ret = []
        childsComp = []
        for child in self.childs:
            if not isinstance(child, StrokeGr):
                continue
            found = False
            # Can we find the component in the child?
            if simplified and child.original:
                ret.append(child.original)
                found = True
            elif child.element:
                ret.append(child.element)
                found = True
            # If not, the components we are looking for are the child's
            # components - we also do that if we asked all the sub-components of the group
            if not found or recursive:
                newLevel = level + 1 if found else level
                childsComp += child.components(simplified, recursive, newLevel)
        if recursive and len(ret) != 0:
            ret = [level] + ret + childsComp
        return ret

    def simplify(self):
        """Merge a lone child group into this group, bottom-up.

        Sub-groups are simplified first.  If this group then has exactly one
        child and that child is a group, the child's attributes and children
        are absorbed, unless both groups set the same attribute to different
        values, or both set a position.
        """
        for child in self.childs:
            if isinstance(child, StrokeGr):
                child.simplify()
        if len(self.childs) != 1 or not isinstance(self.childs[0], StrokeGr):
            return

        only = self.childs[0]
        # Check that there is no conflict (parts, in particular, cannot be merged).
        for name in self._CONFLICTING:
            mine = getattr(self, name)
            theirs = getattr(only, name)
            if theirs and mine and theirs != mine:
                return
        # We want to preserve inner identical positions - we may have something
        # at the top of another top element, for instance.
        if only.position and self.position:
            return

        # Ok, let's merge!
        self.childs = only.childs
        for name in self._MERGED:
            value = getattr(only, name)
            if value:
                setattr(self, name, value)

    def getStrokes(self):
        """Return all strokes below this group as a flat list, in document order."""
        ret = []
        for child in self.childs:
            if isinstance(child, StrokeGr):
                ret += child.getStrokes()
            else:
                ret.append(child)
        return ret
|
||||
|
||||
|
||||
class Stroke:
    """A single stroke, containing its type and (optionally) its SVG data."""

    def __init__(self, parent):
        # parent is accepted for symmetry with StrokeGr but is not stored.
        self.stype = None
        self.svg = None
        self.numberPos = None

    def numberToSVG(self, out, number, indent=0):
        """Write the ``<text>`` label for this stroke's number, if it has a position."""
        if not self.numberPos:
            return
        x = self.numberPos[0]
        y = self.numberPos[1]
        label = '<text transform="matrix(1 0 0 1 %.2f %.2f)">%d</text>\n' % (x, y, number)
        out.write("\t" * indent + label)

    def toSVG(self, out, rootId, groupCpt, strCpt, indent=0):
        """Write this stroke as a ``<path>`` element and advance the stroke counter."""
        pid = rootId + "-s" + str(strCpt[0])
        strCpt[0] += 1
        pieces = ["\t" * indent + '<path id="kvg:%s"' % (pid,)]
        if self.stype:
            pieces.append(' kvg:type="%s"' % (self.stype,))
        if self.svg:
            pieces.append(' d="%s"' % (self.svg))
        pieces.append('/>\n')
        out.write("".join(pieces))
|
||||
|
||||
class KanjisHandler(BasicHandler):
    """XML handler for parsing a kanji file.

    The handler builds a single Kanji from the ``strokegr`` and ``stroke``
    elements it meets. After parsing, that kanji is available through the
    kanji member and every element/original value seen through metComponents.
    """

    # (XML attribute, converter) for attributes copied onto the group as is.
    _VALUE_ATTRS = (
        ("element", unicode),
        ("variant", str),
        ("partial", str),
        ("original", unicode),
        ("part", int),
        ("number", int),
        ("position", unicode),
        ("radical", unicode),
        ("phon", unicode),
    )
    # Attributes that set a boolean flag when their value is "true".
    _FLAG_ATTRS = ("tradForm", "radicalForm")

    def __init__(self, code, variant):
        BasicHandler.__init__(self)
        self.kanji = Kanji(code, variant)
        # Stack of the stroke groups currently open.
        self.groups = []
        # Last part number seen for each multi-part group key.
        self.compCpt = {}
        self.metComponents = set()

    def handle_start_kanji(self, attrs):
        pass

    def handle_end_kanji(self):
        if len(self.groups) != 0:
            print("WARNING: stroke groups remaining after reading kanji!")
        self.groups = []

    def _checkNumbering(self, group):
        """Report inconsistent part/number attributes and record the part seen."""
        kid = self.kanji.kId()
        # A missing part counts as "not a continuation".
        continuation = bool(group.part) and group.part > 1
        if group.number:
            if not group.part:
                print("%s: Number specified, but part missing" % (kid))
            key = group.element + str(group.number)
            # The group must exist already
            if continuation:
                if key not in self.compCpt:
                    print("%s: Missing numbered group" % (kid))
                elif self.compCpt[key] != group.part - 1:
                    print("%s: Incorrectly numbered group" % (kid))
            # The group must not exist
            elif key in self.compCpt:
                print("%s: Duplicate numbered group" % (kid))
            self.compCpt[key] = group.part
        # No number, just a part - groups restart with part 1, otherwise must
        # increase correctly
        elif group.part:
            # The group must exist already
            if continuation:
                if group.element not in self.compCpt:
                    print("%s: Incorrectly started multi-part group" % (kid))
                elif self.compCpt[group.element] != group.part - 1:
                    print("%s: Incorrectly splitted multi-part group" % (kid))
            self.compCpt[group.element] = group.part

    def handle_start_strokegr(self, attrs):
        """Open a new stroke group below the innermost open group."""
        parent = self.groups[-1] if self.groups else None
        group = StrokeGr(parent)

        # Now parse group attributes
        for name, convert in self._VALUE_ATTRS:
            if name in attrs:
                setattr(group, name, convert(attrs[name]))
        for name in self._FLAG_ATTRS:
            if name in attrs and str(attrs[name]) == "true":
                setattr(group, name, True)

        self.groups.append(group)

        if group.element:
            self.metComponents.add(group.element)
        if group.original:
            self.metComponents.add(group.original)

        self._checkNumbering(group)

    def handle_end_strokegr(self):
        """Close the innermost group; the outermost one becomes the kanji's root."""
        group = self.groups.pop()
        if len(self.groups) == 0:
            if self.kanji.strokes:
                print("WARNING: overwriting root of kanji!")
            self.kanji.strokes = group

    def handle_start_stroke(self, attrs):
        """Add a stroke to the innermost open group."""
        parent = self.groups[-1] if self.groups else None
        stroke = Stroke(parent)
        stroke.stype = unicode(attrs["type"])
        if "path" in attrs:
            stroke.svg = unicode(attrs["path"])
        # Stroke does not register itself with its parent, so attach it here.
        self.groups[-1].childs.append(stroke)
|
||||
|
||||
class SVGHandler(BasicHandler):
    """SVG handler for parsing final kanji files.

    It can handle single-kanji files or aggregation files. After parsing, the
    kanji are accessible through the kanjis member, indexed by the hexadecimal
    code point of the root group's kvg:element.
    """

    # (SVG attribute, StrokeGr attribute, converter) for attributes copied as is.
    _VALUE_ATTRS = (
        ("kvg:element", "element", unicode),
        ("kvg:variant", "variant", str),
        ("kvg:partial", "partial", str),
        ("kvg:original", "original", unicode),
        ("kvg:part", "part", int),
        ("kvg:number", "number", int),
        ("kvg:position", "position", unicode),
        ("kvg:radical", "radical", unicode),
        ("kvg:phon", "phon", unicode),
    )
    # Attributes that set a boolean flag when their value is "true".
    _FLAG_ATTRS = (
        ("kvg:tradForm", "tradForm"),
        ("kvg:radicalForm", "radicalForm"),
    )

    def __init__(self):
        BasicHandler.__init__(self)
        self.kanjis = {}
        self.currentKanji = None
        # Stack of the groups currently open.
        self.groups = []
        self.metComponents = set()

    def _checkNumbering(self, group):
        """Report inconsistent part/number attributes and record the part seen."""
        kid = self.currentKanji.kId()
        # A missing part counts as "not a continuation".
        continuation = bool(group.part) and group.part > 1
        if group.number:
            if not group.part:
                print("%s: Number specified, but part missing" % (kid))
            key = group.element + str(group.number)
            # The group must exist already
            if continuation:
                if key not in self.compCpt:
                    print("%s: Missing numbered group" % (kid))
                elif self.compCpt[key] != group.part - 1:
                    print("%s: Incorrectly numbered group" % (kid))
            # The group must not exist
            elif key in self.compCpt:
                print("%s: Duplicate numbered group" % (kid))
            self.compCpt[key] = group.part
        # No number, just a part - groups restart with part 1, otherwise must
        # increase correctly
        elif group.part:
            # The group must exist already
            if continuation:
                if group.element not in self.compCpt:
                    print("%s: Incorrectly started multi-part group" % (kid))
                elif self.compCpt[group.element] != group.part - 1:
                    print("%s: Incorrectly splitted multi-part group" % (kid))
            self.compCpt[group.element] = group.part

    def handle_start_g(self, attrs):
        """Open a new group; an outermost group starts a new kanji."""
        # Special case for handling the root
        if len(self.groups) == 0:
            # Kanji stores the code point as an int (kId() formats it with
            # %x); the dictionary key stays the bare hex string.
            code = realord(attrs["kvg:element"])
            key = hex(code)[2:]
            self.currentKanji = Kanji(code, None)
            self.kanjis[key] = self.currentKanji
            self.compCpt = {}
            parent = None
        else:
            parent = self.groups[-1]

        group = StrokeGr(parent)
        # Now parse group attributes
        for source, target, convert in self._VALUE_ATTRS:
            if source in attrs:
                setattr(group, target, convert(attrs[source]))
        for source, target in self._FLAG_ATTRS:
            if source in attrs and str(attrs[source]) == "true":
                setattr(group, target, True)

        self.groups.append(group)

        if group.element:
            self.metComponents.add(group.element)
        if group.original:
            self.metComponents.add(group.original)

        self._checkNumbering(group)

    def handle_end_g(self):
        """Close the innermost group; closing the outermost one ends the kanji."""
        group = self.groups.pop()
        # End of kanji?
        if len(self.groups) == 0:
            self.currentKanji.strokes = group
            self.currentKanji = None
            self.groups = []

    def handle_start_path(self, attrs):
        """Add a stroke to the innermost open group."""
        parent = self.groups[-1] if self.groups else None
        stroke = Stroke(parent)
        stroke.stype = unicode(attrs["kvg:type"])
        if "d" in attrs:
            stroke.svg = unicode(attrs["d"])
        # Stroke does not register itself with its parent, so attach it here.
        self.groups[-1].childs.append(stroke)
|
||||
111
kvg.py
111
kvg.py
@@ -1,111 +0,0 @@
|
||||
#!/usr/bin/python2
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2011-2013 Alexandre Courbot
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os, os.path, sys, codecs, re, datetime
|
||||
from kanjivg import licenseString
|
||||
|
||||
pathre = re.compile(r'<path .*d="([^"]*)".*/>')
|
||||
|
||||
helpString = """Usage: %s <command> [ kanji files ]
|
||||
Recognized commands:
|
||||
split file1 [ file2 ... ] extract path data into a -paths suffixed file
|
||||
merge file1 [ file2 ... ] merge path data from -paths suffixed file
|
||||
release create single release file""" % (sys.argv[0],)
|
||||
|
||||
def createPathsSVG(f):
    """Extract the path data of SVG file *f* into a ``-paths`` suffixed file.

    Every ``d`` attribute matched by ``pathre`` is written, in order, as a
    bare numbered ``<path>`` element to a file named like *f* with its last
    four characters replaced by ``-paths.svg``.
    """
    with codecs.open(f, "r", "utf-8") as src:
        paths = pathre.findall(src.read())
    # Context managers guarantee the output is flushed and closed.
    with codecs.open(f[:-4] + "-paths.svg", "w", "utf-8") as out:
        out.write("""<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" []>
<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109" style="fill:none;stroke:#000000;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">\n""")
        for i, path in enumerate(paths, 1):
            out.write('<!--%2d--><path d="%s"/>\n' % (i, path))
        out.write("</svg>")
|
||||
|
||||
def mergePathsSVG(f):
    """Merge path data from the ``-paths`` suffixed companion file back into *f*.

    The n-th ``d`` attribute found in *f* is replaced by the n-th one found in
    the companion file.  If the companion file is missing, or the two files do
    not contain the same number of paths, a message is printed and *f* is left
    untouched.
    """
    pFile = f[:-4] + "-paths.svg"
    if not os.path.exists(pFile):
        print("%s does not exist!" % (pFile,))
        return
    with codecs.open(pFile, "r", "utf-8") as src:
        paths = pathre.findall(src.read())
    with codecs.open(f, "r", "utf-8") as src:
        s = src.read()

    pos = 0
    used = 0
    while True:
        # Searching from an offset avoids copying the tail of the string.
        match = pathre.search(s, pos)
        pathsLeft = used < len(paths)
        if bool(match) != pathsLeft:
            print("Paths count mismatch for %s" % (f,))
            return
        if not match:
            break
        replacement = paths[used]
        s = s[:match.start(1)] + replacement + s[match.end(1):]
        # Resume right after the data just inserted.
        pos = match.start(1) + len(replacement)
        used += 1

    with codecs.open(f, "w", "utf-8") as out:
        out.write(s)
|
||||
|
||||
def release():
    """Create the single release file ``kanjivg.xml`` from the ``kanji`` directory.

    Only directory entries whose name is exactly 9 characters long are used
    (presumably the ``xxxxx.svg`` files without a variant suffix -- confirm),
    in sorted order.  Each becomes one ``<kanji>`` element holding the content
    of the file's StrokePaths group.
    """
    datadir = "kanji"
    idMatchString = "<g id=\"kvg:StrokePaths_"
    files = sorted(f for f in os.listdir(datadir) if len(f) == 9)

    # "with" closes the output even if one of the inputs cannot be processed.
    with open("kanjivg.xml", "w") as out:
        out.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        out.write("<!--\n")
        out.write(licenseString)
        out.write("\nThis file has been generated on %s, using the latest KanjiVG data\nto this date." % (datetime.date.today()))
        out.write("\n-->\n")
        out.write("<kanjivg xmlns:kvg='http://kanjivg.tagaini.net'>\n")
        for f in files:
            with open(os.path.join(datadir, f)) as src:
                data = src.read()
            data = data[data.find("<svg "):]
            # Skip to just after the StrokePaths id prefix: what follows, up
            # to the closing quote, is the kanji id.
            data = data[data.find(idMatchString) + len(idMatchString):]
            kidend = data.find("\"")
            # Keep everything from the end of that line up to 5 characters
            # before the StrokeNumbers group (presumably the closing tag of
            # the StrokePaths group -- confirm against the file layout).
            data = "<kanji id=\"kvg:kanji_%s\">" % (data[:kidend],) + data[data.find("\n"):data.find('<g id="kvg:StrokeNumbers_') - 5] + "</kanji>\n"
            out.write(data)
        out.write("</kanjivg>\n")
    print("%d kanji emitted" % len(files))
|
||||
|
||||
# command name -> (function, highest sys.argv index that must be present).
# An index of 1 means the command takes no file argument.
actions = {
    "split": (createPathsSVG, 2),
    "merge": (mergePathsSVG, 2),
    "release": (release, 1),
}

if __name__ == "__main__":
    command = sys.argv[1] if len(sys.argv) > 1 else None
    if command not in actions or len(sys.argv) <= actions[command][1]:
        print(helpString)
        sys.exit(0)

    action, lastRequiredArg = actions[command]
    files = sys.argv[2:]

    if lastRequiredArg == 1:
        # The command takes no file.  Passing one used to call it with an
        # argument it does not accept (TypeError); show the usage instead.
        if files:
            print(helpString)
            sys.exit(0)
        action()
    else:
        for f in files:
            if not os.path.exists(f):
                print("%s does not exist!" % (f,))
                continue
            action(f)
|
||||
@@ -1,50 +0,0 @@
|
||||
#!/usr/bin/python2
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2009 Alexandre Courbot
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import os, codecs, xml.sax
|
||||
from kanjivg import *
|
||||
|
||||
def addComponents(strokegr, compSet):
    """Add to *compSet* the element and original of *strokegr* and of every group below it.

    Only children that are StrokeGr instances are descended into; empty
    element/original values are ignored.
    """
    pending = [strokegr]
    while pending:
        group = pending.pop()
        for label in (group.element, group.original):
            if label:
                compSet.add(label)
        pending.extend(sub for sub in group.childs if isinstance(sub, StrokeGr))
|
||||
|
||||
|
||||
# Script entry point: list the components referenced by the kanji of
# kanjivg.xml that have no entry of their own in that file.
if __name__ == "__main__":
    # Read all kanjis
    # NOTE(review): this uses KanjisHandler() without arguments and reads
    # handler.kanjis, kanji.id and kanji.root.  That does not match a
    # KanjisHandler taking (code, variant) and exposing a single `kanji`
    # with a `strokes` root -- confirm which kanjivg API this script targets.
    handler = KanjisHandler()
    xml.sax.parse("kanjivg.xml", handler)
    kanjis = handler.kanjis.values()

    # Order the kanji by id (Python 2 cmp-style sort on a list).
    kanjis.sort(lambda x,y: cmp(x.id, y.id))

    # Collect every element/original named anywhere in the stroke groups.
    componentsList = set()
    for kanji in kanjis:
        addComponents(kanji.root, componentsList)
    print len(componentsList)

    # A component is missing when the hex code point of its first character
    # is not a key of handler.kanjis.
    missingComponents = set()
    for component in componentsList:
        key = hex(realord(component))[2:]
        if not handler.kanjis.has_key(key): missingComponents.add(component)
    print "Missing components:"
    for component in missingComponents:
        print component, hex(realord(component))
    print len(missingComponents), "missing components"
|
||||
142
swap-strokes.py
142
swap-strokes.py
@@ -1,142 +0,0 @@
|
||||
#! /usr/bin/env python3
|
||||
# -*- coding: utf-8 ; mode: python -*-
|
||||
# © Copyright 2013 ospalh@gmail.com
|
||||
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
|
||||
|
||||
import argparse
|
||||
import re
|
||||
|
||||
"""
|
||||
Swap stroke data in KanjiVG files.
|
||||
|
||||
This is a helper script to fix problems where strokes or stroke
|
||||
numbers are out of order. Run as script with --help for more info.
|
||||
|
||||
N.B.:
|
||||
This is rather brittle. It does not use any kind of xml parser, but
|
||||
looks for strings commonly found in the svg files. Use this only as a
|
||||
support tool. Check that the script did what you expected after
|
||||
running it.
|
||||
"""
|
||||
|
||||
__version__ = '0.1.0'
|
||||
|
||||
# Text that ends a stroke-number line; {0} is the stroke number.
number_text_pattern = '>{0}</text>'
# Regex for a stroke line, capturing its path data; {0} is the stroke number.
stroke_re = r'^\s.*-s{0}" kvg:type=".*" d="(.*)"/>'
# Text that identifies a stroke line; {0} is the stroke number.
stroke_text_pattern = '-s{0}" kvg:type="'


def _read_lines(kanji):
    """Return the lines of the file *kanji*, read as UTF-8."""
    with open(kanji, encoding='utf-8') as kf:
        return kf.readlines()


def _write_lines(kanji, lines):
    """Overwrite the file *kanji* with *lines*, written as UTF-8."""
    with open(kanji, 'w', encoding='utf-8') as kf:
        kf.writelines(lines)


def _swap_marked_lines(kanji, pattern, a, b):
    """Swap the two lines containing *pattern* formatted with *a* and *b*.

    The lines trade places but keep the marker of the position they move to.
    When a marker occurs on several lines, the last one is used.
    """
    lines = _read_lines(kanji)
    marker_a = pattern.format(a)
    marker_b = pattern.format(b)
    num_a = -1
    num_b = -1
    for n, l in enumerate(lines):
        if marker_a in l:
            num_a = n
        if marker_b in l:
            num_b = n
    if num_a < 0 or num_b < 0:
        raise RuntimeError("Did not find both lines")
    line_a = lines[num_a]
    line_b = lines[num_b]
    lines[num_a] = line_b.replace(marker_b, marker_a)
    lines[num_b] = line_a.replace(marker_a, marker_b)
    _write_lines(kanji, lines)


def swap_numbers(kanji, a, b):
    """Swap stroke numbers in a kanjivg file

    kanji is the path of the file, a and b the two stroke numbers.
    Raises RuntimeError when either number label is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    _swap_marked_lines(kanji, number_text_pattern, a, b)


def swap_stroke_data(kanji, a, b):
    """Swap the stroke data in a kanjivg file

    Only the content of the two d attributes is exchanged; ids and stroke
    types stay where they are.  kanji is the path of the file, a and b the
    two stroke numbers.  Raises RuntimeError when either stroke is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    lines = _read_lines(kanji)
    line_a_re = re.compile(stroke_re.format(a))
    line_b_re = re.compile(stroke_re.format(b))
    num_a = -1
    num_b = -1
    line_a_match = None
    line_b_match = None
    for n, l in enumerate(lines):
        m = line_a_re.search(l)
        if m:
            num_a = n
            line_a_match = m
        m = line_b_re.search(l)
        if m:
            num_b = n
            line_b_match = m
    if num_a < 0 or num_b < 0:
        raise RuntimeError("Did not find both lines")
    data_a = line_a_match.group(1)
    data_b = line_b_match.group(1)
    # Splice by position instead of str.replace(): replacing by value would
    # touch every occurrence of the text, and an empty d attribute would
    # insert the new data between all characters of the line.
    lines[num_a] = (lines[num_a][:line_a_match.start(1)] + data_b
                    + lines[num_a][line_a_match.end(1):])
    lines[num_b] = (lines[num_b][:line_b_match.start(1)] + data_a
                    + lines[num_b][line_b_match.end(1):])
    _write_lines(kanji, lines)


def swap_strokes(kanji, a, b):
    """Swap strokes in a kanjivg file

    The whole stroke lines (type and data) trade places; only the stroke ids
    stay in order.  kanji is the path of the file, a and b the two stroke
    numbers.  Raises RuntimeError when either stroke is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    _swap_marked_lines(kanji, stroke_text_pattern, a, b)
|
||||
|
||||
|
||||
# Command-line entry point: pick one of the three swap functions through a
# mandatory, mutually exclusive option and apply it to the given file.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=u"""Swaps data for strokes a and b in the kanjivg svg
file "file".
Select one of the three options, number, data or stroke.
Look at the svg file with a text editor to determine which of the last two
options to use. When both stroke numbers and the strokes themselves are
out of order, run the script twice.""")
    # (short flag, long flag, function stored in args.function, help text)
    modes = (
        ('-n', '--number', swap_numbers,
         u"""Swap the stroke numbers. Use this when the
numbers seen are out of order."""),
        ('-d', '--data', swap_stroke_data,
         u"""Swap only the vector data of the strokes.
Use this when the stroke types are correct in the original file, but the
graphical data doesn't match these types."""),
        ('-s', '--stroke', swap_strokes,
         u"""Swap the whole strokes, including the stroke
type. Use this if the graphical stroke data matches the stroke types in the
original file, but the strokes are in the wrong order."""),
    )
    group = parser.add_mutually_exclusive_group(required=True)
    for short_flag, long_flag, function, help_text in modes:
        group.add_argument(short_flag, long_flag, action='store_const',
                           const=function, dest='function', help=help_text)
    parser.add_argument('file', type=str, help='Kanji SVG file')
    parser.add_argument('stroke_a', type=int, help='First stroke to swap')
    parser.add_argument('stroke_b', type=int,
                        help='Second stroke to swap with the first stroke')
    args = parser.parse_args()
    args.function(args.file, args.stroke_a, args.stroke_b)
|
||||
@@ -1,52 +0,0 @@
|
||||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
#
|
||||
# Copyright (C) 2008 Alexandre Courbot
|
||||
#
|
||||
# This program is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
|
||||
import xml.sax.handler
|
||||
|
||||
class BasicHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that dispatches events to per-element methods.

    For an element named ``foo`` the handler calls, when the subclass defines
    them:

    * ``handle_start_foo(attrs)`` when the element opens,
    * ``handle_data_foo(text)`` when it closes, with the character data
      received since the most recent start tag,
    * ``handle_end_foo()`` right after that.

    ``elementsTree`` holds the names of the currently open elements.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.elementsTree = []
        # Character data received since the last start tag.  It lives in its
        # own attribute: storing it as ``self.characters`` replaced the
        # characters() method with a string on the instance.
        self._text = ""

    def currentElement(self):
        """Return the name of the innermost open element."""
        return str(self.elementsTree[-1])

    def startElement(self, qName, atts):
        name = str(qName)
        self.elementsTree.append(name)
        callback = getattr(self, "handle_start_" + name, None)
        if callback is not None:
            callback(atts)
        # Every start tag, including a child's, restarts text collection.
        self._text = ""
        return True

    def endElement(self, qName):
        name = str(qName)
        callback = getattr(self, "handle_data_" + name, None)
        if callback is not None:
            callback(self._text)
        callback = getattr(self, "handle_end_" + name, None)
        if callback is not None:
            callback()
        self.elementsTree.pop()
        return True

    def characters(self, string):
        self._text += string
        return True
|
||||
Reference in New Issue
Block a user