1
0
mirror of https://github.com/KanjiVG/kanjivg.git synced 2026-04-21 13:00:43 +02:00

Remove old Python scripts

This commit is contained in:
Ben Bullock
2022-03-28 20:41:30 +09:00
parent 5681b9644c
commit 0f6ce3db94
6 changed files with 0 additions and 839 deletions

View File

@@ -1,103 +0,0 @@
#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from xml.etree.ElementTree import XMLID, tostring
import re, codecs, os, string, kanjivg, os.path, sys
def findText(elt):
    """Return the first non-empty text found along the chain of first children.

    Starting at *elt*, return ``elt.text`` when it is non-empty; otherwise
    move to the element's first child and try again.  Returns ``None`` when
    an element without text and without children is reached.
    """
    while True:
        if elt.text:
            return elt.text
        # Iterating an Element yields its direct children.  This replaces
        # Element.getchildren(), which was removed in Python 3.9.
        children = list(elt)
        if not children:
            return None
        elt = children[0]
class Parser:
    """Tiny template expander for ``$$name`` placeholders.

    Each ``$$name`` found in ``content`` is replaced by the return value of
    the method ``callback_name`` when the instance has one, and is simply
    removed otherwise.
    """

    def __init__(self, content):
        self.content = content

    def parse(self):
        """Expand every placeholder of ``self.content`` in place.

        The text is searched again from the start after each substitution,
        so text produced by a callback is scanned for placeholders too.
        """
        placeholder = re.compile(r'\$\$(\w*)')
        found = placeholder.search(self.content)
        while found:
            before = self.content[:found.start(0)]
            after = self.content[found.end(0):]
            callbackName = 'callback_' + found.group(1)
            if hasattr(self, callbackName):
                self.content = before + getattr(self, callbackName)() + after
            else:
                # Unknown placeholder: drop it from the text.
                self.content = before + after
            found = placeholder.search(self.content)
class TemplateParser(Parser):
    """Template expander that fills an SVG template from a parsed kanji file.

    ``kanji`` is the character itself, ``document`` the parsed XML tree and
    ``groups`` a mapping from XML ``id`` values to elements (the second item
    returned by ``xml.etree.ElementTree.XMLID``).
    """

    def __init__(self, content, kanji, document, groups):
        Parser.__init__(self, content)
        self.kanji = kanji
        self.document = document
        self.groups = groups

    def callback_kanji(self):
        """Expansion of ``$$kanji``: the character itself."""
        return self.kanji

    def callback_strokenumbers(self):
        """Expansion of ``$$strokenumbers``: one ``<text>`` element per label.

        Only the ``transform``, ``x`` and ``y`` attributes are kept.  Returns
        an empty string (after printing an error) when the document has no
        ``StrokeNumbers`` group.
        """
        if "StrokeNumbers" not in self.groups:
            print("Error - no StrokeNumbers group for kanji %s (%s)" % (self.kanji, hex(kanjivg.realord(self.kanji))))
            return ""
        numbers = self.groups["StrokeNumbers"]
        labels = []
        for elt in numbers.findall(".//{http://www.w3.org/2000/svg}text"):
            attrs = [' %s="%s"' % (name, elt.attrib[name])
                     for name in ("transform", "x", "y")
                     if name in elt.attrib]
            labels.append('<text%s>%s</text>' % (''.join(attrs), findText(elt)))
        return "\n\t\t".join(labels)

    def callback_strokepaths(self):
        """Expansion of ``$$strokepaths``: one normalised ``<path>`` per stroke.

        In each ``d`` attribute a single space between two digits becomes a
        comma and every remaining run of whitespace is removed.  Returns an
        empty string (after printing an error) when the document has no
        ``StrokePaths`` group.
        """
        if "StrokePaths" not in self.groups:
            print("Error - no StrokePaths group for kanji %s (%s)" % (self.kanji, hex(kanjivg.realord(self.kanji))))
            return ""
        paths = self.groups["StrokePaths"]
        normalised = []
        for elt in paths.findall(".//{http://www.w3.org/2000/svg}path"):
            d = elt.attrib["d"]
            # The lookahead leaves the right-hand digit unconsumed so it can
            # also act as the left-hand digit of the next separator.  Matching
            # both digits turned "1 2 3" into "1,2 3", which the whitespace
            # removal below then fused into "1,23".
            d = re.sub(r'(\d) (?=\d)', r'\1,', d)
            d = re.sub("[\n\t ]+", "", d)
            normalised.append('<path d="%s"/>' % (d,))
        return "\n\t\t".join(normalised)
if __name__ == "__main__":
    # Only process files given as argument...
    if len(sys.argv) > 1:
        filesToProceed = sys.argv[1:]
    # Or do the whole SVG set if no argument is given
    else:
        filesToProceed = [os.path.join("SVG", f)
                          for f in os.listdir("SVG")
                          if f.endswith(".svg")]
    for f in filesToProceed:
        fname = f.split(os.path.sep)[-1]
        # The file name starts with the code point in hexadecimal: five
        # digits when the fifth character is a hex digit, four otherwise.
        digits = 5 if fname[4] in "0123456789abcdef" else 4
        kanji = kanjivg.realchr(int(fname[:digits], 16))
        # Every file is opened in a with-block so its handle is released
        # (and, for the output, flushed) before the next file is processed.
        with open(f) as svgFile:
            document, groups = XMLID(svgFile.read())
        with open("template.svg") as templateFile:
            template = templateFile.read()
        tpp = TemplateParser(template, kanji, document, groups)
        tpp.parse()
        # The input file is overwritten with the expanded template.
        with codecs.open(f, "w", "utf-8") as out:
            out.write(tpp.content)

View File

@@ -1,381 +0,0 @@
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009-2013 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from xmlhandler import *
# Sample licence header: the Creative Commons Attribution-Share Alike 3.0
# notice for the KanjiVG data, kept as a plain string so that tools can
# embed it in the files they generate.
licenseString = """Copyright (C) 2009-2013 Ulrich Apel.
This work is distributed under the conditions of the Creative Commons
Attribution-Share Alike 3.0 Licence. This means you are free:
* to Share - to copy, distribute and transmit the work
* to Remix - to adapt the work
Under the following conditions:
* Attribution. You must attribute the work by stating your use of KanjiVG in
your own copyright header and linking to KanjiVG's website
(http://kanjivg.tagaini.net)
* Share Alike. If you alter, transform, or build upon this work, you may
distribute the resulting work only under the same or similar license to this
one.
See http://creativecommons.org/licenses/by-sa/3.0/ for more details."""
def isKanji(v):
    """Return True when code point *v* falls in one of the ranges below."""
    ranges = (
        (0x4E00, 0x9FC3),    # CJK Unified Ideographs
        (0x3400, 0x4DBF),    # CJK Unified Ideographs Extension A
        (0xF900, 0xFAD9),    # CJK Compatibility Ideographs
        (0x2E80, 0x2EFF),    # CJK Radicals Supplement
        (0x20000, 0x2A6DF),  # CJK Unified Ideographs Extension B
    )
    return any(low <= v <= high for low, high in ranges)
def realord(s, pos = 0):
    """Return the code point of the character at ``s[pos]``.

    Surrogate pairs are taken into account: when ``s[pos]`` is a high
    surrogate and ``s[pos + 1]`` a low surrogate, the code point encoded by
    the pair is returned.  A high surrogate followed by anything else is
    returned unchanged.

    Returns ``None`` when *s* is ``None``, and 0 (after printing a warning)
    when a high surrogate is the last character of *s*.
    """
    if s is None:
        return None
    code = ord(s[pos])
    if 0xD800 <= code < 0xDC00:
        if len(s) <= pos + 1:
            print("realord warning: missing surrogate character")
            return 0
        code2 = ord(s[pos + 1])
        # The upper bound must be checked on the second unit; the earlier
        # test on `code` was always true, so any unit >= 0xDC00 was combined.
        if 0xDC00 <= code2 < 0xE000:
            code = 0x10000 + ((code - 0xD800) << 10) + (code2 - 0xDC00)
    return code
def realchr(i):
    """Return the text string for code point *i*.

    On Python 2, where ``unichr`` exists, code points from 0x10000 upwards
    are returned as a UTF-16 surrogate pair (two code units), exactly as
    before.  Where ``unichr`` is not defined (Python 3), ``chr`` is used
    and the result is always a single character.
    """
    try:
        toChar = unichr
    except NameError:
        # Python 3: str holds full code points, no surrogate pair is needed.
        return chr(i)
    if i < 0x10000:
        return toChar(i)
    high = ((i - 0x10000) >> 10) + 0xD800
    low = 0xDC00 + (i & 0x3ff)
    return toChar(high) + toChar(low)
class Kanji:
    """Describes a kanji. The root stroke group is accessible from the strokes member."""

    def __init__(self, code, variant = None):
        """Create a kanji for code point *code* (int) and optional *variant*.

        *variant* defaults to ``None`` so that callers that only know the
        code point can construct a Kanji.
        """
        # Unicode of char being represented (int)
        self.code = code
        # Variant of the character, if any
        self.variant = variant
        # Root stroke group; stays None until a parser assigns it
        self.strokes = None

    def kId(self):
        """Return the string identifier used to uniquely identify the kanji.

        The identifier is the code point as at least five lowercase hex
        digits, followed by ``-<variant>`` when a variant is set.
        """
        ret = "%05x" % (self.code,)
        if self.variant:
            ret += "-%s" % (self.variant,)
        return ret

    def outputStrokesNumbers(self, out, indent = 0):
        """Write the number label of every stroke to *out*, counting from 1."""
        for cpt, stroke in enumerate(self.getStrokes(), 1):
            stroke.numberToSVG(out, cpt, indent + 1)

    def outputStrokes(self, out, indent = 0):
        """Write the whole stroke tree to *out* as SVG.

        NOTE(review): *indent* is accepted but not forwarded to ``toSVG``, so
        the output always starts at indentation level 0 - confirm whether
        that is intended.
        """
        # Fresh counters: group ids start at 0, stroke ids at 1.
        self.strokes.toSVG(out, self.kId(), [0], [1])

    def simplify(self):
        """Simplify the stroke tree in place (delegates to the root group)."""
        self.strokes.simplify()

    def getStrokes(self):
        """Return the strokes of the kanji as given by the root group."""
        return self.strokes.getStrokes()
class StrokeGr:
    """Describes a stroke group belonging to a kanji as closely as possible to the XML format. Sub-stroke groups or strokes are available in the childs member. They can either be of class StrokeGr or Stroke so their type should be checked."""

    # Attributes that prevent simplify() from merging a group with its only
    # child when both define them with different values.
    _CONFLICT_ATTRS = ("element", "original", "part", "variant", "partial",
                       "tradForm", "radicalForm", "radical", "phon")
    # Attributes copied from the child onto the parent when merging.
    _MERGED_ATTRS = ("element", "original", "part", "variant", "partial",
                     "tradForm", "radicalForm", "position", "radical", "phon")

    def __init__(self, parent):
        """Create an empty group and register it as a child of *parent*, if any."""
        self.parent = parent
        if parent: parent.childs.append(self)
        # Element of strokegr
        self.element = None
        # A more common, safer element this one derives of
        self.original = None
        self.part = None
        self.number = None
        self.variant = False
        self.partial = False
        self.tradForm = False
        self.radicalForm = False
        self.position = None
        self.radical = None
        self.phon = None
        self.childs = []

    def toSVG(self, out, rootId, groupCpt = None, strCpt = None, indent = 0):
        """Write this group and everything below it to *out* as SVG.

        *groupCpt* and *strCpt* are one-element lists used as shared, mutable
        counters for group and stroke ids; they are updated in place.  When
        omitted, numbering starts afresh at ``[0]`` and ``[1]``.  Group 0 is
        given the bare *rootId*; later groups get a ``-g<n>`` suffix.
        """
        # Fresh lists per call: list defaults in the signature would be
        # shared between calls and keep counting from the previous output.
        if groupCpt is None: groupCpt = [0]
        if strCpt is None: strCpt = [1]

        gid = rootId
        if groupCpt[0] != 0: gid += "-g" + str(groupCpt[0])
        groupCpt[0] += 1

        # Attributes are collected in the order in which they are written.
        attrs = [' id="kvg:%s"' % (gid)]
        if self.element: attrs.append(' kvg:element="%s"' % (self.element))
        if self.part: attrs.append(' kvg:part="%d"' % (self.part))
        if self.number: attrs.append(' kvg:number="%d"' % (self.number))
        if self.variant: attrs.append(' kvg:variant="true"')
        if self.original: attrs.append(' kvg:original="%s"' % (self.original))
        if self.partial: attrs.append(' kvg:partial="true"')
        if self.tradForm: attrs.append(' kvg:tradForm="true"')
        if self.radicalForm: attrs.append(' kvg:radicalForm="true"')
        if self.position: attrs.append(' kvg:position="%s"' % (self.position))
        if self.radical: attrs.append(' kvg:radical="%s"' % (self.radical))
        if self.phon: attrs.append(' kvg:phon="%s"' % (self.phon))

        out.write("\t" * indent + '<g%s>\n' % (''.join(attrs),))
        for child in self.childs:
            child.toSVG(out, rootId, groupCpt, strCpt, indent + 1)
        out.write("\t" * indent + '</g>\n')

    def components(self, simplified = True, recursive = False, level = 0):
        """Return the components found in the direct sub-groups.

        For each child group, ``original`` is used when *simplified* is true
        and the child has one, otherwise ``element``.  With *recursive* and at
        least one component found, the result is ``[level]`` followed by the
        components and then the lists obtained from the children.

        NOTE(review): components gathered from children that carry no
        element of their own are only returned in that recursive case;
        otherwise they are discarded - confirm that this is intended.
        """
        ret = []
        childsComp = []
        for child in self.childs:
            if isinstance(child, StrokeGr):
                found = False
                # Can we find the component in the child?
                if simplified and child.original:
                    ret.append(child.original)
                    found = True
                elif child.element:
                    ret.append(child.element)
                    found = True
                # If not, the components we are looking for are the child's
                # components - we also do that if we asked all the sub-components of the group
                if not found or recursive:
                    newLevel = level
                    if found: newLevel += 1
                    childsComp += child.components(simplified, recursive, newLevel)
        if recursive and not len(ret) == 0: ret = [ level ] + ret + childsComp
        return ret

    def simplify(self):
        """Merge this group with its only child group when nothing conflicts.

        Sub-groups are simplified first.  If this group then contains exactly
        one child and that child is a group, the child's attributes and
        children are pulled up into this group, unless both define one of the
        attributes with different values or both define a position.
        """
        for child in self.childs:
            if isinstance(child, StrokeGr): child.simplify()
        if len(self.childs) != 1 or not isinstance(self.childs[0], StrokeGr):
            return
        # Name the only child explicitly instead of relying on the variable
        # left over from the loop above.
        child = self.childs[0]
        # Check if there is no conflict (parts, in particular, cannot be merged)
        for name in self._CONFLICT_ATTRS:
            mine = getattr(self, name)
            theirs = getattr(child, name)
            if theirs and mine and theirs != mine: return
        # We want to preserve inner identical positions - we may have something at the top
        # of another top element, for instance.
        if child.position and self.position: return
        # Ok, let's merge!
        self.childs = child.childs
        for name in self._MERGED_ATTRS:
            value = getattr(child, name)
            if value: setattr(self, name, value)

    def getStrokes(self):
        """Return all strokes below this group as a flat list, in document order."""
        ret = []
        for child in self.childs:
            if isinstance(child, StrokeGr): ret += child.getStrokes()
            else: ret.append(child)
        return ret
class Stroke:
    """A single stroke, containing its type and (optionally) its SVG data."""

    def __init__(self, parent):
        # parent is accepted for symmetry with StrokeGr but is not stored.
        self.stype = None
        self.svg = None
        # Indexable pair giving the position of the stroke's number label
        self.numberPos = None

    def numberToSVG(self, out, number, indent = 0):
        """Write the ``<text>`` label showing *number*; no-op without a position."""
        if not self.numberPos:
            return
        x = self.numberPos[0]
        y = self.numberPos[1]
        label = '<text transform="matrix(1 0 0 1 %.2f %.2f)">%d</text>\n' % (x, y, number)
        out.write("\t" * indent + label)

    def toSVG(self, out, rootId, groupCpt, strCpt, indent = 0):
        """Write the ``<path>`` element of this stroke to *out*.

        The id is built from *rootId* and the current value of the shared
        stroke counter ``strCpt[0]``, which is then incremented.  *groupCpt*
        is not used here.
        """
        pid = rootId + "-s" + str(strCpt[0])
        strCpt[0] += 1
        pieces = ["\t" * indent, '<path id="kvg:%s"' % (pid,)]
        if self.stype:
            pieces.append(' kvg:type="%s"' % (self.stype,))
        if self.svg:
            pieces.append(' d="%s"' % (self.svg))
        pieces.append('/>\n')
        out.write("".join(pieces))
class KanjisHandler(BasicHandler):
    """XML handler for parsing a kanji file made of kanji, strokegr and stroke elements. After parsing, the kanji is accessible through the kanji member and its root stroke group through kanji.strokes."""

    def __init__(self, code, variant):
        BasicHandler.__init__(self)
        self.kanji = Kanji(code, variant)
        # Stack of the stroke groups currently open
        self.groups = []
        # Last part number seen for each (possibly numbered) element
        self.compCpt = {}
        # Every element / original value met while parsing
        self.metComponents = set()

    def handle_start_kanji(self, attrs):
        pass

    def handle_end_kanji(self):
        if len(self.groups) != 0:
            print("WARNING: stroke groups remaining after reading kanji!")
        self.groups = []

    def handle_start_strokegr(self, attrs):
        parent = self.groups[-1] if len(self.groups) != 0 else None
        group = StrokeGr(parent)

        # Now parse group attributes
        # NOTE(review): variant and partial keep the raw attribute text, so
        # any non-empty value (even "false") is truthy, unlike tradForm and
        # radicalForm which are compared against "true".
        for name, convert in (("element", unicode), ("variant", str),
                              ("partial", str), ("original", unicode),
                              ("part", int), ("number", int)):
            if name in attrs: setattr(group, name, convert(attrs[name]))
        for name in ("tradForm", "radicalForm"):
            if name in attrs and str(attrs[name]) == "true": setattr(group, name, True)
        for name in ("position", "radical", "phon"):
            if name in attrs: setattr(group, name, unicode(attrs[name]))

        self.groups.append(group)
        if group.element: self.metComponents.add(group.element)
        if group.original: self.metComponents.add(group.original)
        self._checkParts(group)

    def _checkParts(self, group):
        """Check the part / number sequence of *group* and record its part."""
        if group.number:
            if not group.part:
                print("%s: Number specified, but part missing" % (self.kanji.kId()))
            key = group.element + str(group.number)
            if group.part > 1:
                # The group must exist already
                if key not in self.compCpt:
                    print("%s: Missing numbered group" % (self.kanji.kId()))
                elif self.compCpt[key] != group.part - 1:
                    print("%s: Incorrectly numbered group" % (self.kanji.kId()))
            elif key in self.compCpt:
                # The group must not exist
                print("%s: Duplicate numbered group" % (self.kanji.kId()))
            self.compCpt[key] = group.part
        elif group.part:
            # No number, just a part - groups restart with part 1, otherwise must
            # increase correctly
            if group.part > 1:
                # The group must exist already
                if group.element not in self.compCpt:
                    print("%s: Incorrectly started multi-part group" % (self.kanji.kId()))
                elif self.compCpt[group.element] != group.part - 1:
                    print("%s: Incorrectly splitted multi-part group" % (self.kanji.kId()))
            self.compCpt[group.element] = group.part

    def handle_end_strokegr(self):
        group = self.groups.pop()
        # The outermost group has just been closed: it is the kanji's root.
        if len(self.groups) == 0:
            if self.kanji.strokes:
                print("WARNING: overwriting root of kanji!")
            self.kanji.strokes = group

    def handle_start_stroke(self, attrs):
        parent = self.groups[-1] if len(self.groups) != 0 else None
        stroke = Stroke(parent)
        stroke.stype = unicode(attrs["type"])
        if "path" in attrs: stroke.svg = unicode(attrs["path"])
        self.groups[-1].childs.append(stroke)
class SVGHandler(BasicHandler):
    """SVG handler for parsing final kanji files. It can handle single-kanji files or aggregation files. After parsing, the kanji are accessible through the kanjis member, indexed by the hexadecimal code point (without prefix or padding) of their root element."""

    def __init__(self):
        BasicHandler.__init__(self)
        self.kanjis = {}
        self.currentKanji = None
        # Stack of the <g> groups currently open
        self.groups = []
        # Every element / original value met while parsing
        self.metComponents = set()

    def handle_start_g(self, attrs):
        # Special case for handling the root
        if len(self.groups) == 0:
            code = realord(attrs["kvg:element"])
            # Kanji needs the integer code point (kId() formats it with %x)
            # and a variant; the hex string is only the key into self.kanjis.
            self.currentKanji = Kanji(code, None)
            self.kanjis[hex(code)[2:]] = self.currentKanji
            self.compCpt = {}
            parent = None
        else:
            parent = self.groups[-1]
        group = StrokeGr(parent)

        # Now parse group attributes
        # NOTE(review): variant and partial keep the raw attribute text, so
        # any non-empty value (even "false") is truthy, unlike tradForm and
        # radicalForm which are compared against "true".
        for name, convert in (("element", unicode), ("variant", str),
                              ("partial", str), ("original", unicode),
                              ("part", int), ("number", int)):
            if "kvg:" + name in attrs: setattr(group, name, convert(attrs["kvg:" + name]))
        for name in ("tradForm", "radicalForm"):
            if "kvg:" + name in attrs and str(attrs["kvg:" + name]) == "true": setattr(group, name, True)
        for name in ("position", "radical", "phon"):
            if "kvg:" + name in attrs: setattr(group, name, unicode(attrs["kvg:" + name]))

        self.groups.append(group)
        if group.element: self.metComponents.add(group.element)
        if group.original: self.metComponents.add(group.original)
        self._checkParts(group)

    def _checkParts(self, group):
        """Check the part / number sequence of *group* and record its part."""
        if group.number:
            if not group.part:
                print("%s: Number specified, but part missing" % (self.currentKanji.kId()))
            key = group.element + str(group.number)
            if group.part > 1:
                # The group must exist already
                if key not in self.compCpt:
                    print("%s: Missing numbered group" % (self.currentKanji.kId()))
                elif self.compCpt[key] != group.part - 1:
                    print("%s: Incorrectly numbered group" % (self.currentKanji.kId()))
            elif key in self.compCpt:
                # The group must not exist
                print("%s: Duplicate numbered group" % (self.currentKanji.kId()))
            self.compCpt[key] = group.part
        elif group.part:
            # No number, just a part - groups restart with part 1, otherwise must
            # increase correctly
            if group.part > 1:
                # The group must exist already
                if group.element not in self.compCpt:
                    print("%s: Incorrectly started multi-part group" % (self.currentKanji.kId()))
                elif self.compCpt[group.element] != group.part - 1:
                    print("%s: Incorrectly splitted multi-part group" % (self.currentKanji.kId()))
            self.compCpt[group.element] = group.part

    def handle_end_g(self):
        group = self.groups.pop()
        # End of kanji?
        if len(self.groups) == 0:
            self.currentKanji.strokes = group
            self.currentKanji = None
            self.groups = []

    def handle_start_path(self, attrs):
        parent = self.groups[-1] if len(self.groups) != 0 else None
        stroke = Stroke(parent)
        stroke.stype = unicode(attrs["kvg:type"])
        if "d" in attrs: stroke.svg = unicode(attrs["d"])
        self.groups[-1].childs.append(stroke)

111
kvg.py
View File

@@ -1,111 +0,0 @@
#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2013 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os, os.path, sys, codecs, re, datetime
from kanjivg import licenseString
# Matches one <path .../> element that fits on a single line and captures the
# value of its d attribute.  The leading ".*" is greedy, so when `d="` occurs
# more than once on the line (for instance as the tail of `id="`), the last
# occurrence that still lets the rest of the pattern match is the one captured.
pathre = re.compile(r'<path .*d="([^"]*)".*/>')
# Usage text printed by the command-line entry point when the arguments are
# missing or the command is not recognised.
helpString = """Usage: %s <command> [ kanji files ]
Recognized commands:
split file1 [ file2 ... ] extract path data into a -paths suffixed file
merge file1 [ file2 ... ] merge path data from -paths suffixed file
release create single release file""" % (sys.argv[0],)
def createPathsSVG(f):
    """Extract the path data of kanji file *f* into a stand-alone SVG file.

    The output is written next to *f*, with the last four characters of the
    name (the extension) replaced by ``-paths.svg``.  It contains one bare
    ``<path>`` per path found in *f*, each preceded by a comment holding its
    1-based index.
    """
    # Both files are opened in with-blocks so the handles are closed (and
    # the output flushed) even if reading or writing fails.
    with codecs.open(f, "r", "utf-8") as src:
        paths = pathre.findall(src.read())
    with codecs.open(f[:-4] + "-paths.svg", "w", "utf-8") as out:
        out.write("""<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" []>
<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109" style="fill:none;stroke:#000000;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">\n""")
        for i, path in enumerate(paths, 1):
            out.write('<!--%2d--><path d="%s"/>\n' % (i, path))
        out.write("</svg>")
def mergePathsSVG(f):
    """Copy the path data of the ``-paths.svg`` companion back into file *f*.

    The n-th ``d`` attribute of *f* is replaced by the n-th path found in
    the companion file.  Nothing is written, and a message is printed, when
    the companion file is missing or when the two files do not contain the
    same number of paths.
    """
    pFile = f[:-4] + "-paths.svg"
    if not os.path.exists(pFile):
        print("%s does not exist!" % (pFile,))
        return
    # Files are read in with-blocks so the handles are closed promptly.
    with codecs.open(pFile, "r", "utf-8") as src:
        paths = pathre.findall(src.read())
    with codecs.open(f, "r", "utf-8") as src:
        s = src.read()
    pos = 0
    while True:
        # Search from pos instead of slicing, so offsets are absolute and
        # the remaining text is not copied on every iteration.
        match = pathre.search(s, pos)
        # One side ran out before the other: the files do not correspond.
        if bool(match) != bool(paths):
            print("Paths count mismatch for %s" % (f,))
            return
        if not match:
            break
        newPath = paths.pop(0)
        s = s[:match.start(1)] + newPath + s[match.end(1):]
        # Resume right after the data just inserted.
        pos = match.start(1) + len(newPath)
    with codecs.open(f, "w", "utf-8") as out:
        out.write(s)
def release():
    """Build the single release file ``kanjivg.xml`` from the ``kanji`` directory.

    Only directory entries whose name is exactly nine characters long are
    used (presumably ``xxxxx.svg``, which leaves variant files out - confirm).
    From each file, the lines between the opening tag of the
    ``kvg:StrokePaths_`` group and the ``kvg:StrokeNumbers_`` group are
    wrapped in a ``<kanji>`` element whose id is taken from the StrokePaths
    group.  Prints the number of kanji written.
    """
    datadir = "kanji"
    idMatchString = "<g id=\"kvg:StrokePaths_"
    files = sorted(f for f in os.listdir(datadir) if len(f) == 9)
    # The output and each input are opened in with-blocks so no handle is
    # left open, even when a file cannot be read.
    with open("kanjivg.xml", "w") as out:
        out.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        out.write("<!--\n")
        out.write(licenseString)
        out.write("\nThis file has been generated on %s, using the latest KanjiVG data\nto this date." % (datetime.date.today()))
        out.write("\n-->\n")
        out.write("<kanjivg xmlns:kvg='http://kanjivg.tagaini.net'>\n")
        for f in files:
            with open(os.path.join(datadir, f)) as src:
                data = src.read()
            # Skip everything up to and including the StrokePaths id prefix;
            # what follows, up to the closing quote, is the kanji id.
            data = data[data.find("<svg "):]
            data = data[data.find(idMatchString) + len(idMatchString):]
            kidend = data.find("\"")
            # Body: from the end of the StrokePaths line up to 5 characters
            # before the StrokeNumbers group.
            body = data[data.find("\n"):data.find('<g id="kvg:StrokeNumbers_') - 5]
            out.write("<kanji id=\"kvg:kanji_%s\">" % (data[:kidend],) + body + "</kanji>\n")
        out.write("</kanjivg>\n")
    print("%d kanji emitted" % len(files))
# Command name -> (function, minimum number of command-line arguments,
# counting the command itself).
actions = {
    "split": (createPathsSVG, 2),
    "merge": (mergePathsSVG, 2),
    "release": (release, 1)
}

if __name__ == "__main__":
    command = sys.argv[1] if len(sys.argv) >= 2 else None
    # Unknown or missing command, or too few arguments for it: show usage.
    if command not in actions or len(sys.argv) <= actions[command][1]:
        print(helpString)
        sys.exit(0)

    action = actions[command][0]
    files = sys.argv[2:]

    if not files:
        action()
    else:
        for f in files:
            if os.path.exists(f):
                action(f)
            else:
                print("%s does not exist!" % (f,))

View File

@@ -1,50 +0,0 @@
#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os, codecs, xml.sax
from kanjivg import *
def addComponents(strokegr, compSet):
    """Add the element and original of *strokegr* and of all its sub-groups to *compSet*.

    Children that are not stroke groups are ignored.
    """
    for component in (strokegr.element, strokegr.original):
        if component:
            compSet.add(component)
    for child in strokegr.childs:
        if not isinstance(child, StrokeGr):
            continue
        addComponents(child, compSet)
if __name__ == "__main__":
    # Read all kanjis
    # NOTE(review): SVGHandler is used because it is the handler that can be
    # built without arguments and that exposes the `kanjis` dictionary,
    # keyed by the same hex(realord(...))[2:] scheme used below.  The
    # previous KanjisHandler() call lacked its required arguments and that
    # class has no `kanjis` member.  Confirm kanjivg.xml is meant to be read
    # this way.
    handler = SVGHandler()
    xml.sax.parse("kanjivg.xml", handler)

    # Collect every component used by any kanji.  The kanji need no sorting
    # since the components end up in a set.  Kanji objects keep their root
    # group in `strokes`.
    componentsList = set()
    for kanji in handler.kanjis.values():
        addComponents(kanji.strokes, componentsList)
    print(len(componentsList))

    # A component is missing when no kanji exists for its code point.
    missingComponents = set()
    for component in componentsList:
        key = hex(realord(component))[2:]
        if key not in handler.kanjis:
            missingComponents.add(component)

    print("Missing components:")
    for component in missingComponents:
        print("%s %s" % (component, hex(realord(component))))
    print("%d missing components" % len(missingComponents))

View File

@@ -1,142 +0,0 @@
#! /usr/bin/env python3
# -*- coding: utf-8 ; mode: python -*-
# © Copyright 2013 ospalh@gmail.com
# License: GNU AGPL, version 3 or later; http://www.gnu.org/licenses/agpl.html
import argparse
import re
"""
Swap stroke data in KanjiVG files.
This is a helper script to fix problems where strokes or stroke
numbers are out of order. Run as script with --help for more info.
N.B.:
This is rather brittle. It does not use any kind of xml parser, but
looks for strings commonly found in the svg files. Use this only as a
support tool. Check that the script did what you expected after
running it.
"""
__version__ = '0.1.0'
number_text_pattern = '>{0}</text>'
stroke_re = '^\s.*-s{0}" kvg:type=".*" d="(.*)"/>'
stroke_text_pattern = '-s{0}" kvg:type="'
def swap_numbers(kanji, a, b):
    """Swap stroke numbers in a kanjivg file.

    The lines holding the number labels of strokes *a* and *b* exchange
    their content, except for the number text itself, which stays where it
    was.  When a label occurs on several lines, the last one is used.  The
    file *kanji* is rewritten in place.

    Raises RuntimeError when one of the two labels is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    # The files are read and written as UTF-8 explicitly, so that the result
    # does not depend on the platform's default encoding.
    with open(kanji, encoding='utf-8') as kf:
        lines = kf.readlines()
    line_a_pattern = number_text_pattern.format(a)
    line_b_pattern = number_text_pattern.format(b)
    num_a = -1
    num_b = -1
    line_a = ''
    line_b = ''
    for n, l in enumerate(lines):
        if line_a_pattern in l:
            num_a = n
            line_a = l
        if line_b_pattern in l:
            num_b = n
            line_b = l
    if num_a < 0 or num_b < 0:
        raise RuntimeError("Did not find both lines")
    lines[num_a] = line_b.replace(line_b_pattern, line_a_pattern)
    lines[num_b] = line_a.replace(line_a_pattern, line_b_pattern)
    with open(kanji, 'w', encoding='utf-8') as kf:
        kf.writelines(lines)
def swap_stroke_data(kanji, a, b):
    """Swap the stroke data in a kanjivg file.

    Only the content of the ``d`` attributes of strokes *a* and *b* is
    exchanged; ids and stroke types stay where they are.  When a stroke
    matches on several lines, the last one is used.  The file *kanji* is
    rewritten in place.

    Raises RuntimeError when one of the two strokes is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    # The files are read and written as UTF-8 explicitly, so that the result
    # does not depend on the platform's default encoding.
    with open(kanji, encoding='utf-8') as kf:
        lines = kf.readlines()
    line_a_re = re.compile(stroke_re.format(a))
    line_b_re = re.compile(stroke_re.format(b))
    num_a = -1
    num_b = -1
    line_a_match = None
    line_b_match = None
    for n, l in enumerate(lines):
        m = line_a_re.search(l)
        if m:
            num_a = n
            line_a_match = m
        m = line_b_re.search(l)
        if m:
            num_b = n
            line_b_match = m
    if num_a < 0 or num_b < 0:
        raise RuntimeError("Did not find both lines")
    data_a = line_a_match.group(1)
    data_b = line_b_match.group(1)
    lines[num_a] = lines[num_a].replace(data_a, data_b)
    lines[num_b] = lines[num_b].replace(data_b, data_a)
    with open(kanji, 'w', encoding='utf-8') as kf:
        kf.writelines(lines)
def swap_strokes(kanji, a, b):
    """Swap strokes in a kanjivg file.

    The ``<path>`` lines of strokes *a* and *b* exchange their content
    (stroke type and path data), except for the stroke id, which stays
    where it was.  When a stroke matches on several lines, the last one is
    used.  The file *kanji* is rewritten in place.

    Raises RuntimeError when one of the two strokes is not found.
    """
    # We do hardly any checking. If something is wrong, just blow up.
    # The files are read and written as UTF-8 explicitly, so that the result
    # does not depend on the platform's default encoding.
    with open(kanji, encoding='utf-8') as kf:
        lines = kf.readlines()
    line_a_pattern = stroke_text_pattern.format(a)
    line_b_pattern = stroke_text_pattern.format(b)
    num_a = -1
    num_b = -1
    line_a = ''
    line_b = ''
    for n, l in enumerate(lines):
        if line_a_pattern in l:
            num_a = n
            line_a = l
        if line_b_pattern in l:
            num_b = n
            line_b = l
    if num_a < 0 or num_b < 0:
        raise RuntimeError("Did not find both lines")
    lines[num_a] = line_b.replace(line_b_pattern, line_a_pattern)
    lines[num_b] = line_a.replace(line_a_pattern, line_b_pattern)
    with open(kanji, 'w', encoding='utf-8') as kf:
        kf.writelines(lines)
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description=u"""Swaps data for strokes a and b in the kanjivg svg
file "file".
Select one of the three options, number, data or stroke.
Look at the svg file with a text editor to determine which of the last two
options to use. When both stroke numbers and the strokes themselves are
out of order, run the script twice.""")

    # Exactly one mode must be chosen; each mode stores the function that
    # performs it in args.function.
    modes = (
        ('-n', '--number', swap_numbers,
         u"""Swap the stroke numbers. Use this when the
numbers seen are out of order."""),
        ('-d', '--data', swap_stroke_data,
         u"""Swap only the vector data of the strokes.
Use this when the stroke types are correct in the original file, but the
graphical data doesn't match these types."""),
        ('-s', '--stroke', swap_strokes,
         u"""Swap the whole strokes, including the stroke
type. Use this if the graphical stroke data matches the stroke types in the
original file, but the strokes are in the wrong order."""),
    )
    group = parser.add_mutually_exclusive_group(required=True)
    for short_flag, long_flag, swap_function, help_text in modes:
        group.add_argument(short_flag, long_flag, action='store_const',
                           const=swap_function, dest='function',
                           help=help_text)

    parser.add_argument('file', type=str, help='Kanji SVG file')
    parser.add_argument('stroke_a', type=int, help='First stroke to swap')
    parser.add_argument('stroke_b', type=int,
                        help='Second stroke to swap with the first stroke')

    args = parser.parse_args()
    args.function(args.file, args.stroke_a, args.stroke_b)

View File

@@ -1,52 +0,0 @@
#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# Copyright (C) 2008 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import xml.sax.handler
class BasicHandler(xml.sax.handler.ContentHandler):
    """SAX content handler that dispatches events to per-element methods.

    For an element named ``foo`` the following optional methods of the
    subclass are called when they exist:

    * ``handle_start_foo(attrs)`` when the element starts,
    * ``handle_data_foo(text)`` when it ends, with the character data
      received since the most recent start tag,
    * ``handle_end_foo()`` when it ends, after ``handle_data_foo``.

    ``elementsTree`` holds the names of the elements currently open.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.elementsTree = []
        # Character data received since the most recent start tag.  It is
        # kept in its own attribute so that it does not replace the
        # characters() callback on the instance.
        self._characterData = ""

    def _dispatch(self, attrName, *args):
        """Call the method *attrName* with *args* if this handler has one."""
        if hasattr(self, attrName):
            getattr(self, attrName)(*args)

    def currentElement(self):
        """Return the name of the innermost element currently open."""
        return str(self.elementsTree[-1])

    def startElement(self, qName, atts):
        self.elementsTree.append(str(qName))
        self._dispatch("handle_start_" + str(qName), atts)
        # Character data is collected afresh after every start tag.
        self._characterData = ""
        return True

    def endElement(self, qName):
        self._dispatch("handle_data_" + qName, self._characterData)
        self._dispatch("handle_end_" + str(qName))
        self.elementsTree.pop()
        return True

    def characters(self, string):
        self._characterData += string
        return True