1
0
mirror of https://github.com/KanjiVG/kanjivg.git synced 2026-01-30 01:53:16 +01:00
Files
kanjivg/mergexml.py
2010-12-31 12:54:19 +09:00

177 lines
6.2 KiB
Python
Executable File

#!/usr/bin/python2
# -*- coding: utf-8 -*-
#
# Copyright (C) 2009 Alexandre Courbot
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import os, codecs, xml.sax, re, datetime
from kanjivg import *
def createSVG(out, kanji):
out.write('<?xml version="1.0" encoding="UTF-8"?>\n')
out.write("<!-- ")
out.write(licenseString)
out.write("\nThis file has been generated on %s, using the latest KanjiVG data to this date." % (datetime.date.today()))
out.write("\n-->\n\n")
out.write("""<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.0//EN" "http://www.w3.org/TR/2001/REC-SVG-20010904/DTD/svg10.dtd" [
<!ATTLIST g
xmlns:kanjivg CDATA #FIXED "http://kanjivg.tagaini.net"
kanjivg:element CDATA #IMPLIED
kanjivg:variant CDATA #IMPLIED
kanjivg:partial CDATA #IMPLIED
kanjivg:original CDATA #IMPLIED
kanjivg:part CDATA #IMPLIED
kanjivg:number CDATA #IMPLIED
kanjivg:tradForm CDATA #IMPLIED
kanjivg:radicalForm CDATA #IMPLIED
kanjivg:position CDATA #IMPLIED
kanjivg:radical CDATA #IMPLIED
kanjivg:phon CDATA #IMPLIED >
<!ATTLIST path
xmlns:kanjivg CDATA #FIXED "http://kanjivg.tagaini.net"
kanjivg:type CDATA #IMPLIED >
]>
<svg xmlns="http://www.w3.org/2000/svg" width="109" height="109" viewBox="0 0 109 109" style="fill:none;stroke:#000000;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">
""")
#<defs>
#<marker id="Triangle"
#viewBox="0 0 10 10" refX="0" refY="5"
#markerUnits="strokeWidth"
#markerWidth="4" markerHeight="3"
#orient="auto" stroke="none" fill="#ff0000">
#<path d="M 0 0 L 10 5 L 0 10 z" />
#</marker>
#</defs>
kanji.toSVG(out)
out.write("</svg>\n")
class KanjiStrokeHandler(BasicHandler):
def __init__(self):
BasicHandler.__init__(self)
self.strokes = []
self.active = False
def handle_start_path(self, attrs):
strokeData = attrs["d"]
# Replace spaces between digits by the comma separator
strokeData = re.sub('(\d) (\d)', '\\1,\\2', strokeData)
strokeData = re.sub("[\n\t ]+", "", strokeData)
self.strokes.append(strokeData)
def handle_start_g(self, attrs):
if attrs.has_key("id") and attrs["id"] == "Vektorbild": self.active = True
if __name__ == "__main__":
files = os.listdir("XML")
kanjis = []
mismatch = []
handled = set()
metComponents = set()
for f in files:
# Let's keep the variations out of the process for now...
if '-' in f: continue
if not f.endswith(".xml"): continue
descHandler = KanjisHandler()
xml.sax.parse(os.path.join("XML", f), descHandler)
handled.add(realchr(int(f[:-4], 16)))
parser = xml.sax.make_parser()
svgHandler = KanjiStrokeHandler()
parser.setContentHandler(svgHandler)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.setFeature(xml.sax.handler.feature_external_pes, False)
svgFile = os.path.join("SVG", f[:-3] + "svg")
if os.path.exists(svgFile):
parser.parse(svgFile)
metComponents = metComponents.union(descHandler.metComponents)
kanji = descHandler.kanjis.values()[0]
desc = kanji.getStrokes()
svg = svgHandler.strokes
if len(desc) != len(svg): mismatch.append((descHandler.kanjis.values()[0].root.element, len(desc), len(svg)))
for i in range(min(len(desc), len(svg))):
desc[i].svg = svg[i]
# Add dummy strokes for SVG orphans
for i in range(len(desc), len(svg)):
s = Stroke(kanji.root)
s.stype = "Missing stroke"
s.svg = svg[i]
kanji.root.childs.append(s)
kanjis.append(kanji)
# Now parse orphan SVGs (probably just kana and romaji)
files = os.listdir("SVG")
for f in files:
# Let's keep the variations out of the process for now...
if '-' in f: continue
if not f.endswith(".svg"): continue
if realchr(int(f[:-4], 16)) in handled: continue
parser = xml.sax.make_parser()
svgHandler = KanjiStrokeHandler()
parser.setContentHandler(svgHandler)
parser.setFeature(xml.sax.handler.feature_external_ges, False)
parser.setFeature(xml.sax.handler.feature_external_pes, False)
parser.parse(os.path.join("SVG", f))
kanji = Kanji(f[:-4])
kanji.midashi = unichr(int(f[:-4], 16))
kanji.root = StrokeGr(None)
for s in svgHandler.strokes:
stroke = Stroke(kanji.root)
stroke.svg = s
kanji.root.childs.append(stroke)
kanjis.append(kanji)
# Stroke count mismatch kanji
mismatch.sort()
misout = codecs.open("Main.StrokeCountMismatch", "w", "utf-8")
misout.write('version=pmwiki-2.1.0 urlencoded=1\ntext=')
misout.write("'''This page is generated - please do not edit it!'''%0a%0aThe following kanji have a stroke order mismatch between their XML and SVG descriptions:%0a")
for i in range(len(mismatch)):
misout.write("* %s: XML %d, SVG %d" % (mismatch[i][0], mismatch[i][1], mismatch[i][2]))
misout.write("%0a")
# Missing components
misout = codecs.open("Main.MissingKanji", "w", "utf-8")
misout.write('version=pmwiki-2.1.0 urlencoded=1\ntext=')
misout.write("'''This page is generated - please do not edit it!'''%0a%0aThe following kanji are referenced as components but no data is available for them:%0a")
for k in metComponents.difference(handled):
misout.write("* %s" % (k,))
misout.write("%0a")
# Finally write the output files
os.mkdir("data")
for kanji in kanjis:
out = codecs.open("data/" + str(kanji.id) + ".svg", "w", "utf-8")
createSVG(out, kanji)
# Finally write the output file
#curDate = str(datetime.date.today())
#kanjis.sort(lambda x,y: cmp(x.id, y.id))
#out = codecs.open("kanjivg-%s.xml" % (curDate.replace("-", ""),), "w", "utf-8")
#out.write('<?xml version="1.0" encoding="UTF-8"?>\n')
#out.write("<!-- ")
#out.write(licenseString)
#out.write("\nThis file has been generated on %s, using the latest KanjiVG data to this date." % (curDate))
#out.write("\n-->\n\n")
#out.write("<kanjis>\n");
#for kanji in kanjis:
#kanji.toXML(out)
#out.write("</kanjis>\n");