1
0
mirror of https://github.com/KanjiVG/kanjivg.git synced 2026-04-21 13:00:43 +02:00

Added script for 豕 errors; found one error in 6726 (left and right sides reversed)

Created a Perl module for generic searches for a particular pattern.
This commit is contained in:
Ben Bullock
2011-07-13 17:54:24 +09:00
parent 47e262f741
commit 3e29c70808
5 changed files with 207 additions and 66 deletions

53
KanjiVG.pm Normal file
View File

@@ -0,0 +1,53 @@
package KanjiVG;
use warnings;
use strict;
use parent 'Exporter';
# Both search helpers can be imported on request; scripts ask for either
# "handle_element" or "find_element".
our @EXPORT_OK = qw/find_element handle_element/;
use Carp;
# Loaded here so that $FindBin::Bin is set even when the calling script
# has not loaded FindBin itself.
use FindBin;
# Directory that is searched for "*.svg" files: the "kanjivg"
# subdirectory next to the running script.
my $dir = "$FindBin::Bin/kanjivg";
# Find the SVG files in $dir which mention a given KanjiVG element.
#
# Argument:
#   $element - the text to look for inside a kanjivg:element="..."
#              attribute. It is matched literally.
#
# Returns the list of file names which contain at least one line with
# kanjivg:element="$element". Each file is listed once, in the order
# returned by the glob.
#
# Croaks with "No element" if $element is undefined; dies if a file
# cannot be opened or closed.
sub find_element
{
    my ($element) = @_;
    if (! defined $element) {
        croak "No element";
    }
    # \Q...\E keeps any regex metacharacters in $element from being
    # interpreted as part of the pattern.
    my $string = qr/kanjivg:element="\Q$element\E"/;
    my @files = <$dir/*.svg>;
    my @matches;
    for my $file (@files) {
        open my $in, "<:encoding(utf8)", $file
            or die "Cannot open '$file': $!";
        while (my $line = <$in>) {
            if ($line =~ $string) {
                push @matches, $file;
                # One hit is enough. Without this, a file with several
                # matching lines would be listed, and later parsed,
                # once per matching line.
                last;
            }
        }
        close $in or die "Cannot close '$file': $!";
    }
    return @matches;
}
# Parse every SVG file which mentions $element, calling a user-supplied
# callback for each XML start tag.
#
# Arguments:
#   $element      - the KanjiVG element to search for (passed to
#                   find_element).
#   $handle_start - code reference called for each start tag as
#                   $handle_start->($element, $data, @args), where @args
#                   is whatever XML::Parser passes to its Start handler.
#   $data         - hash reference shared with the callback. Before each
#                   file is parsed, $data->{file} is set to its name.
#
# Croaks if $element is undefined, if $data is not a hash reference or
# if $handle_start is not a code reference.
sub handle_element
{
    my ($element, $handle_start, $data) = @_;
    # Check the arguments before doing any work: find_element reads
    # every SVG file, so a bad call should fail before that scan.
    if (! defined $element) {
        croak "No element";
    }
    if (ref $data ne 'HASH') {
        croak "Give me a hash ref";
    }
    if (ref $handle_start ne 'CODE') {
        croak "Give me a code ref";
    }
    my @matches = find_element ($element);
    # Load the parser here so that the module does not depend on the
    # calling script having loaded XML::Parser already.
    require XML::Parser;
    my $parser = XML::Parser->new (
        Handlers => {
            Start => sub { $handle_start->($element, $data, @_) },
        },
    );
    for my $file (@matches) {
        # Let the callback know which file it is looking at.
        $data->{file} = $file;
        $parser->parsefile ($file);
    }
    return;
}
1;

View File

@@ -46,6 +46,10 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details. -->
<text transform="matrix(1 0 0 1 77.5 75.9414)">17</text>
</g>
<g id="StrokePaths" style="fill:none;stroke:#404040;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">
<path d="M18.14,22.68c0.47,0.95,0.79,2.38,0.95,3.3c0.16,0.92,0.14,24.9-0.37,32.63c-0.72,11-2.47,25.08-7.97,35.72"/>
<path d="M20.03,24.25c1.89-0.06,10.94-2.34,12.35-2.46c2.52-0.23,3.15,1.31,3.15,2.67c0,3.23-0.04,44.72-0.04,63.05c0,12.61-5.32,3.55-6.93,2.04"/>
<path d="M19.6,42.65c4.15-0.35,11.74-1.54,15.38-1.57"/>
<path d="M18.68,59.09c3.42-0.08,11.35-0.78,15.99-1.2"/>
<path d="M44.08,21.71c0.99,0.53,2.14,0.63,3.14,0.53c9.11-0.96,30.9-3.46,41.4-3.53c1.65-0.01,2.64,0.25,3.47,0.51"/>
<path d="M54.81,12.94c1.15,1.05,1.52,1.37,1.64,2.01c1.15,6.04,1.48,11.54,1.81,13.74"/>
<path d="M75.55,11.69c0.64,0.75,1.07,1.89,0.86,2.99c-0.64,3.36-1.84,8.97-2.77,13.26"/>
@@ -59,9 +63,5 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details. -->
<path d="M65.59,74.82c0.06,0.46,0.14,1.18-0.13,1.83c-1.57,3.87-11.38,11.32-23.68,16.52"/>
<path d="M83.04,58.4c0.03,0.25,0.07,0.66-0.07,1.03c-0.84,2.17-5.68,6.92-12.29,9.83"/>
<path d="M70.38,70.93c3.38,2.78,16.6,15.5,21.85,18.66c1.3,0.79,2.31,1.32,3.55,1.59"/>
<path d="M18.14,22.68c0.47,0.95,0.79,2.38,0.95,3.3c0.16,0.92,0.14,24.9-0.37,32.63c-0.72,11-2.47,25.08-7.97,35.72"/>
<path d="M20.03,24.25c1.89-0.06,10.94-2.34,12.35-2.46c2.52-0.23,3.15,1.31,3.15,2.67c0,3.23-0.04,44.72-0.04,63.05c0,12.61-5.32,3.55-6.93,2.04"/>
<path d="M19.6,42.65c4.15-0.35,11.74-1.54,15.38-1.57"/>
<path d="M18.68,59.09c3.42-0.08,11.35-0.78,15.99-1.2"/>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 4.2 KiB

After

Width:  |  Height:  |  Size: 4.2 KiB

75
find-ie.pl Executable file
View File

@@ -0,0 +1,75 @@
#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use FindBin;
use XML::Parser;
use Image::SVG::Path 'extract_path_info';
use utf8;
use KanjiVG qw/handle_element/;
binmode STDOUT, "utf8";

# Hash shared with the start-tag callback; handle_element stores the
# name of the file being parsed under the key "file".
my %data;

# The KanjiVG element whose strokes are examined.
my $element = '豕';

# Parser state used by handle_start: $start is true while paths are
# being counted, and $count is the number of paths counted so far.
my ($start, $count);

handle_element ($element, \&handle_start, \%data);
# Examine one counted <path> of the element being searched for.
#
# Arguments:
#   $data  - shared hash reference; $data->{file} is the current file.
#   $count - position of this path among the paths counted so far.
#   $attr  - hash reference of the path's XML attributes; "d" is used.
#
# Only the first and second paths are looked at. For each, the offset
# from the path's first point to its last end point is stored in
# $data->{line1} or $data->{line2} as [x, y]. The offset of the second
# path is also printed, with the file's base name.
sub handle_ie
{
    my ($data, $count, $attr) = @_;
    return unless $count == 1 || $count == 2;

    my @segments = extract_path_info ($attr->{d}, {
        no_shortcuts => 1,
        absolute => 1,
    });
    my ($from_x, $from_y) = @{$segments[0]->{point}};
    my ($to_x, $to_y) = @{$segments[-1]->{end}};
    my $dx = $to_x - $from_x;
    my $dy = $to_y - $from_y;
    $data->{"line$count"} = [$dx, $dy];

    # Strip the directory part so that only the file name is shown.
    (my $basename = $data->{file}) =~ s!.*/!!;

    # The first path is recorded but not reported; only the second
    # path produces output.
    if ($count == 2) {
        print "$basename: $count: $dx $dy\n";
    }
    return;
}
# Start-tag callback given to handle_element.
#
# Arguments:
#   $kanjivg_element - the element being searched for.
#   $data            - shared hash reference, passed on to handle_ie.
#   $parser          - the parser object (unused here).
#   $xml_element     - name of the XML element being opened.
#   %attr            - the XML element's attributes.
#
# Uses the file-level variables $start and $count to track whether
# paths are currently being counted.
sub handle_start
{
    my ($kanjivg_element, $data, $parser, $xml_element, %attr) = @_;

    if ($xml_element ne 'g') {
        # Paths are only counted once a matching group has been seen.
        if ($start && $xml_element eq 'path') {
            $count++;
            handle_ie ($data, $count, \%attr);
        }
        return;
    }

    my $kvg = $attr{'kanjivg:element'};
    if (! $kvg) {
        # A group without an element name stops the counting.
        $start = undef;
        $count = 0;
        return;
    }

    # A group naming some other element leaves the state as it is.
    return if $kvg ne $kanjivg_element;

    # The second part of a split element does not restart the count.
    my $kp = $attr{"kanjivg:part"};
    return if defined $kp && $kp == 2;

    $start = 1;
    $count = 0;
    return;
}

View File

@@ -5,72 +5,26 @@ use FindBin;
use XML::Parser;
use Image::SVG::Path 'extract_path_info';
use utf8;
my $string = qr/kanjivg:element="氵"/;
my @files = <$FindBin::Bin/kanjivg/*.svg>;
my @matches;
for my $file (@files) {
open my $in, "<:encoding(utf8)", $file
or die $!;
while (<$in>) {
if (/$string/) {
push @matches, $file;
# print "$file matches.\n";
}
}
close $in or die $!;
}
my $start;
my $count;
my %global;
my $parser = XML::Parser->new (
Handlers => {
Start => \& handle_start
},
);
use KanjiVG qw/find_element/;
binmode STDOUT, "utf8";
my %data;
my $element = '氵';
for my $file (@matches) {
# print "Parsing '$file'.\n";
$global{file} = $file;
$parser->parsefile ($file);
}
sub handle_start
{
my ($parser, $element, %attr) = @_;
if ($element eq 'g') {
my $kvg = $attr{'kanjivg:element'};
if ($kvg) {
if ($kvg eq '氵') {
#print "Found '$kvg' in '$global{file}'\n";
$start = 1;
$count = 0;
}
}
else {
$start = undef;
$count = 0;
}
}
if ($start && $element eq 'path') {
$count++;
if ($count == 3) {
my $d = $attr{d};
my @values = extract_path_info ($d, {
no_shortcuts => 1,
absolute => 1,
});
my @start = @{$values[0]->{point}};
my @end = @{$values[-1]->{end}};
my $x_diff = $end[0] - $start[0];
my $y_diff = $end[1] - $start[1];
if ($x_diff < 0 || $y_diff > 0) {
printf ("file $global{file}: %d %d\n", $x_diff, $y_diff);
}
}
my ($data, $count, $d) = @_;
if ($count == 3) {
my @values = extract_path_info ($d, {
no_shortcuts => 1,
absolute => 1,
});
my @start = @{$values[0]->{point}};
my @end = @{$values[-1]->{end}};
my $x_diff = $end[0] - $start[0];
my $y_diff = $end[1] - $start[1];
# if ($x_diff < 0 || $y_diff > 0) {
printf ("file $global{file}: %d %d\n", $x_diff, $y_diff);
# }
}
}

59
find-sanzui.pl Executable file
View File

@@ -0,0 +1,59 @@
#!/home/ben/software/install/bin/perl
use warnings;
use strict;
use FindBin;
use XML::Parser;
use Image::SVG::Path 'extract_path_info';
use utf8;
use KanjiVG qw/handle_element/;
binmode STDOUT, "utf8";

# Hash shared with the start-tag callback; handle_element stores the
# name of the file being parsed under the key "file".
my %data;

# The KanjiVG element whose strokes are examined.
my $element = '氵';

# Parser state used by handle_start: $start is true while paths are
# being counted, and $count is the number of paths counted so far.
my ($start, $count);

handle_element ($element, \&handle_start, \%data);
# Report the direction of the third counted <path> of the element.
#
# Arguments:
#   $data  - shared hash reference; $data->{file} is the current file.
#   $count - position of this path among the paths counted so far.
#   $attr  - hash reference of the path's XML attributes; "d" is used.
#
# For the third path only, prints the file name followed by the x and y
# offsets from the path's first point to its last end point, formatted
# as integers. Other paths are ignored.
sub handle_sanzui
{
    my ($data, $count, $attr) = @_;
    if ($count == 3) {
        my $d = $attr->{d};
        my @values = extract_path_info ($d, {
            no_shortcuts => 1,
            absolute => 1,
        });
        my @start = @{$values[0]->{point}};
        my @end = @{$values[-1]->{end}};
        my $x_diff = $end[0] - $start[0];
        my $y_diff = $end[1] - $start[1];
        # The direction filter is currently switched off, so every
        # third path is reported.
        # if ($x_diff < 0 || $y_diff > 0) {
        # The file name is passed as an argument rather than put into
        # the format, so that a "%" in the path cannot be mistaken for
        # a conversion.
        printf ("file %s: %d %d\n", $data->{file}, $x_diff, $y_diff);
        # }
    }
    return;
}
# Start-tag callback given to handle_element.
#
# Arguments:
#   $kanjivg_element - the element being searched for.
#   $data            - shared hash reference, passed on to handle_sanzui.
#   $parser          - the parser object (unused here).
#   $xml_element     - name of the XML element being opened.
#   %attr            - the XML element's attributes.
#
# Uses the file-level variables $start and $count to track whether
# paths are currently being counted.
sub handle_start
{
    my ($kanjivg_element, $data, $parser, $xml_element, %attr) = @_;

    if ($xml_element ne 'g') {
        # Paths are only counted once a matching group has been seen.
        if ($start && $xml_element eq 'path') {
            $count++;
            handle_sanzui ($data, $count, \%attr);
        }
        return;
    }

    my $kvg = $attr{'kanjivg:element'};
    if (! $kvg) {
        # A group without an element name stops the counting.
        $start = undef;
        $count = 0;
        return;
    }

    # A group naming some other element leaves the state as it is.
    return if $kvg ne $kanjivg_element;

    $start = 1;
    $count = 0;
    return;
}