mirror of
https://github.com/KanjiVG/kanjivg.git
synced 2026-04-21 13:00:43 +02:00
Added script for 豕 errors; found one error in 6726 reversed l/r sides
Created perl module for generic searches for a particular pattern.
This commit is contained in:
53
KanjiVG.pm
Normal file
53
KanjiVG.pm
Normal file
@@ -0,0 +1,53 @@
|
||||
package KanjiVG;

# Helpers for searching the KanjiVG SVG files for a particular
# kanjivg:element and running a caller-supplied XML start-tag handler
# over every file which contains it.

use warnings;
use strict;
use parent 'Exporter';
# Both public routines may be imported on request.
our @EXPORT_OK = qw/find_element handle_element/;
use Carp;
# Loaded here so that $FindBin::Bin is set even when the calling script
# has not loaded FindBin itself.
use FindBin;

# Directory of SVG files, located relative to the running script.
my $dir = "$FindBin::Bin/kanjivg";
|
||||
|
||||
# Return the list of "*.svg" files under $dir which contain the text
# kanjivg:element="$element". Each matching file appears once in the
# returned list, however many of its lines match.
#
# Croaks if $element is undefined; dies if a file cannot be opened or
# closed.

sub find_element
{
    my ($element) = @_;
    if (! defined $element) {
        croak "No element";
    }
    # \Q...\E makes the element match literally even if it contains
    # regex metacharacters.
    my $string = qr/kanjivg:element="\Q$element\E"/;
    my @files = <$dir/*.svg>;
    my @matches;
    for my $file (@files) {
        open my $in, "<:encoding(utf8)", $file
            or die "Cannot open '$file': $!";
        while (my $line = <$in>) {
            if ($line =~ $string) {
                push @matches, $file;
                # One hit is enough: reading on would list the same file
                # again for every further matching line, and the file
                # would then be parsed more than once by handle_element.
                last;
            }
        }
        close $in or die "Cannot close '$file': $!";
    }
    return @matches;
}
|
||||
|
||||
# Parse every file returned by find_element ($element) with XML::Parser,
# calling $handle_start for each XML start tag.
#
# $element      - value of the kanjivg:element attribute to search for.
# $handle_start - code reference, called as
#                 $handle_start->($element, $data, @args) where @args
#                 are the arguments XML::Parser gives to a Start handler.
# $data         - hash reference handed to the callback; its "file" key
#                 is set to the name of the file being parsed.
#
# Croaks if $data is not a hash reference.

sub handle_element
{
    my ($element, $handle_start, $data) = @_;
    # Reject a bad $data before scanning the SVG files rather than after.
    if (ref $data ne 'HASH') {
        croak "Give me a hash ref";
    }
    my @matches = find_element ($element);
    # Loaded at run time so that this routine does not depend on the
    # caller having loaded XML::Parser.
    require XML::Parser;
    my $parser = XML::Parser->new (
        Handlers => {
            Start => sub { $handle_start->($element, $data, @_) },
        },
    );
    for my $file (@matches) {
        $data->{file} = $file;
        $parser->parsefile ($file);
    }
    return;
}
|
||||
|
||||
|
||||
1;
|
||||
@@ -46,6 +46,10 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details. -->
|
||||
<text transform="matrix(1 0 0 1 77.5 75.9414)">17</text>
|
||||
</g>
|
||||
<g id="StrokePaths" style="fill:none;stroke:#404040;stroke-width:3;stroke-linecap:round;stroke-linejoin:round;">
|
||||
<path d="M18.14,22.68c0.47,0.95,0.79,2.38,0.95,3.3c0.16,0.92,0.14,24.9-0.37,32.63c-0.72,11-2.47,25.08-7.97,35.72"/>
|
||||
<path d="M20.03,24.25c1.89-0.06,10.94-2.34,12.35-2.46c2.52-0.23,3.15,1.31,3.15,2.67c0,3.23-0.04,44.72-0.04,63.05c0,12.61-5.32,3.55-6.93,2.04"/>
|
||||
<path d="M19.6,42.65c4.15-0.35,11.74-1.54,15.38-1.57"/>
|
||||
<path d="M18.68,59.09c3.42-0.08,11.35-0.78,15.99-1.2"/>
|
||||
<path d="M44.08,21.71c0.99,0.53,2.14,0.63,3.14,0.53c9.11-0.96,30.9-3.46,41.4-3.53c1.65-0.01,2.64,0.25,3.47,0.51"/>
|
||||
<path d="M54.81,12.94c1.15,1.05,1.52,1.37,1.64,2.01c1.15,6.04,1.48,11.54,1.81,13.74"/>
|
||||
<path d="M75.55,11.69c0.64,0.75,1.07,1.89,0.86,2.99c-0.64,3.36-1.84,8.97-2.77,13.26"/>
|
||||
@@ -59,9 +63,5 @@ See http://creativecommons.org/licenses/by-sa/3.0/ for more details. -->
|
||||
<path d="M65.59,74.82c0.06,0.46,0.14,1.18-0.13,1.83c-1.57,3.87-11.38,11.32-23.68,16.52"/>
|
||||
<path d="M83.04,58.4c0.03,0.25,0.07,0.66-0.07,1.03c-0.84,2.17-5.68,6.92-12.29,9.83"/>
|
||||
<path d="M70.38,70.93c3.38,2.78,16.6,15.5,21.85,18.66c1.3,0.79,2.31,1.32,3.55,1.59"/>
|
||||
<path d="M18.14,22.68c0.47,0.95,0.79,2.38,0.95,3.3c0.16,0.92,0.14,24.9-0.37,32.63c-0.72,11-2.47,25.08-7.97,35.72"/>
|
||||
<path d="M20.03,24.25c1.89-0.06,10.94-2.34,12.35-2.46c2.52-0.23,3.15,1.31,3.15,2.67c0,3.23-0.04,44.72-0.04,63.05c0,12.61-5.32,3.55-6.93,2.04"/>
|
||||
<path d="M19.6,42.65c4.15-0.35,11.74-1.54,15.38-1.57"/>
|
||||
<path d="M18.68,59.09c3.42-0.08,11.35-0.78,15.99-1.2"/>
|
||||
</g>
|
||||
</svg>
|
||||
|
||||
|
Before Width: | Height: | Size: 4.2 KiB After Width: | Height: | Size: 4.2 KiB |
75
find-ie.pl
Executable file
75
find-ie.pl
Executable file
@@ -0,0 +1,75 @@
|
||||
#!/home/ben/software/install/bin/perl

# Run handle_start over every KanjiVG file containing the element
# below; handle_ie then reports on the first strokes of that element.

use warnings;
use strict;
use FindBin;
# KanjiVG.pm is expected to sit in the same directory as this script;
# add that directory to @INC so the module loads whatever the current
# directory is and whether or not "." is in @INC.
use lib $FindBin::Bin;
use XML::Parser;
use Image::SVG::Path 'extract_path_info';
use utf8;
use KanjiVG qw/handle_element/;
binmode STDOUT, ":utf8";
# Shared with the callbacks; handle_element sets $data{file}.
my %data;
my $element = '豕';

# State shared between calls to handle_start: whether we are inside a
# matching group, and how many paths of it have been seen so far.
my $start;
my $count;

handle_element ($element, \& handle_start, \%data);
|
||||
|
||||
# Examine one path of the current element.
#
# $data  - hash reference; "file" is read, "line1"/"line2" are written.
# $count - index of the path within the element, starting from 1.
# $attr  - hash reference of the path's attributes; "d" is the path data.
#
# For the first and second paths, the offset from the path's start
# point to its end point is stored in $data->{"line$count"} as
# [$x_diff, $y_diff]. For the second path the offset is also printed,
# prefixed with the base name of the file. Other paths are ignored.

sub handle_ie
{
    my ($data, $count, $attr) = @_;
    # Only the first two paths are examined.
    return if $count != 1 && $count != 2;
    my @segments = extract_path_info ($attr->{d}, {
        no_shortcuts => 1,
        absolute => 1,
    });
    my ($start_x, $start_y) = @{$segments[0]->{point}};
    my ($end_x, $end_y) = @{$segments[-1]->{end}};
    my $x_diff = $end_x - $start_x;
    my $y_diff = $end_y - $start_y;
    $data->{"line$count"} = [$x_diff, $y_diff];
    # Strip the directory part, working on a copy of the file name.
    (my $f = $data->{file}) =~ s!.*/!!;
    # Nothing is printed for the first path.
    if ($count == 2) {
        print "$f: $count: $x_diff $y_diff\n";
    }
    return;
}
|
||||
|
||||
# Start-tag callback given to handle_element.
#
# $kanjivg_element - the element being searched for.
# $data            - the shared hash reference, passed on to handle_ie.
# $parser          - the parser object (unused here).
# $xml_element     - name of the XML tag just opened.
# %attr            - attributes of that tag.
#
# Uses $start and $count, declared outside this sub, to remember
# whether a matching group is open and how many of its paths have been
# seen.

sub handle_start
{
    my ($kanjivg_element, $data, $parser, $xml_element, %attr) = @_;

    if ($xml_element ne 'g') {
        # Paths are only counted while a matching group is active.
        if ($start && $xml_element eq 'path') {
            $count++;
            handle_ie ($data, $count, \%attr);
        }
        return;
    }

    my $group_element = $attr{'kanjivg:element'};
    if (! $group_element) {
        # A group with no element attribute switches counting off.
        $start = undef;
        $count = 0;
        return;
    }
    # Groups for other elements leave the current state alone.
    return if $group_element ne $kanjivg_element;

    # The second part of a split element does not restart the count.
    my $part = $attr{"kanjivg:part"};
    return if defined $part && $part == 2;

    $start = 1;
    $count = 0;
    return;
}
|
||||
|
||||
@@ -5,72 +5,26 @@ use FindBin;
|
||||
use XML::Parser;
|
||||
use Image::SVG::Path 'extract_path_info';
|
||||
use utf8;
|
||||
my $string = qr/kanjivg:element="氵"/;
|
||||
my @files = <$FindBin::Bin/kanjivg/*.svg>;
|
||||
my @matches;
|
||||
for my $file (@files) {
|
||||
open my $in, "<:encoding(utf8)", $file
|
||||
or die $!;
|
||||
while (<$in>) {
|
||||
if (/$string/) {
|
||||
push @matches, $file;
|
||||
# print "$file matches.\n";
|
||||
}
|
||||
}
|
||||
close $in or die $!;
|
||||
}
|
||||
|
||||
my $start;
|
||||
my $count;
|
||||
|
||||
my %global;
|
||||
|
||||
my $parser = XML::Parser->new (
|
||||
Handlers => {
|
||||
Start => \& handle_start
|
||||
},
|
||||
);
|
||||
|
||||
use KanjiVG qw/find_element/;
|
||||
binmode STDOUT, "utf8";
|
||||
my %data;
|
||||
my $element = '氵';
|
||||
|
||||
for my $file (@matches) {
|
||||
# print "Parsing '$file'.\n";
|
||||
$global{file} = $file;
|
||||
$parser->parsefile ($file);
|
||||
}
|
||||
|
||||
sub handle_start
|
||||
{
|
||||
my ($parser, $element, %attr) = @_;
|
||||
if ($element eq 'g') {
|
||||
my $kvg = $attr{'kanjivg:element'};
|
||||
if ($kvg) {
|
||||
if ($kvg eq '氵') {
|
||||
#print "Found '$kvg' in '$global{file}'\n";
|
||||
$start = 1;
|
||||
$count = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
$start = undef;
|
||||
$count = 0;
|
||||
}
|
||||
}
|
||||
if ($start && $element eq 'path') {
|
||||
$count++;
|
||||
if ($count == 3) {
|
||||
my $d = $attr{d};
|
||||
my @values = extract_path_info ($d, {
|
||||
no_shortcuts => 1,
|
||||
absolute => 1,
|
||||
});
|
||||
my @start = @{$values[0]->{point}};
|
||||
my @end = @{$values[-1]->{end}};
|
||||
my $x_diff = $end[0] - $start[0];
|
||||
my $y_diff = $end[1] - $start[1];
|
||||
if ($x_diff < 0 || $y_diff > 0) {
|
||||
printf ("file $global{file}: %d %d\n", $x_diff, $y_diff);
|
||||
}
|
||||
}
|
||||
my ($data, $count, $d) = @_;
|
||||
if ($count == 3) {
|
||||
my @values = extract_path_info ($d, {
|
||||
no_shortcuts => 1,
|
||||
absolute => 1,
|
||||
});
|
||||
my @start = @{$values[0]->{point}};
|
||||
my @end = @{$values[-1]->{end}};
|
||||
my $x_diff = $end[0] - $start[0];
|
||||
my $y_diff = $end[1] - $start[1];
|
||||
# if ($x_diff < 0 || $y_diff > 0) {
|
||||
printf ("file $global{file}: %d %d\n", $x_diff, $y_diff);
|
||||
# }
|
||||
}
|
||||
}
|
||||
|
||||
59
find-sanzui.pl
Executable file
59
find-sanzui.pl
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/home/ben/software/install/bin/perl

# Run handle_start over every KanjiVG file containing the element
# below; handle_sanzui then reports on the third stroke of that element.

use warnings;
use strict;
use FindBin;
# KanjiVG.pm is expected to sit in the same directory as this script;
# add that directory to @INC so the module loads whatever the current
# directory is and whether or not "." is in @INC.
use lib $FindBin::Bin;
use XML::Parser;
use Image::SVG::Path 'extract_path_info';
use utf8;
use KanjiVG qw/handle_element/;
binmode STDOUT, ":utf8";
# Shared with the callbacks; handle_element sets $data{file}.
my %data;
my $element = '氵';

# State shared between calls to handle_start: whether we are inside a
# matching group, and how many paths of it have been seen so far.
my $start;
my $count;

handle_element ($element, \& handle_start, \%data);
|
||||
|
||||
# Examine one path of the current element.
#
# $data  - hash reference; "file" is the name of the file being parsed.
# $count - index of the path within the element, starting from 1.
# $attr  - hash reference of the path's attributes; "d" is the path data.
#
# For the third path only, print the file name and the offset from the
# path's start point to its end point. Other paths are ignored.

sub handle_sanzui
{
    my ($data, $count, $attr) = @_;
    if ($count == 3) {
        my $d = $attr->{d};
        my @values = extract_path_info ($d, {
            no_shortcuts => 1,
            absolute => 1,
        });
        my @start = @{$values[0]->{point}};
        my @end = @{$values[-1]->{end}};
        my $x_diff = $end[0] - $start[0];
        my $y_diff = $end[1] - $start[1];
        # The filter below is disabled, so every third path is reported.
#        if ($x_diff < 0 || $y_diff > 0) {
        # The file name is passed as an argument rather than being
        # interpolated into the format, so that a "%" in the path is
        # not taken as a conversion specification.
        printf ("file %s: %d %d\n", $data->{file}, $x_diff, $y_diff);
#        }
    }
    return;
}
|
||||
|
||||
# Start-tag callback given to handle_element.
#
# $kanjivg_element - the element being searched for.
# $data            - the shared hash reference, passed on to
#                    handle_sanzui.
# $parser          - the parser object (unused here).
# $xml_element     - name of the XML tag just opened.
# %attr            - attributes of that tag.
#
# Uses $start and $count, declared outside this sub, to remember
# whether a matching group is open and how many of its paths have been
# seen.

sub handle_start
{
    my ($kanjivg_element, $data, $parser, $xml_element, %attr) = @_;

    if ($xml_element ne 'g') {
        # Paths are only counted while a matching group is active.
        if ($start && $xml_element eq 'path') {
            $count++;
            handle_sanzui ($data, $count, \%attr);
        }
        return;
    }

    my $group_element = $attr{'kanjivg:element'};
    if (! $group_element) {
        # A group with no element attribute switches counting off.
        $start = undef;
        $count = 0;
        return;
    }
    # Groups for other elements leave the current state alone.
    return if $group_element ne $kanjivg_element;

    $start = 1;
    $count = 0;
    return;
}
|
||||
|
||||
Reference in New Issue
Block a user