169 lines
4.3 KiB
Dart
Executable File
169 lines
4.3 KiB
Dart
Executable File
// https://en.wikipedia.org/wiki/JIS_encoding
|
|
// https://en.wikipedia.org/wiki/Japanese_Industrial_Standards
|
|
|
|
import 'dart:convert';
|
|
import 'dart:io';
|
|
import 'dart:typed_data';
|
|
|
|
import 'datatypes.dart';
|
|
import 'character_set_conversion.dart';
|
|
|
|
/// ISO-2022-JP (Usually abbreviated "JIS")
|
|
/// Encodes, decodes: JIS X 0202
|
|
///
|
|
///
|
|
// class ISO2002JP extends Encoding {}
|
|
|
|
/// Character sets that can be selected when converting Shift JIS text.
///
/// Used as the type of the `forceCharacterSet` option on `ShiftJIS.encode`
/// and `ShiftJIS.decode`.
enum ShiftJISCharacterSet {
  /// The JIS X 0201 character set.
  jisx0201,

  /// The JIS X 0208 character set.
  jisx0208,

  /// NOTE(review): presumably the Shift JIS extension range described by
  /// Ken Lunde — confirm against the intended conversion tables.
  lundeRange,
}
|
|
|
|
/// Shift JIS (usually abbreviated "SJIS").
///
/// Encodes and decodes JIS X 0208 characters. Characters are exchanged as
/// 16-bit values (lead byte in the high 8 bits, trail byte in the low 8 bits),
/// not as individual bytes.
///
/// The JIS X 0201 and "Lunde range" conversions are not implemented yet.
///
/// https://en.wikipedia.org/wiki/Shift_JIS
class ShiftJIS extends Encoding {
  /// Decodes [char] using the JIS X 0201 character set.
  ///
  /// Not implemented; always throws [UnimplementedError].
  int? decodeWithJsix0201(int char) {
    throw UnimplementedError();
  }

  /// Decodes a 16-bit Shift JIS double-byte code using JIS X 0208.
  ///
  /// [char] holds the lead byte in its high 8 bits and the trail byte in its
  /// low 8 bits. Returns `null` when the lead byte is outside
  /// 0x81-0x9F / 0xE0-0xEA or the trail byte is outside
  /// 0x40-0x7E / 0x80-0xFC (assuming `between` is inclusive on both ends —
  /// TODO confirm against its definition). Otherwise returns whatever
  /// `JSIX0208Decoder().convert` yields for the corresponding JIS X 0208 code.
  int? decodeWithJsix0208(int char) {
    final s1 = char >> 8;
    final s2 = char & 0x00FF;

    final validLead = s1.between(0x81, 0x9f) || s1.between(0xe0, 0xea);
    // The trail byte must be validated on s2. Checking s1 here (as before)
    // was always true for a valid lead byte, so bad trail bytes slipped
    // through and produced bogus JIS X 0208 codes.
    final validTrail = s2.between(0x40, 0x7e) || s2.between(0x80, 0xfc);
    if (!(validLead && validTrail)) {
      return null;
    }

    // Collapse the two lead-byte ranges and the two trail-byte ranges into
    // contiguous zero-based indexes.
    final t1 = s1 < 0xe0 ? s1 - 0x81 : s1 - 0xc1;
    final t2 = s2 < 0x80 ? s2 - 0x40 : s2 - 0x41;

    // Each lead byte covers two JIS X 0208 rows of 0x5e cells; both row and
    // cell bytes are offset by 0x21.
    final ffoo = 2 * t1 + (t2 < 0x5e ? 0 : 1) + 0x21;
    final ooff = (t2 < 0x5e ? t2 : t2 - 0x5e) + 0x21;
    final ffff = (ffoo << 8) + ooff;

    return JSIX0208Decoder().convert(ffff);
  }

  /// Decodes [char] using the Lunde range.
  ///
  /// Not implemented; always throws [UnimplementedError].
  int? decodeWithLundeRange(int char) {
    throw UnimplementedError();
  }

  /// Encodes [char] using the JIS X 0201 character set.
  ///
  /// Not implemented; always throws [UnimplementedError].
  int? encodeWithJisx0201(int char) {
    throw UnimplementedError();
  }

  /// Encodes a character code into a 16-bit Shift JIS double-byte code.
  ///
  /// [char] is first passed to `JSIX0208Encoder().convert`; its result is
  /// treated as a JIS X 0208 code with the row byte in the high 8 bits and
  /// the cell byte in the low 8 bits. Returns `null` when that conversion
  /// returns `null` or when the row/cell bytes fall outside
  /// 0x21-0x74 / 0x21-0x7E. Otherwise returns the Shift JIS code with the
  /// lead byte in the high 8 bits and the trail byte in the low 8 bits.
  int? encodeWithJisx0208(int char) {
    final jsixEncoded = JSIX0208Encoder().convert(char);
    if (jsixEncoded == null) {
      return null;
    }

    final c1 = jsixEncoded >> 8;
    final c2 = jsixEncoded & 0x00FF;
    if (!(c1.between(0x21, 0x74) && c2.between(0x21, 0x7e))) {
      return null;
    }

    // Two consecutive rows share one lead byte; odd zero-based rows occupy
    // the upper half (offset 0x5e) of the trail-byte space.
    final t1 = (c1 - 0x21) >> 1;
    final t2 = (((c1 - 0x21) & 1) != 0 ? 0x5e : 0) + (c2 - 0x21);

    // Re-open the gaps in the lead-byte (0xA0-0xDF) and trail-byte (0x7F)
    // ranges.
    final ffoo = t1 < 0x1f ? t1 + 0x81 : t1 + 0xc1;
    final ooff = t2 < 0x3f ? t2 + 0x40 : t2 + 0x41;

    return (ffoo << 8) + ooff;
  }

  /// Encodes [char] using the Lunde range.
  ///
  /// Not implemented; always throws [UnimplementedError].
  int? encodeWithLundeRange(int char) {
    throw UnimplementedError();
  }

  /// Encodes every UTF-16 code unit of [input] with [encodeWithJisx0208].
  ///
  /// Returns one 16-bit Shift JIS code per successfully encoded code unit.
  /// When a code unit cannot be encoded it is skipped if
  /// [ignoreUnknownCharacters] is `true`; otherwise a `ConverterException`
  /// is thrown.
  ///
  /// [forceCharacterSet] is accepted but currently ignored; JIS X 0208 is
  /// always used.
  @override
  List<int> encode(
    String input, {
    ShiftJISCharacterSet? forceCharacterSet,
    bool ignoreUnknownCharacters = false,
  }) {
    final result = <int>[];
    final toEncode = input.codeUnits;
    for (var i = 0; i < toEncode.length; i++) {
      final charcode = toEncode[i];
      final encodedChar = encodeWithJisx0208(charcode);
      if (encodedChar != null) {
        result.add(encodedChar);
      } else if (!ignoreUnknownCharacters) {
        throw ConverterException(
          "Unable to convert character '${String.fromCharCode(charcode)}' at index [$i]",
        );
      }
    }
    return result;
  }

  /// Decodes a list of 16-bit Shift JIS codes with [decodeWithJsix0208].
  ///
  /// Each element of [encoded] must hold a whole double-byte code (lead byte
  /// in the high 8 bits). When an element cannot be decoded it is skipped if
  /// [ignoreUnknownCharacters] is `true`; otherwise a `ConverterException`
  /// is thrown.
  ///
  /// [forceCharacterSet] is accepted but currently ignored; JIS X 0208 is
  /// always used.
  @override
  String decode(
    List<int> encoded, {
    ShiftJISCharacterSet? forceCharacterSet,
    bool ignoreUnknownCharacters = false,
  }) {
    final result = <int>[];
    for (var i = 0; i < encoded.length; i++) {
      final encodedChar = encoded[i];
      final translatedChar = decodeWithJsix0208(encodedChar);
      if (translatedChar != null) {
        result.add(translatedChar);
      } else if (!ignoreUnknownCharacters) {
        throw ConverterException(
          "Unknown character: ${encodedChar.hex} at index [$i]",
        );
      }
    }
    return String.fromCharCodes(result);
  }

  /// Not implemented; accessing this getter throws [UnimplementedError].
  @override
  Converter<String, List<int>> get encoder => throw UnimplementedError();

  /// Not implemented; accessing this getter throws [UnimplementedError].
  @override
  Converter<List<int>, String> get decoder => throw UnimplementedError();

  /// The name of this encoding.
  @override
  String get name => "Shift JIS";
}
|
|
|
|
/// EUC-JP (usually abbreviated "UJIS", for "Unixized JIS").
///
/// Intended to encode and decode JIS X 0201, JIS X 0208 and JIS X 0212.
/// This class is currently an empty placeholder with no conversion logic.
///
/// https://en.wikipedia.org/wiki/Extended_Unix_Code#EUC-JP
class EUCJP {}
|
|
|
|
/// Manual smoke test for the Shift JIS conversion.
///
/// Reads `test.sjis.txt`, prints its contents as big-endian 16-bit codes,
/// encodes a sample sentence with `ShiftJIS`, prints the input file's raw
/// bytes, and writes the encoded sentence to `test.sjis2.txt`.
void main() {
  final file = File("test.sjis.txt");
  final bytes = file.readAsBytesSync();
  final data = ByteData.sublistView(bytes);

  // Pair the bytes up as big-endian 16-bit codes (lead byte first); a
  // trailing odd byte is dropped.
  final characterCodes = [
    for (var i = 0; i < data.lengthInBytes ~/ 2; i++) data.getUint16(i * 2),
  ];
  print(characterCodes);

  // print(ShiftJIS().decode(characterCodes));

  // print(JSIX0208Encoder().encode16BitCharacter('あ'.codeUnitAt(0)));
  // print(JSIX0208Encoder().encode16BitCharacter('幹'.codeUnitAt(0)));
  final x = ShiftJIS().encode("この文章は読めるかな?");
  print(bytes);

  // writeAsBytesSync stores one byte per list element, so handing it the
  // 16-bit codes directly would drop every lead byte. Split each code into
  // its lead and trail byte, matching the big-endian read above.
  File("test.sjis2.txt").writeAsBytesSync([
    for (final code in x) ...[code >> 8, code & 0xFF],
  ]);
}