Upgrade to 2.0.0, see changelog

This commit is contained in:
Oystein Kristoffer Tveit 2021-07-25 22:57:02 +02:00
parent 527f58d679
commit c75f252a9c
44 changed files with 3557 additions and 3573 deletions

14
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,14 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "unofficial-jisho-api-dart",
"request": "launch",
"type": "dart",
"program": "example/api/phrase_search_copy.dart"
}
]
}

View File

@ -1,3 +1,9 @@
## 2.0.0
- Upgrade library to use null-safety
- Wrap the result data in "Data" classes that are nullable if no result was found
- Add sound data to phrase scrape results
## 1.1.0
- Export object interfaces for both libraries

View File

@ -8,8 +8,6 @@ include: package:effective_dart/analysis_options.yaml
# Uncomment to specify additional rules.
linter:
rules:
public_member_api_docs: false
lines_longer_than_80_chars: false
omit_local_variable_types: false
analyzer:

View File

@ -1,8 +1,8 @@
import 'dart:convert' show jsonEncode;
import 'package:unofficial_jisho_api/api.dart' as jisho;
void main() async {
await jisho.searchForExamples('').then((result) {
void main() {
jisho.searchForExamples('').then((result) {
print('Jisho Uri: ${result.uri}');
print('');

View File

@ -1,23 +1,27 @@
import 'dart:convert' show jsonEncode;
import 'package:unofficial_jisho_api/api.dart' as jisho;
void main() async {
await jisho.searchForKanji('').then((result) {
void main() {
jisho.searchForKanji('').then((result) {
print('Found: ${result.found}');
print('Taught in: ${result.taughtIn}');
print('JLPT level: ${result.jlptLevel}');
print('Newspaper frequency rank: ${result.newspaperFrequencyRank}');
print('Stroke count: ${result.strokeCount}');
print('Meaning: ${result.meaning}');
print('Kunyomi: ${jsonEncode(result.kunyomi)}');
print('Kunyomi example: ${jsonEncode(result.kunyomiExamples[0])}');
print('Onyomi: ${jsonEncode(result.onyomi)}');
print('Onyomi example: ${jsonEncode(result.onyomiExamples[0])}');
print('Radical: ${jsonEncode(result.radical)}');
print('Parts: ${jsonEncode(result.parts)}');
print('Stroke order diagram: ${result.strokeOrderDiagramUri}');
print('Stroke order SVG: ${result.strokeOrderSvgUri}');
print('Stroke order GIF: ${result.strokeOrderGifUri}');
print('Jisho Uri: ${result.uri}');
final data = result.data;
if (data != null) {
print('Taught in: ${data.taughtIn}');
print('JLPT level: ${data.jlptLevel}');
print('Newspaper frequency rank: ${data.newspaperFrequencyRank}');
print('Stroke count: ${data.strokeCount}');
print('Meaning: ${data.meaning}');
print('Kunyomi: ${jsonEncode(data.kunyomi)}');
print('Kunyomi example: ${jsonEncode(data.kunyomiExamples[0])}');
print('Onyomi: ${jsonEncode(data.onyomi)}');
print('Onyomi example: ${jsonEncode(data.onyomiExamples[0])}');
print('Radical: ${jsonEncode(data.radical)}');
print('Parts: ${jsonEncode(data.parts)}');
print('Stroke order diagram: ${data.strokeOrderDiagramUri}');
print('Stroke order SVG: ${data.strokeOrderSvgUri}');
print('Stroke order GIF: ${data.strokeOrderGifUri}');
print('Jisho Uri: ${data.uri}');
}
});
}

View File

@ -2,8 +2,8 @@ import 'dart:convert';
import 'package:unofficial_jisho_api/api.dart' as jisho;
final JsonEncoder encoder = JsonEncoder.withIndent(' ');
void main() async {
await jisho.scrapeForPhrase('').then((data) {
void main() {
jisho.scrapeForPhrase('').then((data) {
print(encoder.convert(data));
});
}

View File

@ -2,8 +2,9 @@ import 'dart:convert';
import 'package:unofficial_jisho_api/api.dart' as jisho;
final JsonEncoder encoder = JsonEncoder.withIndent(' ');
void main() async {
await jisho.searchForPhrase('').then((result) {
void main() {
jisho.searchForPhrase('').then((result) {
// jisho.searchForPhrase('する').then((result) {
print(encoder.convert(result));
});
}

View File

@ -5,13 +5,14 @@ import 'package:unofficial_jisho_api/parser.dart' as jisho_parser;
final JsonEncoder encoder = JsonEncoder.withIndent(' ');
const String searchExample = '保護者';
final String searchURI = jisho_parser.uriForExampleSearch(searchExample);
final Uri searchURI = jisho_parser.uriForExampleSearch(searchExample);
void main() async {
await http.get(searchURI).then((result) {
final parsedResult = jisho_parser.parseExamplePageData(result.body, searchExample);
void main() {
http.get(searchURI).then((result) {
final parsedResult =
jisho_parser.parseExamplePageData(result.body, searchExample);
print('English: ${parsedResult.results[0].english}');
print('Kanji ${parsedResult.results[0].kanji}');
print('Kana: ${parsedResult.results[0].kana}');
});
}
}

View File

@ -5,13 +5,17 @@ import 'package:unofficial_jisho_api/parser.dart' as jisho_parser;
final JsonEncoder encoder = JsonEncoder.withIndent(' ');
const String searchKanji = '';
final String searchURI = jisho_parser.uriForKanjiSearch(searchKanji);
final Uri searchURI = jisho_parser.uriForKanjiSearch(searchKanji);
void main() async {
await http.get(searchURI).then((result) {
final parsedResult = jisho_parser.parseKanjiPageData(result.body, searchKanji);
print('JLPT level: ${parsedResult.jlptLevel}');
print('Stroke count: ${parsedResult.strokeCount}');
print('Meaning: ${parsedResult.meaning}');
void main() {
http.get(searchURI).then((result) {
final parsedResult =
jisho_parser.parseKanjiPageData(result.body, searchKanji);
final data = parsedResult.data;
if (data != null) {
print('JLPT level: ${data.jlptLevel}');
print('Stroke count: ${data.strokeCount}');
print('Meaning: ${data.meaning}');
}
});
}
}

View File

@ -5,12 +5,12 @@ import 'package:unofficial_jisho_api/parser.dart' as jisho_parser;
final JsonEncoder encoder = JsonEncoder.withIndent(' ');
const String searchExample = '保護者';
final String searchURI = jisho_parser.uriForPhraseScrape(searchExample);
final Uri searchURI = jisho_parser.uriForPhraseScrape(searchExample);
void main() async {
await http.get(searchURI).then((result) {
final parsedResult = jisho_parser.parsePhrasePageData(result.body, searchExample);
void main() {
http.get(searchURI).then((result) {
final parsedResult =
jisho_parser.parsePhrasePageData(result.body, searchExample);
print(encoder.convert(parsedResult));
});
}
}

View File

@ -7,11 +7,11 @@ library unofficial_jisho_api;
import 'dart:convert';
import 'package:http/http.dart' as http;
import './src/exampleSearch.dart';
import './src/kanjiSearch.dart';
import './src/example_search.dart';
import './src/kanji_search.dart';
import './src/objects.dart';
import './src/phraseScrape.dart';
import './src/phraseSearch.dart';
import './src/phrase_scrape.dart';
import './src/phrase_search.dart';
export './src/objects.dart';

View File

@ -5,9 +5,8 @@
/// for providing HTML.
library unofficial_jisho_parser;
export './src/exampleSearch.dart'
show uriForExampleSearch, parseExamplePageData;
export './src/kanjiSearch.dart' show uriForKanjiSearch, parseKanjiPageData;
export './src/objects.dart';
export './src/phraseScrape.dart' show uriForPhraseScrape, parsePhrasePageData;
export './src/phraseSearch.dart';
export 'src/example_search.dart' show uriForExampleSearch, parseExamplePageData;
export 'src/kanji_search.dart' show uriForKanjiSearch, parseKanjiPageData;
export 'src/phrase_scrape.dart' show uriForPhraseScrape, parsePhrasePageData;
export 'src/phrase_search.dart';

View File

@ -1,4 +1,6 @@
const String JISHO_API = 'https://jisho.org/api/v1/search/words';
const String SCRAPE_BASE_URI = 'https://jisho.org/search/';
const String STROKE_ORDER_DIAGRAM_BASE_URI =
// ignore_for_file: public_member_api_docs
const String jishoApi = 'https://jisho.org/api/v1/search/words';
const String scrapeBaseUri = 'https://jisho.org/search/';
const String strokeOrderDiagramBaseUri =
'https://classic.jisho.org/static/images/stroke_diagrams/';

View File

@ -1,14 +1,15 @@
import 'package:html/parser.dart';
import 'package:html/dom.dart';
import 'package:html/parser.dart';
import './base_uri.dart';
import './objects.dart';
import './scraping.dart';
final RegExp _kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]');
/// Provides the URI for an example search
String uriForExampleSearch(String phrase) {
return '$SCRAPE_BASE_URI${Uri.encodeComponent(phrase)}%23sentences';
Uri uriForExampleSearch(String phrase) {
return Uri.parse('$scrapeBaseUri${Uri.encodeComponent(phrase)}%23sentences');
}
List<Element> _getChildrenAndSymbols(Element ul) {
@ -16,7 +17,7 @@ List<Element> _getChildrenAndSymbols(Element ul) {
final ulCharArray = ulText.split('');
final ulChildren = ul.children;
var offsetPointer = 0;
List<Element> result = [];
final result = <Element>[];
for (var element in ulChildren) {
if (element.text !=
@ -40,8 +41,13 @@ List<Element> _getChildrenAndSymbols(Element ul) {
return result;
}
ExampleResultData _getKanjiAndKana(Element div) {
final ul = div.querySelector('ul');
/// Although return type is List<String>, it is to be interpreted as (String, String)
List<String> _getKanjiAndKana(Element div) {
final ul = assertNotNull(
variable: div.querySelector('ul'),
errorMessage:
"Could not parse kanji/kana div. Is the provided document corrupt, or has Jisho been updated?",
);
final contents = _getChildrenAndSymbols(ul);
var kanji = '';
@ -51,7 +57,11 @@ ExampleResultData _getKanjiAndKana(Element div) {
if (content.localName == 'li') {
final li = content;
final furigana = li.querySelector('.furigana')?.text;
final unlifted = li.querySelector('.unlinked')?.text;
final unlifted = assertNotNull(
variable: li.querySelector('.unlinked')?.text,
errorMessage:
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
);
if (furigana != null) {
kanji += unlifted;
@ -74,39 +84,49 @@ ExampleResultData _getKanjiAndKana(Element div) {
}
} else {
final text = content.text.trim();
if (text != null) {
kanji += text;
kana += text;
}
kanji += text;
kana += text;
}
}
return ExampleResultData(
kanji: kanji,
kana: kana,
);
return [kanji, kana];
}
List<ExampleSentencePiece> getPieces(Element sentenceElement) {
final pieceElements = sentenceElement.querySelectorAll('li.clearfix');
final List<ExampleSentencePiece> pieces = [];
for (var pieceIndex = 0; pieceIndex < pieceElements.length; pieceIndex += 1) {
final pieceElement = pieceElements[pieceIndex];
pieces.add(ExampleSentencePiece(
lifted: pieceElement.querySelector('.furigana')?.text,
unlifted: pieceElement.querySelector('.unlinked')?.text,
));
}
return pieces;
return pieceElements.map((var e) {
final unlifted = assertNotNull(
variable: e.querySelector('.unlinked')?.text,
errorMessage:
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
);
return ExampleSentencePiece(
lifted: e.querySelector('.furigana')?.text,
unlifted: unlifted,
);
}).toList();
}
ExampleResultData _parseExampleDiv(Element div) {
final result = _getKanjiAndKana(div);
result.english = div.querySelector('.english').text;
result.pieces = getPieces(div) ?? [];
final kanji = result[0];
final kana = result[1];
return result;
final english = assertNotNull(
variable: div.querySelector('.english')?.text,
errorMessage:
"Could not parse translation. Is the provided document corrupt, or has Jisho been updated?",
);
final pieces = getPieces(div);
return ExampleResultData(
english: english,
kanji: kanji,
kana: kana,
pieces: pieces,
);
}
/// Parses a jisho example sentence search page to an object
@ -117,9 +137,8 @@ ExampleResults parseExamplePageData(String pageHtml, String phrase) {
final results = divs.map(_parseExampleDiv).toList();
return ExampleResults(
query: phrase,
found: results.isNotEmpty,
results: results ?? [],
uri: uriForExampleSearch(phrase)
);
query: phrase,
found: results.isNotEmpty,
results: results,
uri: uriForExampleSearch(phrase).toString());
}

View File

@ -1,235 +0,0 @@
import 'package:html_unescape/html_unescape.dart' as html_entities;
import './base_uri.dart';
import './objects.dart';
final _htmlUnescape = html_entities.HtmlUnescape();
const _onyomiLocatorSymbol = 'On';
const _kunyomiLocatorSymbol = 'Kun';
String _removeNewlines(String str) {
return str.replaceAll(RegExp(r'(?:\r|\n)'), '').trim();
}
/// Provides the URI for a kanji search
String uriForKanjiSearch(String kanji) {
return '$SCRAPE_BASE_URI${Uri.encodeComponent(kanji)}%23kanji';
}
String _getUriForStrokeOrderDiagram(String kanji) {
return '$STROKE_ORDER_DIAGRAM_BASE_URI${kanji.codeUnitAt(0)}_frames.png';
}
bool _containsKanjiGlyph(String pageHtml, String kanji) {
final kanjiGlyphToken =
'<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>';
return pageHtml.contains(kanjiGlyphToken);
}
String _getStringBetweenIndicies(String data, int startIndex, int endIndex) {
final result = data.substring(startIndex, endIndex);
return _removeNewlines(result).trim();
}
String _getStringBetweenStrings(
String data, String startString, String endString) {
final regex = RegExp(
'${RegExp.escape(startString)}(.*?)${RegExp.escape(endString)}',
dotAll: true);
final match = regex.allMatches(data).toList();
return match.isNotEmpty ? match[0].group(1).toString() : null;
}
int _getIntBetweenStrings(
String pageHtml, String startString, String endString) {
final stringBetweenStrings =
_getStringBetweenStrings(pageHtml, startString, endString);
return int.parse(stringBetweenStrings);
}
List<String> _getAllGlobalGroupMatches(String str, RegExp regex) {
var regexResults = regex.allMatches(str).toList();
List<String> results = [];
for (var match in regexResults) {
results.add(match.group(1));
}
return results;
}
List<String> _parseAnchorsToArray(String str) {
final regex = RegExp(r'<a href=".*?">(.*?)<\/a>');
return _getAllGlobalGroupMatches(str, regex);
}
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
final yomiSection = _getStringBetweenStrings(
pageHtml, '<dt>$yomiLocatorSymbol:</dt>', '</dl>');
return _parseAnchorsToArray(yomiSection ?? '');
}
List<String> _getKunyomi(String pageHtml) {
return _getYomi(pageHtml, _kunyomiLocatorSymbol);
}
List<String> _getOnyomi(String pageHtml) {
return _getYomi(pageHtml, _onyomiLocatorSymbol);
}
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
final exampleSection =
_getStringBetweenStrings(pageHtml, locatorString, '</ul>');
if (exampleSection == null) {
return null;
}
final regex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);
final regexResults =
_getAllGlobalGroupMatches(exampleSection, regex).map((s) => s.trim());
final examples = regexResults.map((regexResult) {
final examplesLines = regexResult.split('\n').map((s) => s.trim()).toList();
return YomiExample(
example: examplesLines[0],
reading: examplesLines[1].replaceAll('', '').replaceAll('', ''),
meaning: _htmlUnescape.convert(examplesLines[2]),
);
});
return examples.toList();
}
List<YomiExample> _getOnyomiExamples(String pageHtml) {
return _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
}
List<YomiExample> _getKunyomiExamples(String pageHtml) {
return _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
}
Radical _getRadical(String pageHtml) {
const radicalMeaningStartString = '<span class="radical_meaning">';
const radicalMeaningEndString = '</span>';
var radicalMeaning = _getStringBetweenStrings(
pageHtml,
radicalMeaningStartString,
radicalMeaningEndString,
).trim();
if (radicalMeaning != null) {
final radicalMeaningStartIndex =
pageHtml.indexOf(radicalMeaningStartString);
final radicalMeaningEndIndex = pageHtml.indexOf(
radicalMeaningEndString,
radicalMeaningStartIndex,
);
final radicalSymbolStartIndex =
radicalMeaningEndIndex + radicalMeaningEndString.length;
const radicalSymbolEndString = '</span>';
final radicalSymbolEndIndex =
pageHtml.indexOf(radicalSymbolEndString, radicalSymbolStartIndex);
final radicalSymbolsString = _getStringBetweenIndicies(
pageHtml,
radicalSymbolStartIndex,
radicalSymbolEndIndex,
);
if (radicalSymbolsString.length > 1) {
final radicalForms = radicalSymbolsString
.substring(1)
.replaceAll('(', '')
.replaceAll(')', '')
.trim()
.split(', ');
return Radical(
symbol: radicalSymbolsString[0],
forms: radicalForms ?? [],
meaning: radicalMeaning);
}
return Radical(symbol: radicalSymbolsString, meaning: radicalMeaning);
}
return null;
}
List<String> _getParts(String pageHtml) {
const partsSectionStartString = '<dt>Parts:</dt>';
const partsSectionEndString = '</dl>';
final partsSection = _getStringBetweenStrings(
pageHtml,
partsSectionStartString,
partsSectionEndString,
);
var result = _parseAnchorsToArray(partsSection);
result.sort();
return (result);
}
String _getSvgUri(String pageHtml) {
var svgRegex = RegExp('\/\/.*?.cloudfront.net\/.*?.svg');
final regexResult = svgRegex.firstMatch(pageHtml).group(0).toString();
return regexResult.isNotEmpty ? 'https:$regexResult' : null;
}
String _getGifUri(String kanji) {
final unicodeString = kanji.codeUnitAt(0).toRadixString(16);
final fileName = '$unicodeString.gif';
final animationUri =
'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$fileName';
return animationUri;
}
int _getNewspaperFrequencyRank(String pageHtml) {
final frequencySection =
_getStringBetweenStrings(pageHtml, '<div class="frequency">', '</div>');
return (frequencySection != null)
? int.parse(
_getStringBetweenStrings(frequencySection, '<strong>', '</strong>'))
: null;
}
/// Parses a jisho kanji search page to an object
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
final result = KanjiResult();
result.query = kanji;
result.found = _containsKanjiGlyph(pageHtml, kanji);
if (result.found == false) {
return result;
}
result.taughtIn =
_getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>');
result.jlptLevel =
_getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>');
result.newspaperFrequencyRank = _getNewspaperFrequencyRank(pageHtml);
result.strokeCount =
_getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes');
result.meaning = _htmlUnescape.convert(_removeNewlines(
_getStringBetweenStrings(
pageHtml, '<div class="kanji-details__main-meanings">', '</div>'))
.trim());
result.kunyomi = _getKunyomi(pageHtml) ?? [];
result.onyomi = _getOnyomi(pageHtml) ?? [];
result.onyomiExamples = _getOnyomiExamples(pageHtml) ?? [];
result.kunyomiExamples = _getKunyomiExamples(pageHtml) ?? [];
result.radical = _getRadical(pageHtml);
result.parts = _getParts(pageHtml) ?? [];
result.strokeOrderDiagramUri = _getUriForStrokeOrderDiagram(kanji);
result.strokeOrderSvgUri = _getSvgUri(pageHtml);
result.strokeOrderGifUri = _getGifUri(kanji);
result.uri = uriForKanjiSearch(kanji);
return result;
}

243
lib/src/kanji_search.dart Normal file
View File

@ -0,0 +1,243 @@
import 'package:html_unescape/html_unescape.dart' as html_entities;
import './base_uri.dart';
import './objects.dart';
import './scraping.dart';
final _htmlUnescape = html_entities.HtmlUnescape();
const _onyomiLocatorSymbol = 'On';
const _kunyomiLocatorSymbol = 'Kun';
/// Builds the Jisho search URI used to look up a single [kanji].
///
/// The kanji is percent-encoded and followed by `%23kanji` (an encoded
/// `#kanji` tag) before being appended to the scrape base URI.
Uri uriForKanjiSearch(String kanji) {
  final encodedKanji = Uri.encodeComponent(kanji);
  return Uri.parse('$scrapeBaseUri$encodedKanji%23kanji');
}
/// Returns the URL of the step-by-step stroke order diagram for [kanji].
///
/// The file name is the decimal Unicode code point of the first character,
/// followed by `_frames.png`. The code point is read through [String.runes]
/// rather than `codeUnitAt(0)`, so a kanji outside the Basic Multilingual
/// Plane (stored as a surrogate pair) yields its real code point instead of
/// the value of its high surrogate. For all other kanji the result is
/// unchanged.
///
/// Throws a [StateError] if [kanji] is empty.
String _getUriForStrokeOrderDiagram(String kanji) {
  final codePoint = kanji.runes.first;
  return '$strokeOrderDiagramBaseUri${codePoint}_frames.png';
}
/// Whether [pageHtml] contains the heading element that displays [kanji].
///
/// The check is a plain substring search for the exact `<h1>` markup, so any
/// difference in attributes or whitespace makes it return `false`.
bool _containsKanjiGlyph(String pageHtml, String kanji) => pageHtml.contains(
    '<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>');
/// Extracts the readings listed under the `<dt>` label [yomiLocatorSymbol].
///
/// Takes the text between `<dt>SYMBOL:</dt>` and the next `</dl>` and hands it
/// to `parseAnchorsToArray`. When the section is missing, an empty string is
/// parsed instead.
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
  final sectionStart = '<dt>$yomiLocatorSymbol:</dt>';
  final section =
      getStringBetweenStrings(pageHtml, sectionStart, '</dl>') ?? '';
  return parseAnchorsToArray(section);
}
/// Returns the readings listed under the `Kun` label of [pageHtml].
List<String> _getKunyomi(String pageHtml) =>
    _getYomi(pageHtml, _kunyomiLocatorSymbol);
/// Returns the readings listed under the `On` label of [pageHtml].
List<String> _getOnyomi(String pageHtml) =>
    _getYomi(pageHtml, _onyomiLocatorSymbol);
/// Extracts the "reading compounds" examples for [yomiLocatorSymbol].
///
/// Looks for the section that starts at
/// `<h2>SYMBOL reading compounds</h2>` and ends at the next `</ul>`. Returns
/// an empty list when that section is absent.
///
/// Each `<li>` body is split on newlines into three trimmed lines:
/// the example word, its reading, and its meaning. The meaning is
/// HTML-unescaped.
///
/// The lenticular brackets around the reading are removed. The two
/// `replaceAll` patterns in the previous revision were empty strings (the
/// bracket characters had been lost), which made the calls a no-op; they are
/// spelled as escapes here so they cannot be lost again.
/// NOTE(review): the bracket characters are assumed to be U+3010 and U+3011;
/// confirm against a live Jisho page.
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
  final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
  final exampleSection =
      getStringBetweenStrings(pageHtml, locatorString, '</ul>');
  if (exampleSection == null) {
    return [];
  }

  // U+3010 and U+3011: the brackets assumed to wrap each reading.
  final readingBrackets = RegExp('[\u3010\u3011]');
  final listItemRegex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);

  return getAllGlobalGroupMatches(exampleSection, listItemRegex)
      .map((listItem) => listItem.trim())
      .map((listItem) {
    final lines = listItem.split('\n').map((line) => line.trim()).toList();
    return YomiExample(
      example: lines[0],
      reading: lines[1].replaceAll(readingBrackets, ''),
      meaning: _htmlUnescape.convert(lines[2]),
    );
  }).toList();
}
/// Returns the `On` reading compound examples found in [pageHtml].
List<YomiExample> _getOnyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
/// Returns the `Kun` reading compound examples found in [pageHtml].
List<YomiExample> _getKunyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
/// Extracts the radical information from [pageHtml].
///
/// Returns `null` when the page has no `radical_meaning` span. Otherwise the
/// trimmed span content becomes the meaning, and the text between that span's
/// closing tag and the following `</span>` is read as the radical symbols.
///
/// If the symbols text is longer than one character, its first character is
/// the symbol and the rest (parentheses removed, split on `', '`) are the
/// alternative forms. Otherwise the whole text is the symbol and the forms
/// are left at their default.
Radical? _getRadical(String pageHtml) {
  const meaningOpenTag = '<span class="radical_meaning">';
  const spanCloseTag = '</span>';

  final meaning =
      getStringBetweenStrings(pageHtml, meaningOpenTag, spanCloseTag)?.trim();
  if (meaning == null) return null;

  // Locate the end of the meaning span; the symbols start right after it.
  final meaningStart = pageHtml.indexOf(meaningOpenTag);
  final meaningEnd = pageHtml.indexOf(spanCloseTag, meaningStart);
  final symbolsStart = meaningEnd + spanCloseTag.length;
  final symbolsEnd = pageHtml.indexOf(spanCloseTag, symbolsStart);

  final symbols = getStringBetweenIndicies(pageHtml, symbolsStart, symbolsEnd);

  if (symbols.length <= 1) {
    return Radical(symbol: symbols, meaning: meaning);
  }

  final forms = symbols
      .substring(1)
      .replaceAll('(', '')
      .replaceAll(')', '')
      .trim()
      .split(', ');

  return Radical(
    symbol: symbols[0],
    forms: forms,
    meaning: meaning,
  );
}
/// Extracts the main meaning text of the kanji from [pageHtml].
///
/// Reads the content of the `kanji-details__main-meanings` div, strips
/// newlines, trims it and HTML-unescapes the result. A missing div is passed
/// to `assertNotNull` together with the error message below.
String _getMeaning(String pageHtml) {
  final meaningSection = getStringBetweenStrings(
    pageHtml,
    '<div class="kanji-details__main-meanings">',
    '</div>',
  );
  final rawMeaning = assertNotNull(
    variable: meaningSection,
    errorMessage:
        "Could not parse meaning. Is the provided document corrupt, or has Jisho been updated?",
  );

  final singleLine = removeNewlines(rawMeaning).trim();
  return _htmlUnescape.convert(singleLine);
}
/// Returns the sorted parts listed in the `Parts:` section of [pageHtml].
///
/// The section runs from `<dt>Parts:</dt>` to the next `</dl>`. An empty list
/// is returned when the section cannot be found.
List<String> _getParts(String pageHtml) {
  final section =
      getStringBetweenStrings(pageHtml, '<dt>Parts:</dt>', '</dl>');
  if (section == null) return [];

  return parseAnchorsToArray(section)..sort();
}
/// Returns the absolute `https:` URL of the stroke order SVG in [pageHtml].
///
/// Searches for the first protocol-relative URL (`//...`) that contains
/// `.cloudfront.net/` and ends in `.svg`, then prefixes it with `https:`.
///
/// The pattern is a raw string with the literal dots escaped. The previous
/// pattern left them unescaped, so they matched any character (for example
/// `xcloudfront-net`). A missing match is passed to `assertNotNull` together
/// with the error message below.
String _getSvgUri(String pageHtml) {
  final svgPattern = RegExp(r'//.*?\.cloudfront\.net/.*?\.svg');
  final protocolRelativeUri = assertNotNull(
    variable: svgPattern.firstMatch(pageHtml)?.group(0),
    errorMessage:
        "Could not find SVG URI. Is the provided document corrupt, or has Jisho been updated?",
  );
  return 'https:$protocolRelativeUri';
}
/// Returns the URL of the animated stroke order GIF for [kanji].
///
/// The file name is the lowercase hexadecimal Unicode code point of the first
/// character, followed by `.gif`. The code point is read through
/// [String.runes] rather than `codeUnitAt(0)`, so a kanji outside the Basic
/// Multilingual Plane (stored as a surrogate pair) yields its real code point
/// instead of the value of its high surrogate. For all other kanji the result
/// is unchanged.
///
/// Throws a [StateError] if [kanji] is empty.
String _getGifUri(String kanji) {
  final codePointHex = kanji.runes.first.toRadixString(16);
  return 'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$codePointHex.gif';
}
/// Extracts the newspaper frequency rank from [pageHtml], if present.
///
/// Returns `null` when the `frequency` div is missing or contains no
/// `<strong>` element. Otherwise the `<strong>` content is parsed with
/// [int.parse], which throws a [FormatException] on non-numeric text.
int? _getNewspaperFrequencyRank(String pageHtml) {
  final section = getStringBetweenStrings(
    pageHtml,
    '<div class="frequency">',
    '</div>',
  );
  final rankText = section == null
      ? null
      : getStringBetweenStrings(section, '<strong>', '</strong>');

  return rankText == null ? null : int.parse(rankText);
}
/// Extracts the stroke count of the kanji from [pageHtml].
///
/// Reads the integer between `<strong>` and `</strong> strokes`. A missing
/// value is passed to `assertNotNull` together with the error message below.
int _getStrokeCount(String pageHtml) {
  final parsedCount =
      getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes');
  return assertNotNull(
    variable: parsedCount,
    errorMessage:
        "Could not parse stroke count. Is the provided document corrupt, or has Jisho been updated?",
  );
}
/// Returns the text between `taught in <strong>` and `</strong>` in
/// [pageHtml], or `null` when that marker is not found.
String? _getTaughtIn(String pageHtml) =>
    getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>');
/// Returns the text between `JLPT level <strong>` and `</strong>` in
/// [pageHtml], or `null` when that marker is not found.
String? _getJlptLevel(String pageHtml) =>
    getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>');
/// Parses the HTML of a Jisho kanji search page into a [KanjiResult].
///
/// [pageHtml] is the raw page source and [kanji] is the character that was
/// searched for. If the page does not contain the heading for [kanji], the
/// result has `found == false` and no `data`. Otherwise `data` is filled with
/// everything scraped from the page.
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
  final found = _containsKanjiGlyph(pageHtml, kanji);
  final result = KanjiResult(query: kanji, found: found);

  if (!found) return result;

  // Argument order is kept as-is so the first parsing failure stays the same.
  result.data = KanjiResultData(
    strokeCount: _getStrokeCount(pageHtml),
    meaning: _getMeaning(pageHtml),
    strokeOrderDiagramUri: _getUriForStrokeOrderDiagram(kanji),
    strokeOrderSvgUri: _getSvgUri(pageHtml),
    strokeOrderGifUri: _getGifUri(kanji),
    uri: uriForKanjiSearch(kanji).toString(),
    parts: _getParts(pageHtml),
    taughtIn: _getTaughtIn(pageHtml),
    jlptLevel: _getJlptLevel(pageHtml),
    newspaperFrequencyRank: _getNewspaperFrequencyRank(pageHtml),
    kunyomi: _getKunyomi(pageHtml),
    onyomi: _getOnyomi(pageHtml),
    kunyomiExamples: _getKunyomiExamples(pageHtml),
    onyomiExamples: _getOnyomiExamples(pageHtml),
    radical: _getRadical(pageHtml),
  );
  return result;
}

View File

@ -2,96 +2,157 @@
/* searchForKanji related classes */
/* -------------------------------------------------------------------------- */
/// An example of a word that contains the kanji in question.
class YomiExample {
/// The original text of the example.
String example;
/// The reading of the example.
String reading;
/// The meaning of the example.
String meaning;
YomiExample({this.example, this.reading, this.meaning});
// ignore: public_member_api_docs
YomiExample({
required this.example,
required this.reading,
required this.meaning,
});
Map<String, String> toJson() =>
{'example': example, 'reading': reading, 'meaning': meaning};
// ignore: public_member_api_docs
Map<String, String> toJson() => {
'example': example,
'reading': reading,
'meaning': meaning,
};
}
/// Information regarding the radical of a kanji.
class Radical {
/// The radical symbol, if applicable.
/// The radical symbol.
String symbol;
/// The radical forms used in this kanji, if applicable.
/// The radical forms used in this kanji.
List<String> forms;
/// The meaning of the radical, if applicable.
/// The meaning of the radical.
String meaning;
Radical({this.symbol, this.forms, this.meaning});
// ignore: public_member_api_docs
Radical({
required this.symbol,
this.forms = const [],
required this.meaning,
});
Map<String, dynamic> toJson() =>
{'symbol': symbol, 'forms': forms, 'meaning': meaning};
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'symbol': symbol,
'forms': forms,
'meaning': meaning,
};
}
/// The main wrapper containing data about the query and whether or not it was successful.
class KanjiResult {
/// The term that you searched for.
String query;
/// True if results were found.
bool found;
/// The school level that the kanji is taught in, if applicable.
String taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
String jlptLevel;
/// A number representing this kanji's frequency rank in newspapers, if applicable.
int newspaperFrequencyRank;
/// How many strokes this kanji is typically drawn in, if applicable.
int strokeCount;
/// The meaning of the kanji, if applicable.
String meaning;
/// This character's kunyomi, if applicable.
List<String> kunyomi;
/// This character's onyomi, if applicable.
List<String> onyomi;
/// Examples of this character's kunyomi being used, if applicable.
List<YomiExample> kunyomiExamples;
/// Examples of this character's onyomi being used, if applicable.
List<YomiExample> onyomiExamples;
/// Information about this character's radical, if applicable.
Radical radical;
/// The parts used in this kanji, if applicable.
List<String> parts;
/// The URL to a diagram showing how to draw this kanji step by step, if applicable.
String strokeOrderDiagramUri;
/// The URL to an SVG describing how to draw this kanji, if applicable.
String strokeOrderSvgUri;
/// The URL to a gif showing the kanji being draw and its stroke order, if applicable.
String strokeOrderGifUri;
/// The URI that these results were scraped from, if applicable.
String uri;
/// The result data if search was successful.
KanjiResultData? data;
KanjiResult(
{this.query,
this.found,
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
this.strokeCount,
this.meaning,
this.kunyomi,
this.onyomi,
this.kunyomiExamples,
this.onyomiExamples,
this.radical,
this.parts,
this.strokeOrderDiagramUri,
this.strokeOrderSvgUri,
this.strokeOrderGifUri,
this.uri});
// ignore: public_member_api_docs
KanjiResult({
required this.query,
required this.found,
this.data,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {
'query': query,
'found': found,
'data': data,
};
}
}
/// The main kanji data class, collecting all the result information in one place.
class KanjiResultData {
/// The school level that the kanji is taught in, if applicable.
String? taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
String? jlptLevel;
/// A number representing this kanji's frequency rank in newspapers, if applicable.
int? newspaperFrequencyRank;
/// How many strokes this kanji is typically drawn in.
int strokeCount;
/// The meaning of the kanji.
String meaning;
/// This character's kunyomi.
List<String> kunyomi;
/// This character's onyomi.
List<String> onyomi;
/// Examples of this character's kunyomi being used.
List<YomiExample> kunyomiExamples;
/// Examples of this character's onyomi being used.
List<YomiExample> onyomiExamples;
/// Information about this character's radical, if applicable.
Radical? radical;
/// The parts used in this kanji.
List<String> parts;
/// The URL to a diagram showing how to draw this kanji step by step.
String strokeOrderDiagramUri;
/// The URL to an SVG describing how to draw this kanji.
String strokeOrderSvgUri;
/// The URL to a gif showing the kanji being drawn and its stroke order.
String strokeOrderGifUri;
/// The URI that these results were scraped from.
String uri;
// ignore: public_member_api_docs
KanjiResultData({
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
required this.strokeCount,
required this.meaning,
this.kunyomi = const [],
this.onyomi = const [],
this.kunyomiExamples = const [],
this.onyomiExamples = const [],
this.radical,
this.parts = const [],
required this.strokeOrderDiagramUri,
required this.strokeOrderSvgUri,
required this.strokeOrderGifUri,
required this.uri,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {
'taughtIn': taughtIn,
'jlptLevel': jlptLevel,
'newspaperFrequencyRank': newspaperFrequencyRank,
@ -101,12 +162,12 @@ class KanjiResult {
'onyomi': onyomi,
'onyomiExamples': onyomiExamples,
'kunyomiExamples': kunyomiExamples,
'radical': (radical != null) ? radical.toJson() : null,
'radical': radical?.toJson(),
'parts': parts,
'strokeOrderDiagramUri': strokeOrderDiagramUri,
'strokeOrderSvgUri': strokeOrderSvgUri,
'strokeOrderGifUri': strokeOrderGifUri,
'uri': uri
'uri': uri,
};
}
}
@ -115,54 +176,90 @@ class KanjiResult {
/* searchForExamples related classes */
/* -------------------------------------------------------------------------- */
/// A word in an example sentence, consisting of either just kana, or kanji
/// with furigana.
class ExampleSentencePiece {
  /// Furigana text shown on Jisho.org (above the unlifted text), if applicable.
  ///
  /// `null` when the piece has no furigana.
  String? lifted;

  /// Baseline text shown on Jisho.org (below the lifted text / furigana).
  String unlifted;

  /// Creates a sentence piece from its [unlifted] baseline text and optional
  /// [lifted] furigana.
  ExampleSentencePiece({
    this.lifted,
    required this.unlifted,
  });

  /// Returns a JSON-encodable map with the keys `lifted` and `unlifted`.
  ///
  /// The `lifted` key is always present; its value is `null` when there is no
  /// furigana.
  Map<String, dynamic> toJson() {
    return {
      'lifted': lifted,
      'unlifted': unlifted,
    };
  }
}
/// All data making up one example sentence.
class ExampleResultData {
  /// The example sentence including kanji.
  String kanji;

  /// The example sentence without kanji (only kana).
  ///
  /// Sometimes this may include some kanji, as furigana is not always
  /// available from Jisho.org.
  String kana;

  /// An English translation of the example.
  String english;

  /// The lifted/unlifted pairs that make up the sentence.
  ///
  /// Lifted text is furigana, unlifted is the text below the furigana.
  List<ExampleSentencePiece> pieces;

  /// Creates the data for one example sentence.
  ///
  /// All of [english], [kanji], [kana] and [pieces] are required.
  ExampleResultData({
    required this.english,
    required this.kanji,
    required this.kana,
    required this.pieces,
  });

  /// Returns a map with the keys `english`, `kanji`, `kana` and `pieces`.
  ///
  /// The [pieces] are stored as objects, not as nested maps.
  Map<String, dynamic> toJson() {
    return {
      'english': english,
      'kanji': kanji,
      'kana': kana,
      'pieces': pieces,
    };
  }
}
/// The main wrapper containing data about the query and whether or not it was
/// successful.
class ExampleResults {
  /// The term that you searched for.
  String query;

  /// True if results were found.
  bool found;

  /// The URI that these results were scraped from.
  String uri;

  /// The examples that were found, if any.
  List<ExampleResultData> results;

  /// Creates the wrapper for an example search.
  ///
  /// All of [query], [found], [results] and [uri] are required.
  ExampleResults({
    required this.query,
    required this.found,
    required this.results,
    required this.uri,
  });

  /// Returns a map with the keys `query`, `found`, `results` and `uri`.
  ///
  /// The [results] are stored as objects, not as nested maps.
  Map<String, dynamic> toJson() {
    return {
      'query': query,
      'found': found,
      'results': results,
      'uri': uri,
    };
  }
}
@ -171,96 +268,178 @@ class ExampleResults {
/* scrapeForPhrase related classes */
/* -------------------------------------------------------------------------- */
/// An example sentence.
class PhraseScrapeSentence {
  /// The English meaning of the sentence.
  String english;

  /// The Japanese text of the sentence.
  String japanese;

  /// The lifted/unlifted pairs that make up the sentence.
  ///
  /// Lifted text is furigana, unlifted is the text below the furigana.
  List<ExampleSentencePiece> pieces;

  /// Creates an example sentence.
  ///
  /// All of [english], [japanese] and [pieces] are required.
  PhraseScrapeSentence({
    required this.english,
    required this.japanese,
    required this.pieces,
  });

  /// Returns a map with the keys `english`, `japanese` and `pieces`.
  ///
  /// The [pieces] are stored as objects, not as nested maps.
  Map<String, dynamic> toJson() =>
      {'english': english, 'japanese': japanese, 'pieces': pieces};
}
/// The data representing one "meaning" or "sense" of the word.
class PhraseScrapeMeaning {
  /// The words that Jisho lists as "see also".
  List<String> seeAlsoTerms;

  /// Example sentences for this meaning.
  List<PhraseScrapeSentence> sentences;

  /// The definition of the meaning.
  String definition;

  /// Supplemental information.
  ///
  /// For example "usually written using kana alone".
  List<String> supplemental;

  /// An "abstract" definition, if there is one.
  ///
  /// Often this is a Wikipedia definition.
  String? definitionAbstract;

  /// Tags associated with this meaning.
  List<String> tags;

  /// Creates the data for one meaning.
  ///
  /// [sentences] and [definition] are required. [seeAlsoTerms],
  /// [supplemental] and [tags] default to an empty list, and
  /// [definitionAbstract] may be omitted.
  PhraseScrapeMeaning({
    // The `const []` defaults are unmodifiable lists; pass an explicit list if
    // elements are to be added afterwards.
    this.seeAlsoTerms = const [],
    required this.sentences,
    required this.definition,
    this.supplemental = const [],
    this.definitionAbstract,
    this.tags = const [],
  });

  /// Returns a map with one entry per field, keyed by the field name.
  ///
  /// The [sentences] are stored as objects, not as nested maps.
  Map<String, dynamic> toJson() => {
        'seeAlsoTerms': seeAlsoTerms,
        'sentences': sentences,
        'definition': definition,
        'supplemental': supplemental,
        'definitionAbstract': definitionAbstract,
        'tags': tags,
      };
}
/// A pair of kanji and potential furigana.
class KanjiKanaPair {
  /// The kanji text.
  String kanji;

  /// Furigana, if applicable.
  ///
  /// `null` when no furigana is available for [kanji].
  String? kana;

  /// Creates a pair from the required [kanji] and the optional [kana].
  KanjiKanaPair({
    required this.kanji,
    this.kana,
  });

  /// Returns a JSON-encodable map with the keys `kanji` and `kana`.
  ///
  /// The value type is `dynamic` because `kana` may be `null`.
  Map<String, dynamic> toJson() => {
        'kanji': kanji,
        'kana': kana,
      };
}
/// The main wrapper containing data about the query and whether or not it was
/// successful.
class PhrasePageScrapeResult {
  /// True if a result was found.
  bool found;

  /// The term that you searched for.
  String query;

  /// The result data if search was successful.
  PhrasePageScrapeResultData? data;

  /// Creates the wrapper for a phrase page scrape.
  ///
  /// [found] and [query] are required; [data] may be omitted, in which case
  /// it is `null`.
  PhrasePageScrapeResult({
    required this.found,
    required this.query,
    this.data,
  });

  /// Returns a map with the keys `found`, `query` and `data`.
  ///
  /// [data] is stored as an object (or `null`), not as a nested map.
  Map<String, dynamic> toJson() => {
        'found': found,
        'query': query,
        'data': data,
      };
}
/// A pronunciation audio file.
class AudioFile {
  /// Where the audio file can be fetched from.
  String uri;

  /// The MIME type of the audio.
  String mimetype;

  /// Creates an audio file reference from its [uri] and [mimetype].
  AudioFile({required this.uri, required this.mimetype});

  /// Returns a JSON-encodable map with the keys `uri` and `mimetype`.
  Map<String, dynamic> toJson() {
    final encoded = <String, dynamic>{};
    encoded['uri'] = uri;
    encoded['mimetype'] = mimetype;
    return encoded;
  }
}
/// The main scrape data class, collecting all the result information in one place.
class PhrasePageScrapeResultData {
/// The URI that these results were scraped from.
String uri;
/// Tags associated with this search result.
List<String> tags;
/// Information about the meanings associated with this search result.
List<PhraseScrapeMeaning> meanings;
/// Other forms of the search term.
List<KanjiKanaPair> otherForms;
/// Pronunciation audio for the search result.
List<AudioFile> audio;
/// Notes associated with the search result.
List<String> notes;
// ignore: public_member_api_docs
PhrasePageScrapeResultData({
required this.uri,
this.tags = const [],
this.meanings = const [],
this.otherForms = const [],
this.audio = const [],
this.notes = const [],
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'uri': uri,
'tags': tags,
'meanings': meanings,
'otherForms': otherForms,
'notes': notes
'audio': audio,