mirror of
https://github.com/h7x4/unofficial_jisho_api_dart.git
synced 2025-09-10 04:53:45 +02:00
Upgrade to 2.0.0, see changelog
This commit is contained in:
@@ -7,11 +7,11 @@ library unofficial_jisho_api;
|
||||
import 'dart:convert';
|
||||
import 'package:http/http.dart' as http;
|
||||
|
||||
import './src/exampleSearch.dart';
|
||||
import './src/kanjiSearch.dart';
|
||||
import './src/example_search.dart';
|
||||
import './src/kanji_search.dart';
|
||||
import './src/objects.dart';
|
||||
import './src/phraseScrape.dart';
|
||||
import './src/phraseSearch.dart';
|
||||
import './src/phrase_scrape.dart';
|
||||
import './src/phrase_search.dart';
|
||||
|
||||
export './src/objects.dart';
|
||||
|
||||
|
@@ -5,9 +5,8 @@
|
||||
/// for providing HTML.
|
||||
library unofficial_jisho_parser;
|
||||
|
||||
export './src/exampleSearch.dart'
|
||||
show uriForExampleSearch, parseExamplePageData;
|
||||
export './src/kanjiSearch.dart' show uriForKanjiSearch, parseKanjiPageData;
|
||||
export './src/objects.dart';
|
||||
export './src/phraseScrape.dart' show uriForPhraseScrape, parsePhrasePageData;
|
||||
export './src/phraseSearch.dart';
|
||||
export 'src/example_search.dart' show uriForExampleSearch, parseExamplePageData;
|
||||
export 'src/kanji_search.dart' show uriForKanjiSearch, parseKanjiPageData;
|
||||
export 'src/phrase_scrape.dart' show uriForPhraseScrape, parsePhrasePageData;
|
||||
export 'src/phrase_search.dart';
|
||||
|
@@ -1,4 +1,6 @@
|
||||
const String JISHO_API = 'https://jisho.org/api/v1/search/words';
|
||||
const String SCRAPE_BASE_URI = 'https://jisho.org/search/';
|
||||
const String STROKE_ORDER_DIAGRAM_BASE_URI =
|
||||
// ignore_for_file: public_member_api_docs
|
||||
|
||||
const String jishoApi = 'https://jisho.org/api/v1/search/words';
|
||||
const String scrapeBaseUri = 'https://jisho.org/search/';
|
||||
const String strokeOrderDiagramBaseUri =
|
||||
'https://classic.jisho.org/static/images/stroke_diagrams/';
|
||||
|
@@ -1,14 +1,15 @@
|
||||
import 'package:html/parser.dart';
|
||||
import 'package:html/dom.dart';
|
||||
import 'package:html/parser.dart';
|
||||
|
||||
import './base_uri.dart';
|
||||
import './objects.dart';
|
||||
import './scraping.dart';
|
||||
|
||||
final RegExp _kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]');
|
||||
|
||||
/// Provides the URI for an example search
|
||||
String uriForExampleSearch(String phrase) {
|
||||
return '$SCRAPE_BASE_URI${Uri.encodeComponent(phrase)}%23sentences';
|
||||
Uri uriForExampleSearch(String phrase) {
|
||||
return Uri.parse('$scrapeBaseUri${Uri.encodeComponent(phrase)}%23sentences');
|
||||
}
|
||||
|
||||
List<Element> _getChildrenAndSymbols(Element ul) {
|
||||
@@ -16,7 +17,7 @@ List<Element> _getChildrenAndSymbols(Element ul) {
|
||||
final ulCharArray = ulText.split('');
|
||||
final ulChildren = ul.children;
|
||||
var offsetPointer = 0;
|
||||
List<Element> result = [];
|
||||
final result = <Element>[];
|
||||
|
||||
for (var element in ulChildren) {
|
||||
if (element.text !=
|
||||
@@ -40,8 +41,13 @@ List<Element> _getChildrenAndSymbols(Element ul) {
|
||||
return result;
|
||||
}
|
||||
|
||||
ExampleResultData _getKanjiAndKana(Element div) {
|
||||
final ul = div.querySelector('ul');
|
||||
/// Returns `[kanji, kana]`. Although the return type is `List<String>`, the result is to be interpreted as a `(String, String)` pair.
|
||||
List<String> _getKanjiAndKana(Element div) {
|
||||
final ul = assertNotNull(
|
||||
variable: div.querySelector('ul'),
|
||||
errorMessage:
|
||||
"Could not parse kanji/kana div. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
final contents = _getChildrenAndSymbols(ul);
|
||||
|
||||
var kanji = '';
|
||||
@@ -51,7 +57,11 @@ ExampleResultData _getKanjiAndKana(Element div) {
|
||||
if (content.localName == 'li') {
|
||||
final li = content;
|
||||
final furigana = li.querySelector('.furigana')?.text;
|
||||
final unlifted = li.querySelector('.unlinked')?.text;
|
||||
final unlifted = assertNotNull(
|
||||
variable: li.querySelector('.unlinked')?.text,
|
||||
errorMessage:
|
||||
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
|
||||
if (furigana != null) {
|
||||
kanji += unlifted;
|
||||
@@ -74,39 +84,49 @@ ExampleResultData _getKanjiAndKana(Element div) {
|
||||
}
|
||||
} else {
|
||||
final text = content.text.trim();
|
||||
if (text != null) {
|
||||
kanji += text;
|
||||
kana += text;
|
||||
}
|
||||
kanji += text;
|
||||
kana += text;
|
||||
}
|
||||
}
|
||||
|
||||
return ExampleResultData(
|
||||
kanji: kanji,
|
||||
kana: kana,
|
||||
);
|
||||
return [kanji, kana];
|
||||
}
|
||||
|
||||
List<ExampleSentencePiece> getPieces(Element sentenceElement) {
|
||||
final pieceElements = sentenceElement.querySelectorAll('li.clearfix');
|
||||
final List<ExampleSentencePiece> pieces = [];
|
||||
for (var pieceIndex = 0; pieceIndex < pieceElements.length; pieceIndex += 1) {
|
||||
final pieceElement = pieceElements[pieceIndex];
|
||||
pieces.add(ExampleSentencePiece(
|
||||
lifted: pieceElement.querySelector('.furigana')?.text,
|
||||
unlifted: pieceElement.querySelector('.unlinked')?.text,
|
||||
));
|
||||
}
|
||||
|
||||
return pieces;
|
||||
return pieceElements.map((var e) {
|
||||
final unlifted = assertNotNull(
|
||||
variable: e.querySelector('.unlinked')?.text,
|
||||
errorMessage:
|
||||
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
|
||||
return ExampleSentencePiece(
|
||||
lifted: e.querySelector('.furigana')?.text,
|
||||
unlifted: unlifted,
|
||||
);
|
||||
}).toList();
|
||||
}
|
||||
|
||||
ExampleResultData _parseExampleDiv(Element div) {
|
||||
final result = _getKanjiAndKana(div);
|
||||
result.english = div.querySelector('.english').text;
|
||||
result.pieces = getPieces(div) ?? [];
|
||||
final kanji = result[0];
|
||||
final kana = result[1];
|
||||
|
||||
return result;
|
||||
final english = assertNotNull(
|
||||
variable: div.querySelector('.english')?.text,
|
||||
errorMessage:
|
||||
"Could not parse translation. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
final pieces = getPieces(div);
|
||||
|
||||
return ExampleResultData(
|
||||
english: english,
|
||||
kanji: kanji,
|
||||
kana: kana,
|
||||
pieces: pieces,
|
||||
);
|
||||
}
|
||||
|
||||
/// Parses a jisho example sentence search page to an object
|
||||
@@ -117,9 +137,8 @@ ExampleResults parseExamplePageData(String pageHtml, String phrase) {
|
||||
final results = divs.map(_parseExampleDiv).toList();
|
||||
|
||||
return ExampleResults(
|
||||
query: phrase,
|
||||
found: results.isNotEmpty,
|
||||
results: results ?? [],
|
||||
uri: uriForExampleSearch(phrase)
|
||||
);
|
||||
query: phrase,
|
||||
found: results.isNotEmpty,
|
||||
results: results,
|
||||
uri: uriForExampleSearch(phrase).toString());
|
||||
}
|
@@ -1,235 +0,0 @@
|
||||
import 'package:html_unescape/html_unescape.dart' as html_entities;
|
||||
|
||||
import './base_uri.dart';
|
||||
import './objects.dart';
|
||||
|
||||
final _htmlUnescape = html_entities.HtmlUnescape();
|
||||
|
||||
const _onyomiLocatorSymbol = 'On';
|
||||
const _kunyomiLocatorSymbol = 'Kun';
|
||||
|
||||
String _removeNewlines(String str) {
|
||||
return str.replaceAll(RegExp(r'(?:\r|\n)'), '').trim();
|
||||
}
|
||||
|
||||
/// Provides the URI for a kanji search
|
||||
String uriForKanjiSearch(String kanji) {
|
||||
return '$SCRAPE_BASE_URI${Uri.encodeComponent(kanji)}%23kanji';
|
||||
}
|
||||
|
||||
String _getUriForStrokeOrderDiagram(String kanji) {
|
||||
return '$STROKE_ORDER_DIAGRAM_BASE_URI${kanji.codeUnitAt(0)}_frames.png';
|
||||
}
|
||||
|
||||
bool _containsKanjiGlyph(String pageHtml, String kanji) {
|
||||
final kanjiGlyphToken =
|
||||
'<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>';
|
||||
return pageHtml.contains(kanjiGlyphToken);
|
||||
}
|
||||
|
||||
String _getStringBetweenIndicies(String data, int startIndex, int endIndex) {
|
||||
final result = data.substring(startIndex, endIndex);
|
||||
return _removeNewlines(result).trim();
|
||||
}
|
||||
|
||||
String _getStringBetweenStrings(
|
||||
String data, String startString, String endString) {
|
||||
final regex = RegExp(
|
||||
'${RegExp.escape(startString)}(.*?)${RegExp.escape(endString)}',
|
||||
dotAll: true);
|
||||
final match = regex.allMatches(data).toList();
|
||||
|
||||
return match.isNotEmpty ? match[0].group(1).toString() : null;
|
||||
}
|
||||
|
||||
int _getIntBetweenStrings(
|
||||
String pageHtml, String startString, String endString) {
|
||||
final stringBetweenStrings =
|
||||
_getStringBetweenStrings(pageHtml, startString, endString);
|
||||
return int.parse(stringBetweenStrings);
|
||||
}
|
||||
|
||||
List<String> _getAllGlobalGroupMatches(String str, RegExp regex) {
|
||||
var regexResults = regex.allMatches(str).toList();
|
||||
List<String> results = [];
|
||||
for (var match in regexResults) {
|
||||
results.add(match.group(1));
|
||||
}
|
||||
|
||||
return results;
|
||||
}
|
||||
|
||||
List<String> _parseAnchorsToArray(String str) {
|
||||
final regex = RegExp(r'<a href=".*?">(.*?)<\/a>');
|
||||
return _getAllGlobalGroupMatches(str, regex);
|
||||
}
|
||||
|
||||
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
|
||||
final yomiSection = _getStringBetweenStrings(
|
||||
pageHtml, '<dt>$yomiLocatorSymbol:</dt>', '</dl>');
|
||||
return _parseAnchorsToArray(yomiSection ?? '');
|
||||
}
|
||||
|
||||
List<String> _getKunyomi(String pageHtml) {
|
||||
return _getYomi(pageHtml, _kunyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<String> _getOnyomi(String pageHtml) {
|
||||
return _getYomi(pageHtml, _onyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
|
||||
final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
|
||||
final exampleSection =
|
||||
_getStringBetweenStrings(pageHtml, locatorString, '</ul>');
|
||||
if (exampleSection == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final regex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);
|
||||
final regexResults =
|
||||
_getAllGlobalGroupMatches(exampleSection, regex).map((s) => s.trim());
|
||||
|
||||
final examples = regexResults.map((regexResult) {
|
||||
final examplesLines = regexResult.split('\n').map((s) => s.trim()).toList();
|
||||
return YomiExample(
|
||||
example: examplesLines[0],
|
||||
reading: examplesLines[1].replaceAll('【', '').replaceAll('】', ''),
|
||||
meaning: _htmlUnescape.convert(examplesLines[2]),
|
||||
);
|
||||
});
|
||||
|
||||
return examples.toList();
|
||||
}
|
||||
|
||||
List<YomiExample> _getOnyomiExamples(String pageHtml) {
|
||||
return _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<YomiExample> _getKunyomiExamples(String pageHtml) {
|
||||
return _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
Radical _getRadical(String pageHtml) {
|
||||
const radicalMeaningStartString = '<span class="radical_meaning">';
|
||||
const radicalMeaningEndString = '</span>';
|
||||
|
||||
var radicalMeaning = _getStringBetweenStrings(
|
||||
pageHtml,
|
||||
radicalMeaningStartString,
|
||||
radicalMeaningEndString,
|
||||
).trim();
|
||||
|
||||
if (radicalMeaning != null) {
|
||||
final radicalMeaningStartIndex =
|
||||
pageHtml.indexOf(radicalMeaningStartString);
|
||||
|
||||
final radicalMeaningEndIndex = pageHtml.indexOf(
|
||||
radicalMeaningEndString,
|
||||
radicalMeaningStartIndex,
|
||||
);
|
||||
|
||||
final radicalSymbolStartIndex =
|
||||
radicalMeaningEndIndex + radicalMeaningEndString.length;
|
||||
const radicalSymbolEndString = '</span>';
|
||||
final radicalSymbolEndIndex =
|
||||
pageHtml.indexOf(radicalSymbolEndString, radicalSymbolStartIndex);
|
||||
|
||||
final radicalSymbolsString = _getStringBetweenIndicies(
|
||||
pageHtml,
|
||||
radicalSymbolStartIndex,
|
||||
radicalSymbolEndIndex,
|
||||
);
|
||||
|
||||
if (radicalSymbolsString.length > 1) {
|
||||
final radicalForms = radicalSymbolsString
|
||||
.substring(1)
|
||||
.replaceAll('(', '')
|
||||
.replaceAll(')', '')
|
||||
.trim()
|
||||
.split(', ');
|
||||
|
||||
return Radical(
|
||||
symbol: radicalSymbolsString[0],
|
||||
forms: radicalForms ?? [],
|
||||
meaning: radicalMeaning);
|
||||
}
|
||||
|
||||
return Radical(symbol: radicalSymbolsString, meaning: radicalMeaning);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
List<String> _getParts(String pageHtml) {
|
||||
const partsSectionStartString = '<dt>Parts:</dt>';
|
||||
const partsSectionEndString = '</dl>';
|
||||
|
||||
final partsSection = _getStringBetweenStrings(
|
||||
pageHtml,
|
||||
partsSectionStartString,
|
||||
partsSectionEndString,
|
||||
);
|
||||
|
||||
var result = _parseAnchorsToArray(partsSection);
|
||||
result.sort();
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
String _getSvgUri(String pageHtml) {
|
||||
var svgRegex = RegExp('\/\/.*?.cloudfront.net\/.*?.svg');
|
||||
final regexResult = svgRegex.firstMatch(pageHtml).group(0).toString();
|
||||
return regexResult.isNotEmpty ? 'https:$regexResult' : null;
|
||||
}
|
||||
|
||||
String _getGifUri(String kanji) {
|
||||
final unicodeString = kanji.codeUnitAt(0).toRadixString(16);
|
||||
final fileName = '$unicodeString.gif';
|
||||
final animationUri =
|
||||
'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$fileName';
|
||||
|
||||
return animationUri;
|
||||
}
|
||||
|
||||
int _getNewspaperFrequencyRank(String pageHtml) {
|
||||
final frequencySection =
|
||||
_getStringBetweenStrings(pageHtml, '<div class="frequency">', '</div>');
|
||||
return (frequencySection != null)
|
||||
? int.parse(
|
||||
_getStringBetweenStrings(frequencySection, '<strong>', '</strong>'))
|
||||
: null;
|
||||
}
|
||||
|
||||
/// Parses a jisho kanji search page to an object
|
||||
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
|
||||
final result = KanjiResult();
|
||||
result.query = kanji;
|
||||
result.found = _containsKanjiGlyph(pageHtml, kanji);
|
||||
if (result.found == false) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result.taughtIn =
|
||||
_getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>');
|
||||
result.jlptLevel =
|
||||
_getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>');
|
||||
result.newspaperFrequencyRank = _getNewspaperFrequencyRank(pageHtml);
|
||||
result.strokeCount =
|
||||
_getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes');
|
||||
result.meaning = _htmlUnescape.convert(_removeNewlines(
|
||||
_getStringBetweenStrings(
|
||||
pageHtml, '<div class="kanji-details__main-meanings">', '</div>'))
|
||||
.trim());
|
||||
result.kunyomi = _getKunyomi(pageHtml) ?? [];
|
||||
result.onyomi = _getOnyomi(pageHtml) ?? [];
|
||||
result.onyomiExamples = _getOnyomiExamples(pageHtml) ?? [];
|
||||
result.kunyomiExamples = _getKunyomiExamples(pageHtml) ?? [];
|
||||
result.radical = _getRadical(pageHtml);
|
||||
result.parts = _getParts(pageHtml) ?? [];
|
||||
result.strokeOrderDiagramUri = _getUriForStrokeOrderDiagram(kanji);
|
||||
result.strokeOrderSvgUri = _getSvgUri(pageHtml);
|
||||
result.strokeOrderGifUri = _getGifUri(kanji);
|
||||
result.uri = uriForKanjiSearch(kanji);
|
||||
return result;
|
||||
}
|
243
lib/src/kanji_search.dart
Normal file
243
lib/src/kanji_search.dart
Normal file
@@ -0,0 +1,243 @@
|
||||
import 'package:html_unescape/html_unescape.dart' as html_entities;
|
||||
|
||||
import './base_uri.dart';
|
||||
import './objects.dart';
|
||||
import './scraping.dart';
|
||||
|
||||
final _htmlUnescape = html_entities.HtmlUnescape();
|
||||
|
||||
const _onyomiLocatorSymbol = 'On';
|
||||
const _kunyomiLocatorSymbol = 'Kun';
|
||||
|
||||
/// Provides the URI for a kanji search
|
||||
Uri uriForKanjiSearch(String kanji) {
|
||||
return Uri.parse('$scrapeBaseUri${Uri.encodeComponent(kanji)}%23kanji');
|
||||
}
|
||||
|
||||
String _getUriForStrokeOrderDiagram(String kanji) {
|
||||
return '$strokeOrderDiagramBaseUri${kanji.codeUnitAt(0)}_frames.png';
|
||||
}
|
||||
|
||||
bool _containsKanjiGlyph(String pageHtml, String kanji) {
|
||||
final kanjiGlyphToken =
|
||||
'<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>';
|
||||
return pageHtml.contains(kanjiGlyphToken);
|
||||
}
|
||||
|
||||
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
|
||||
final yomiSection = getStringBetweenStrings(
|
||||
pageHtml, '<dt>$yomiLocatorSymbol:</dt>', '</dl>');
|
||||
return parseAnchorsToArray(yomiSection ?? '');
|
||||
}
|
||||
|
||||
List<String> _getKunyomi(String pageHtml) {
|
||||
return _getYomi(pageHtml, _kunyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<String> _getOnyomi(String pageHtml) {
|
||||
return _getYomi(pageHtml, _onyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
|
||||
final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
|
||||
final exampleSection =
|
||||
getStringBetweenStrings(pageHtml, locatorString, '</ul>');
|
||||
if (exampleSection == null) {
|
||||
return [];
|
||||
}
|
||||
|
||||
final regex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);
|
||||
final regexResults =
|
||||
getAllGlobalGroupMatches(exampleSection, regex).map((s) => s.trim());
|
||||
|
||||
final examples = regexResults.map((regexResult) {
|
||||
final examplesLines = regexResult.split('\n').map((s) => s.trim()).toList();
|
||||
return YomiExample(
|
||||
example: examplesLines[0],
|
||||
reading: examplesLines[1].replaceAll('【', '').replaceAll('】', ''),
|
||||
meaning: _htmlUnescape.convert(examplesLines[2]),
|
||||
);
|
||||
});
|
||||
|
||||
return examples.toList();
|
||||
}
|
||||
|
||||
List<YomiExample> _getOnyomiExamples(String pageHtml) {
|
||||
return _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
List<YomiExample> _getKunyomiExamples(String pageHtml) {
|
||||
return _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
|
||||
}
|
||||
|
||||
Radical? _getRadical(String pageHtml) {
|
||||
const radicalMeaningStartString = '<span class="radical_meaning">';
|
||||
const radicalMeaningEndString = '</span>';
|
||||
|
||||
var radicalMeaning = getStringBetweenStrings(
|
||||
pageHtml,
|
||||
radicalMeaningStartString,
|
||||
radicalMeaningEndString,
|
||||
)?.trim();
|
||||
|
||||
if (radicalMeaning == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
final radicalMeaningStartIndex = pageHtml.indexOf(radicalMeaningStartString);
|
||||
|
||||
final radicalMeaningEndIndex = pageHtml.indexOf(
|
||||
radicalMeaningEndString,
|
||||
radicalMeaningStartIndex,
|
||||
);
|
||||
|
||||
final radicalSymbolStartIndex =
|
||||
radicalMeaningEndIndex + radicalMeaningEndString.length;
|
||||
const radicalSymbolEndString = '</span>';
|
||||
final radicalSymbolEndIndex =
|
||||
pageHtml.indexOf(radicalSymbolEndString, radicalSymbolStartIndex);
|
||||
|
||||
final radicalSymbolsString = getStringBetweenIndicies(
|
||||
pageHtml,
|
||||
radicalSymbolStartIndex,
|
||||
radicalSymbolEndIndex,
|
||||
);
|
||||
|
||||
if (radicalSymbolsString.length > 1) {
|
||||
final radicalForms = radicalSymbolsString
|
||||
.substring(1)
|
||||
.replaceAll('(', '')
|
||||
.replaceAll(')', '')
|
||||
.trim()
|
||||
.split(', ');
|
||||
|
||||
return Radical(
|
||||
symbol: radicalSymbolsString[0],
|
||||
forms: radicalForms,
|
||||
meaning: radicalMeaning,
|
||||
);
|
||||
}
|
||||
|
||||
return Radical(symbol: radicalSymbolsString, meaning: radicalMeaning);
|
||||
}
|
||||
|
||||
String _getMeaning(String pageHtml) {
|
||||
final rawMeaning = assertNotNull(
|
||||
variable: getStringBetweenStrings(
|
||||
pageHtml,
|
||||
'<div class="kanji-details__main-meanings">',
|
||||
'</div>',
|
||||
),
|
||||
errorMessage:
|
||||
"Could not parse meaning. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
|
||||
return _htmlUnescape.convert(removeNewlines(rawMeaning).trim());
|
||||
}
|
||||
|
||||
List<String> _getParts(String pageHtml) {
|
||||
const partsSectionStartString = '<dt>Parts:</dt>';
|
||||
const partsSectionEndString = '</dl>';
|
||||
|
||||
final partsSection = getStringBetweenStrings(
|
||||
pageHtml,
|
||||
partsSectionStartString,
|
||||
partsSectionEndString,
|
||||
);
|
||||
|
||||
if (partsSection == null) {
|
||||
return [];
|
||||
}
|
||||
|
||||
var result = parseAnchorsToArray(partsSection);
|
||||
result.sort();
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
String _getSvgUri(String pageHtml) {
|
||||
var svgRegex = RegExp('\/\/.*?.cloudfront.net\/.*?.svg');
|
||||
|
||||
final regexResult = assertNotNull(
|
||||
variable: svgRegex.firstMatch(pageHtml)?.group(0)?.toString(),
|
||||
errorMessage:
|
||||
"Could not find SVG URI. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
|
||||
return 'https:$regexResult';
|
||||
}
|
||||
|
||||
String _getGifUri(String kanji) {
|
||||
final unicodeString = kanji.codeUnitAt(0).toRadixString(16);
|
||||
final fileName = '$unicodeString.gif';
|
||||
final animationUri =
|
||||
'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$fileName';
|
||||
|
||||
return animationUri;
|
||||
}
|
||||
|
||||
int? _getNewspaperFrequencyRank(String pageHtml) {
|
||||
final frequencySection = getStringBetweenStrings(
|
||||
pageHtml,
|
||||
'<div class="frequency">',
|
||||
'</div>',
|
||||
);
|
||||
|
||||
// ignore: avoid_returning_null
|
||||
if (frequencySection == null) return null;
|
||||
|
||||
final frequencyRank =
|
||||
getStringBetweenStrings(frequencySection, '<strong>', '</strong>');
|
||||
|
||||
return frequencyRank != null ? int.parse(frequencyRank) : null;
|
||||
}
|
||||
|
||||
int _getStrokeCount(String pageHtml) {
|
||||
final strokeCount = assertNotNull(
|
||||
variable: getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes'),
|
||||
errorMessage:
|
||||
"Could not parse stroke count. Is the provided document corrupt, or has Jisho been updated?",
|
||||
);
|
||||
|
||||
return strokeCount;
|
||||
}
|
||||
|
||||
String? _getTaughtIn(String pageHtml) {
|
||||
return getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>');
|
||||
}
|
||||
|
||||
String? _getJlptLevel(String pageHtml) {
|
||||
return getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>');
|
||||
}
|
||||
|
||||
/// Parses a jisho kanji search page to an object
|
||||
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
|
||||
final result = KanjiResult(
|
||||
query: kanji,
|
||||
found: _containsKanjiGlyph(pageHtml, kanji),
|
||||
);
|
||||
|
||||
if (result.found == false) {
|
||||
return result;
|
||||
}
|
||||
|
||||
result.data = KanjiResultData(
|
||||
strokeCount: _getStrokeCount(pageHtml),
|
||||
meaning: _getMeaning(pageHtml),
|
||||
strokeOrderDiagramUri: _getUriForStrokeOrderDiagram(kanji),
|
||||
strokeOrderSvgUri: _getSvgUri(pageHtml),
|
||||
strokeOrderGifUri: _getGifUri(kanji),
|
||||
uri: uriForKanjiSearch(kanji).toString(),
|
||||
parts: _getParts(pageHtml),
|
||||
taughtIn: _getTaughtIn(pageHtml),
|
||||
jlptLevel: _getJlptLevel(pageHtml),
|
||||
newspaperFrequencyRank: _getNewspaperFrequencyRank(pageHtml),
|
||||
kunyomi: _getKunyomi(pageHtml),
|
||||
onyomi: _getOnyomi(pageHtml),
|
||||
kunyomiExamples: _getKunyomiExamples(pageHtml),
|
||||
onyomiExamples: _getOnyomiExamples(pageHtml),
|
||||
radical: _getRadical(pageHtml),
|
||||
);
|
||||
|
||||
return result;
|
||||
}
|
@@ -2,96 +2,157 @@
|
||||
/* searchForKanji related classes */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/// An example of a word that contains the kanji in question.
|
||||
class YomiExample {
|
||||
/// The original text of the example.
|
||||
String example;
|
||||
|
||||
/// The reading of the example.
|
||||
String reading;
|
||||
|
||||
/// The meaning of the example.
|
||||
String meaning;
|
||||
|
||||
YomiExample({this.example, this.reading, this.meaning});
|
||||
// ignore: public_member_api_docs
|
||||
YomiExample({
|
||||
required this.example,
|
||||
required this.reading,
|
||||
required this.meaning,
|
||||
});
|
||||
|
||||
Map<String, String> toJson() =>
|
||||
{'example': example, 'reading': reading, 'meaning': meaning};
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, String> toJson() => {
|
||||
'example': example,
|
||||
'reading': reading,
|
||||
'meaning': meaning,
|
||||
};
|
||||
}
|
||||
|
||||
/// Information regarding the radical of a kanji.
|
||||
class Radical {
|
||||
/// The radical symbol, if applicable.
|
||||
/// The radical symbol.
|
||||
String symbol;
|
||||
/// The radical forms used in this kanji, if applicable.
|
||||
|
||||
/// The radical forms used in this kanji.
|
||||
List<String> forms;
|
||||
/// The meaning of the radical, if applicable.
|
||||
|
||||
/// The meaning of the radical.
|
||||
String meaning;
|
||||
|
||||
Radical({this.symbol, this.forms, this.meaning});
|
||||
// ignore: public_member_api_docs
|
||||
Radical({
|
||||
required this.symbol,
|
||||
this.forms = const [],
|
||||
required this.meaning,
|
||||
});
|
||||
|
||||
Map<String, dynamic> toJson() =>
|
||||
{'symbol': symbol, 'forms': forms, 'meaning': meaning};
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'symbol': symbol,
|
||||
'forms': forms,
|
||||
'meaning': meaning,
|
||||
};
|
||||
}
|
||||
|
||||
/// The main wrapper containing data about the query and whether or not it was successful.
|
||||
class KanjiResult {
|
||||
/// The term that you searched for.
|
||||
String query;
|
||||
|
||||
/// True if results were found.
|
||||
bool found;
|
||||
|
||||
/// The school level that the kanji is taught in, if applicable.
|
||||
String taughtIn;
|
||||
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
|
||||
///
|
||||
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
|
||||
String jlptLevel;
|
||||
/// A number representing this kanji's frequency rank in newspapers, if applicable.
|
||||
int newspaperFrequencyRank;
|
||||
/// How many strokes this kanji is typically drawn in, if applicable.
|
||||
int strokeCount;
|
||||
/// The meaning of the kanji, if applicable.
|
||||
String meaning;
|
||||
/// This character's kunyomi, if applicable.
|
||||
List<String> kunyomi;
|
||||
/// This character's onyomi, if applicable.
|
||||
List<String> onyomi;
|
||||
/// Examples of this character's kunyomi being used, if applicable.
|
||||
List<YomiExample> kunyomiExamples;
|
||||
/// Examples of this character's onyomi being used, if applicable.
|
||||
List<YomiExample> onyomiExamples;
|
||||
/// Information about this character's radical, if applicable.
|
||||
Radical radical;
|
||||
/// The parts used in this kanji, if applicable.
|
||||
List<String> parts;
|
||||
/// The URL to a diagram showing how to draw this kanji step by step, if applicable.
|
||||
String strokeOrderDiagramUri;
|
||||
/// The URL to an SVG describing how to draw this kanji, if applicable.
|
||||
String strokeOrderSvgUri;
|
||||
/// The URL to a gif showing the kanji being draw and its stroke order, if applicable.
|
||||
String strokeOrderGifUri;
|
||||
/// The URI that these results were scraped from, if applicable.
|
||||
String uri;
|
||||
/// The result data if search was successful.
|
||||
KanjiResultData? data;
|
||||
|
||||
KanjiResult(
|
||||
{this.query,
|
||||
this.found,
|
||||
this.taughtIn,
|
||||
this.jlptLevel,
|
||||
this.newspaperFrequencyRank,
|
||||
this.strokeCount,
|
||||
this.meaning,
|
||||
this.kunyomi,
|
||||
this.onyomi,
|
||||
this.kunyomiExamples,
|
||||
this.onyomiExamples,
|
||||
this.radical,
|
||||
this.parts,
|
||||
this.strokeOrderDiagramUri,
|
||||
this.strokeOrderSvgUri,
|
||||
this.strokeOrderGifUri,
|
||||
this.uri});
|
||||
// ignore: public_member_api_docs
|
||||
KanjiResult({
|
||||
required this.query,
|
||||
required this.found,
|
||||
this.data,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() {
|
||||
return {
|
||||
'query': query,
|
||||
'found': found,
|
||||
'data': data,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// The main kanji data class, collecting all the result information in one place.
|
||||
class KanjiResultData {
|
||||
/// The school level that the kanji is taught in, if applicable.
|
||||
String? taughtIn;
|
||||
|
||||
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
|
||||
///
|
||||
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
|
||||
String? jlptLevel;
|
||||
|
||||
/// A number representing this kanji's frequency rank in newspapers, if applicable.
|
||||
int? newspaperFrequencyRank;
|
||||
|
||||
/// How many strokes this kanji is typically drawn in.
|
||||
int strokeCount;
|
||||
|
||||
/// The meaning of the kanji.
|
||||
String meaning;
|
||||
|
||||
/// This character's kunyomi.
|
||||
List<String> kunyomi;
|
||||
|
||||
/// This character's onyomi.
|
||||
List<String> onyomi;
|
||||
|
||||
/// Examples of this character's kunyomi being used.
|
||||
List<YomiExample> kunyomiExamples;
|
||||
|
||||
/// Examples of this character's onyomi being used.
|
||||
List<YomiExample> onyomiExamples;
|
||||
|
||||
/// Information about this character's radical, if applicable.
|
||||
Radical? radical;
|
||||
|
||||
/// The parts used in this kanji.
|
||||
List<String> parts;
|
||||
|
||||
/// The URL to a diagram showing how to draw this kanji step by step.
|
||||
String strokeOrderDiagramUri;
|
||||
|
||||
/// The URL to an SVG describing how to draw this kanji.
|
||||
String strokeOrderSvgUri;
|
||||
|
||||
/// The URL to a gif showing the kanji being drawn and its stroke order.
|
||||
String strokeOrderGifUri;
|
||||
|
||||
/// The URI that these results were scraped from.
|
||||
String uri;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
KanjiResultData({
|
||||
this.taughtIn,
|
||||
this.jlptLevel,
|
||||
this.newspaperFrequencyRank,
|
||||
required this.strokeCount,
|
||||
required this.meaning,
|
||||
this.kunyomi = const [],
|
||||
this.onyomi = const [],
|
||||
this.kunyomiExamples = const [],
|
||||
this.onyomiExamples = const [],
|
||||
this.radical,
|
||||
this.parts = const [],
|
||||
required this.strokeOrderDiagramUri,
|
||||
required this.strokeOrderSvgUri,
|
||||
required this.strokeOrderGifUri,
|
||||
required this.uri,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() {
|
||||
return {
|
||||
'taughtIn': taughtIn,
|
||||
'jlptLevel': jlptLevel,
|
||||
'newspaperFrequencyRank': newspaperFrequencyRank,
|
||||
@@ -101,12 +162,12 @@ class KanjiResult {
|
||||
'onyomi': onyomi,
|
||||
'onyomiExamples': onyomiExamples,
|
||||
'kunyomiExamples': kunyomiExamples,
|
||||
'radical': (radical != null) ? radical.toJson() : null,
|
||||
'radical': radical?.toJson(),
|
||||
'parts': parts,
|
||||
'strokeOrderDiagramUri': strokeOrderDiagramUri,
|
||||
'strokeOrderSvgUri': strokeOrderSvgUri,
|
||||
'strokeOrderGifUri': strokeOrderGifUri,
|
||||
'uri': uri
|
||||
'uri': uri,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -115,54 +176,90 @@ class KanjiResult {
|
||||
/* searchForExamples related classes */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/// A word in an example sentence, consisting of either just kana, or kanji with furigana.
|
||||
class ExampleSentencePiece {
|
||||
/// Baseline text shown on Jisho.org (below the lifted text / furigana)
|
||||
String lifted;
|
||||
/// Furigana text shown on Jisho.org (above the unlifted text)
|
||||
/// Furigana text shown on Jisho.org (above the unlifted text), if applicable.
|
||||
String? lifted;
|
||||
|
||||
/// Baseline text shown on Jisho.org (below the lifted text / furigana).
|
||||
String unlifted;
|
||||
|
||||
ExampleSentencePiece({this.lifted, this.unlifted});
|
||||
// ignore: public_member_api_docs
|
||||
ExampleSentencePiece({
|
||||
this.lifted,
|
||||
required this.unlifted,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() {
|
||||
return {'lifted': lifted, 'unlifted': unlifted};
|
||||
return {
|
||||
'lifted': lifted,
|
||||
'unlifted': unlifted,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// All data making up one example sentence.
|
||||
class ExampleResultData {
|
||||
/// The example sentence including kanji.
|
||||
String kanji;
|
||||
|
||||
/// The example sentence without kanji (only kana). Sometimes this may include some Kanji, as furigana is not always available from Jisho.org.
|
||||
String kana;
|
||||
|
||||
/// An English translation of the example.
|
||||
String english;
|
||||
|
||||
/// The lifted/unlifted pairs that make up the sentence. Lifted text is furigana, unlifted is the text below the furigana.
|
||||
List<ExampleSentencePiece> pieces;
|
||||
|
||||
ExampleResultData({this.english, this.kanji, this.kana, this.pieces});
|
||||
// ignore: public_member_api_docs
|
||||
ExampleResultData({
|
||||
required this.english,
|
||||
required this.kanji,
|
||||
required this.kana,
|
||||
required this.pieces,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() {
|
||||
return {'english': english, 'kanji': kanji, 'kana': kana, 'pieces': pieces};
|
||||
return {
|
||||
'english': english,
|
||||
'kanji': kanji,
|
||||
'kana': kana,
|
||||
'pieces': pieces,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
/// The main wrapper containing data about the query and whether or not it was successful.
|
||||
class ExampleResults {
|
||||
/// The term that you searched for.
|
||||
String query;
|
||||
|
||||
/// True if results were found.
|
||||
bool found;
|
||||
|
||||
/// The URI that these results were scraped from.
|
||||
String uri;
|
||||
|
||||
/// The examples that were found, if any.
|
||||
List<ExampleResultData> results;
|
||||
|
||||
ExampleResults({this.query, this.found, this.results, this.uri});
|
||||
// ignore: public_member_api_docs
|
||||
ExampleResults({
|
||||
required this.query,
|
||||
required this.found,
|
||||
required this.results,
|
||||
required this.uri,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() {
|
||||
return {
|
||||
'query': query,
|
||||
'found': found,
|
||||
'results': results,
|
||||
'uri': uri
|
||||
'uri': uri,
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -171,96 +268,178 @@ class ExampleResults {
|
||||
/* scrapeForPhrase related classes */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/// An example sentence.
|
||||
class PhraseScrapeSentence {
|
||||
/// The English meaning of the sentence.
|
||||
String english;
|
||||
|
||||
/// The Japanese text of the sentence.
|
||||
String japanese;
|
||||
|
||||
/// The lifted/unlifted pairs that make up the sentence. Lifted text is furigana, unlifted is the text below the furigana.
|
||||
List<ExampleSentencePiece> pieces;
|
||||
|
||||
PhraseScrapeSentence({this.english, this.japanese, this.pieces});
|
||||
// ignore: public_member_api_docs
|
||||
PhraseScrapeSentence({
|
||||
required this.english,
|
||||
required this.japanese,
|
||||
required this.pieces,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() =>
|
||||
{'english': english, 'japanese': japanese, 'pieces': pieces};
|
||||
}
|
||||
|
||||
/// The data representing one "meaning" or "sense" of the word
|
||||
class PhraseScrapeMeaning {
|
||||
/// The words that Jisho lists as "see also".
|
||||
List<String> seeAlsoTerms;
|
||||
|
||||
/// Example sentences for this meaning.
|
||||
List<PhraseScrapeSentence> sentences;
|
||||
/// The definition of the meaning
|
||||
|
||||
/// The definition of the meaning.
|
||||
String definition;
|
||||
|
||||
/// Supplemental information.
|
||||
/// For example "usually written using kana alone".
|
||||
List<String> supplemental;
|
||||
|
||||
/// An "abstract" definition.
|
||||
/// Often this is a Wikipedia definition.
|
||||
String definitionAbstract;
|
||||
String? definitionAbstract;
|
||||
|
||||
/// Tags associated with this meaning.
|
||||
List<String> tags;
|
||||
|
||||
PhraseScrapeMeaning(
|
||||
{this.seeAlsoTerms,
|
||||
this.sentences,
|
||||
this.definition,
|
||||
this.supplemental,
|
||||
this.definitionAbstract,
|
||||
this.tags});
|
||||
// ignore: public_member_api_docs
|
||||
PhraseScrapeMeaning({
|
||||
this.seeAlsoTerms = const [],
|
||||
required this.sentences,
|
||||
required this.definition,
|
||||
this.supplemental = const [],
|
||||
this.definitionAbstract,
|
||||
this.tags = const [],
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'seeAlsoTerms': seeAlsoTerms,
|
||||
'sentences': sentences,
|
||||
'definition': definition,
|
||||
'supplemental': supplemental,
|
||||
'definitionAbstract': definitionAbstract,
|
||||
'tags': tags
|
||||
'tags': tags,
|
||||
};
|
||||
}
|
||||
|
||||
/// A pair of kanji and potential furigana.
|
||||
class KanjiKanaPair {
|
||||
/// Kanji
|
||||
String kanji;
|
||||
String kana;
|
||||
|
||||
KanjiKanaPair({this.kanji, this.kana});
|
||||
/// Furigana, if applicable.
|
||||
String? kana;
|
||||
|
||||
Map<String, String> toJson() => {'kanji': kanji, 'kana': kana};
|
||||
// ignore: public_member_api_docs
|
||||
KanjiKanaPair({
|
||||
required this.kanji,
|
||||
this.kana,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'kanji': kanji,
|
||||
'kana': kana,
|
||||
};
|
||||
}
|
||||
|
||||
/// The main wrapper containing data about the query and whether or not it was successful.
|
||||
class PhrasePageScrapeResult {
|
||||
/// True if a result was found.
|
||||
bool found;
|
||||
|
||||
/// The term that you searched for.
|
||||
String query;
|
||||
/// The URI that these results were scraped from, if a result was found.
|
||||
String uri;
|
||||
/// Other forms of the search term, if a result was found.
|
||||
List<String> tags;
|
||||
/// Information about the meanings associated with this search result.
|
||||
List<PhraseScrapeMeaning> meanings;
|
||||
/// Tags associated with this search result.
|
||||
List<KanjiKanaPair> otherForms;
|
||||
/// Notes associated with the search result.
|
||||
List<String> notes;
|
||||
|
||||
PhrasePageScrapeResult(
|
||||
{this.found,
|
||||
this.query,
|
||||
this.uri,
|
||||
this.tags,
|
||||
this.meanings,
|
||||
this.otherForms,
|
||||
this.notes});
|
||||
/// The result data if search was successful.
|
||||
PhrasePageScrapeResultData? data;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
PhrasePageScrapeResult({
|
||||
required this.found,
|
||||
required this.query,
|
||||
this.data,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'found': found,
|
||||
'query': query,
|
||||
'data': data,
|
||||
};
|
||||
}
|
||||
|
||||
/// Pronunciation audio.
class AudioFile {
  /// The uri of the audio file.
  String uri;

  /// The mimetype of the audio.
  String mimetype;

  /// Creates an audio file reference from its [uri] and [mimetype].
  AudioFile({required this.uri, required this.mimetype});

  /// Returns this audio file as a map with the keys `uri` and `mimetype`.
  Map<String, dynamic> toJson() {
    return <String, dynamic>{'uri': uri, 'mimetype': mimetype};
  }
}
|
||||
|
||||
/// The main scrape data class, collecting all the result information in one place.
|
||||
class PhrasePageScrapeResultData {
|
||||
/// The URI that these results were scraped from.
|
||||
String uri;
|
||||
|
||||
/// Tags associated with this search result.
|
||||
List<String> tags;
|
||||
|
||||
/// Information about the meanings associated with this search result.
|
||||
List<PhraseScrapeMeaning> meanings;
|
||||
|
||||
/// Other forms of the search term.
|
||||
List<KanjiKanaPair> otherForms;
|
||||
|
||||
/// Pronunciation audio for the search result.
|
||||
List<AudioFile> audio;
|
||||
|
||||
/// Notes associated with the search result.
|
||||
List<String> notes;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
PhrasePageScrapeResultData({
|
||||
required this.uri,
|
||||
this.tags = const [],
|
||||
this.meanings = const [],
|
||||
this.otherForms = const [],
|
||||
this.audio = const [],
|
||||
this.notes = const [],
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'uri': uri,
|
||||
'tags': tags,
|
||||
'meanings': meanings,
|
||||
'otherForms': otherForms,
|
||||
'notes': notes
|
||||
'audio': audio,
|
||||
'notes': notes,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -268,85 +447,157 @@ class PhrasePageScrapeResult {
|
||||
/* searchForPhrase related classes */
|
||||
/* -------------------------------------------------------------------------- */
|
||||
|
||||
/// Kanji/Furigana pair, or just kana as word.
|
||||
///
|
||||
/// Which field acts as kanji and/or kana might be unreliable, which is why both are nullable.
|
||||
class JishoJapaneseWord {
|
||||
String word;
|
||||
String reading;
|
||||
/// Usually kanji or kana.
|
||||
String? word;
|
||||
|
||||
JishoJapaneseWord({this.word, this.reading});
|
||||
/// Usually furigana, if applicable.
|
||||
String? reading;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
JishoJapaneseWord({
|
||||
this.word,
|
||||
this.reading,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoJapaneseWord.fromJson(Map<String, dynamic> json) {
|
||||
return JishoJapaneseWord(
|
||||
word: json['word'] as String, reading: json['reading'] as String);
|
||||
word: json['word'] as String?,
|
||||
reading: json['reading'] as String?,
|
||||
);
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() => {'word': word, 'reading': reading};
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'word': word,
|
||||
'reading': reading,
|
||||
};
|
||||
}
|
||||
|
||||
/// Relevant links of the search result.
|
||||
class JishoSenseLink {
|
||||
/// Description of the linked webpage.
|
||||
String text;
|
||||
|
||||
/// Link to the webpage.
|
||||
String url;
|
||||
|
||||
JishoSenseLink({this.text, this.url});
|
||||
// ignore: public_member_api_docs
|
||||
JishoSenseLink({required this.text, required this.url});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoSenseLink.fromJson(Map<String, dynamic> json) {
|
||||
return JishoSenseLink(
|
||||
text: json['text'] as String, url: json['url'] as String);
|
||||
text: json['text'] as String,
|
||||
url: json['url'] as String,
|
||||
);
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() => {'text': text, 'url': url};
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'text': text,
|
||||
'url': url,
|
||||
};
|
||||
}
|
||||
|
||||
/// Origin of the word (from other languages).
class JishoWordSource {
  /// Origin language.
  String language;

  /// Origin word, if present.
  String? word;

  /// Creates a source entry for [language], optionally naming the origin
  /// [word].
  JishoWordSource({
    required this.language,
    this.word,
  });

  /// Returns this source as a map with the keys `language` and `word`.
  ///
  /// `word` maps to `null` when no origin word is present.
  Map<String, dynamic> toJson() => {
        'language': language,
        'word': word,
      };
}
|
||||
|
||||
/// One sense of the word.
|
||||
class JishoWordSense {
|
||||
List<String> english_definitions;
|
||||
List<String> parts_of_speech;
|
||||
/// The meaning(s) of the word.
|
||||
List<String> englishDefinitions;
|
||||
|
||||
/// Type of word (Noun, Verb, etc.).
|
||||
List<String> partsOfSpeech;
|
||||
|
||||
/// Relevant links.
|
||||
List<JishoSenseLink> links;
|
||||
|
||||
/// Relevant tags.
|
||||
List<String> tags;
|
||||
List<String> see_also;
|
||||
|
||||
/// Relevant words (might include synonyms).
|
||||
List<String> seeAlso;
|
||||
|
||||
/// Words with opposite meaning.
|
||||
List<String> antonyms;
|
||||
List<dynamic> source;
|
||||
|
||||
/// Origins of the word (from other languages).
|
||||
List<JishoWordSource> source;
|
||||
|
||||
/// Additional info.
|
||||
List<String> info;
|
||||
List<dynamic> restrictions;
|
||||
|
||||
JishoWordSense(
|
||||
{this.english_definitions,
|
||||
this.parts_of_speech,
|
||||
this.links,
|
||||
this.tags,
|
||||
this.see_also,
|
||||
this.antonyms,
|
||||
this.source,
|
||||
this.info,
|
||||
this.restrictions});
|
||||
/// Restrictions as to which variants of the japanese words are usable for this sense.
|
||||
List<String> restrictions;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
JishoWordSense({
|
||||
required this.englishDefinitions,
|
||||
required this.partsOfSpeech,
|
||||
this.links = const [],
|
||||
this.tags = const [],
|
||||
this.seeAlso = const [],
|
||||
this.antonyms = const [],
|
||||
this.source = const [],
|
||||
this.info = const [],
|
||||
this.restrictions = const [],
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
/// Builds a [JishoWordSense] from one decoded `senses` entry of the API
/// response.
///
/// Every list field is rebuilt element by element: the decoded lists are not
/// typed as `List<String>` or `List<JishoWordSource>`, so casting the whole
/// list with `as List<T>` fails at runtime.
factory JishoWordSense.fromJson(Map<String, dynamic> json) {
  // Reads the list stored under [key] as a list of strings.
  List<String> strings(String key) =>
      (json[key] as List).map((result) => result as String).toList();

  return JishoWordSense(
    englishDefinitions: strings('english_definitions'),
    partsOfSpeech: strings('parts_of_speech'),
    links: (json['links'] as List)
        .map((result) => JishoSenseLink.fromJson(result))
        .toList(),
    tags: strings('tags'),
    seeAlso: strings('see_also'),
    antonyms: strings('antonyms'),
    source: (json['source'] as List).map((result) {
      final entry = result as Map<String, dynamic>;
      return JishoWordSource(
        language: entry['language'] as String,
        word: entry['word'] as String?,
      );
    }).toList(),
    info: strings('info'),
    restrictions: strings('restrictions'),
  );
}
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'english_definitions': english_definitions,
|
||||
'parts_of_speech': parts_of_speech,
|
||||
'english_definitions': englishDefinitions,
|
||||
'parts_of_speech': partsOfSpeech,
|
||||
'links': links,
|
||||
'tags': tags,
|
||||
'see_also': see_also,
|
||||
'see_also': seeAlso,
|
||||
'antonyms': antonyms,
|
||||
'source': source,
|
||||
'info': info,
|
||||
@@ -354,88 +605,143 @@ class JishoWordSense {
|
||||
};
|
||||
}
|
||||
|
||||
/// The original source(s) of the result.
|
||||
class JishoAttribution {
|
||||
/// Whether jmdict was a source.
|
||||
bool jmdict;
|
||||
|
||||
/// Whether jmnedict was a source.
|
||||
bool jmnedict;
|
||||
String dbpedia;
|
||||
|
||||
JishoAttribution({this.jmdict, this.jmnedict, this.dbpedia});
|
||||
/// Additional sources, if applicable.
|
||||
String? dbpedia;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
JishoAttribution({
|
||||
required this.jmdict,
|
||||
required this.jmnedict,
|
||||
this.dbpedia,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoAttribution.fromJson(Map<String, dynamic> json) {
|
||||
return JishoAttribution(
|
||||
jmdict: (json['jmdict'].toString() == 'true'),
|
||||
jmnedict: (json['jmnedict'].toString() == 'true'),
|
||||
dbpedia: (json['dbpedia'].toString() != 'false')
|
||||
? json['dbpedia'].toString()
|
||||
: null);
|
||||
jmdict: (json['jmdict'].toString() == 'true'),
|
||||
jmnedict: (json['jmnedict'].toString() == 'true'),
|
||||
dbpedia: (json['dbpedia'].toString() != 'false')
|
||||
? json['dbpedia'].toString()
|
||||
: null,
|
||||
);
|
||||
}
|
||||
|
||||
Map<String, dynamic> toJson() =>
|
||||
{'jmdict': jmdict, 'jmnedict': jmnedict, 'dbpedia': dbpedia};
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'jmdict': jmdict,
|
||||
'jmnedict': jmnedict,
|
||||
'dbpedia': dbpedia,
|
||||
};
|
||||
}
|
||||
|
||||
/// The main API data class, collecting all information of one result in one place.
|
||||
class JishoResult {
|
||||
/// The main version of the word
|
||||
///
|
||||
/// This value might sometimes appear as some kind of hash or encoded version of the word.
|
||||
/// When that happens, the word usually originates from DBpedia.
|
||||
String slug;
|
||||
bool is_common;
|
||||
|
||||
/// Whether the word is common.
|
||||
///
|
||||
/// DBpedia entries sometimes omit this value.
|
||||
bool? isCommon;
|
||||
|
||||
/// Related tags.
|
||||
List<String> tags;
|
||||
|
||||
/// Relevant jlpt levels.
|
||||
List<String> jlpt;
|
||||
|
||||
/// Japanese versions of the word.
|
||||
List<JishoJapaneseWord> japanese;
|
||||
|
||||
/// Translations of the word.
|
||||
List<JishoWordSense> senses;
|
||||
|
||||
/// Sources.
|
||||
JishoAttribution attribution;
|
||||
|
||||
JishoResult(
|
||||
{this.slug,
|
||||
this.is_common,
|
||||
this.tags,
|
||||
this.jlpt,
|
||||
this.japanese,
|
||||
this.senses,
|
||||
this.attribution});
|
||||
// ignore: public_member_api_docs
|
||||
JishoResult({
|
||||
required this.slug,
|
||||
required this.isCommon,
|
||||
this.tags = const [],
|
||||
this.jlpt = const [],
|
||||
required this.japanese,
|
||||
required this.senses,
|
||||
required this.attribution,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoResult.fromJson(Map<String, dynamic> json) {
|
||||
return JishoResult(
|
||||
slug: json['slug'] as String,
|
||||
is_common: json['is_common'] as bool,
|
||||
tags: (json['tags'] as List).map((result) => result as String).toList(),
|
||||
jlpt: (json['jlpt'] as List).map((result) => result as String).toList(),
|
||||
japanese: (json['japanese'] as List)
|
||||
.map((result) => JishoJapaneseWord.fromJson(result))
|
||||
.toList(),
|
||||
senses: (json['senses'] as List)
|
||||
.map((result) => JishoWordSense.fromJson(result))
|
||||
.toList(),
|
||||
attribution: JishoAttribution.fromJson(json['attribution']));
|
||||
slug: json['slug'] as String,
|
||||
isCommon: json['is_common'] as bool?,
|
||||
tags: (json['tags'] as List).map((result) => result as String).toList(),
|
||||
jlpt: (json['jlpt'] as List).map((result) => result as String).toList(),
|
||||
japanese: (json['japanese'] as List)
|
||||
.map((result) => JishoJapaneseWord.fromJson(result))
|
||||
.toList(),
|
||||
senses: (json['senses'] as List)
|
||||
.map((result) => JishoWordSense.fromJson(result))
|
||||
.toList(),
|
||||
attribution: JishoAttribution.fromJson(json['attribution']),
|
||||
);
|
||||
}
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {
|
||||
'slug': slug,
|
||||
'is_common': is_common,
|
||||
'is_common': isCommon,
|
||||
'tags': tags,
|
||||
'jlpt': jlpt,
|
||||
'japanese': japanese,
|
||||
'senses': senses,
|
||||
'attribution': attribution
|
||||
'attribution': attribution,
|
||||
};
|
||||
}
|
||||
|
||||
/// Metadata with result status.
|
||||
class JishoResultMeta {
|
||||
/// HTTP status code.
|
||||
int status;
|
||||
|
||||
JishoResultMeta({this.status});
|
||||
// ignore: public_member_api_docs
|
||||
JishoResultMeta({required this.status});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoResultMeta.fromJson(Map<String, dynamic> json) {
|
||||
return JishoResultMeta(status: json['status'] as int);
|
||||
}
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {'status': status};
|
||||
}
|
||||
|
||||
/// The main API result wrapper containing whether it was successful, and potential results.
|
||||
class JishoAPIResult {
|
||||
/// Metadata with result status.
|
||||
JishoResultMeta meta;
|
||||
List<JishoResult> data;
|
||||
|
||||
JishoAPIResult({this.meta, this.data});
|
||||
/// Results.
|
||||
List<JishoResult>? data;
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
JishoAPIResult({
|
||||
required this.meta,
|
||||
this.data,
|
||||
});
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
factory JishoAPIResult.fromJson(Map<String, dynamic> json) {
|
||||
return JishoAPIResult(
|
||||
meta: JishoResultMeta.fromJson(json['meta']),
|
||||
@@ -444,5 +750,6 @@ class JishoAPIResult {
|
||||
.toList());
|
||||
}
|
||||
|
||||
// ignore: public_member_api_docs
|
||||
Map<String, dynamic> toJson() => {'meta': meta.toJson(), 'data': data};
|
||||
}
|
||||
|
@@ -1,157 +0,0 @@
|
||||
import 'package:html/parser.dart';
|
||||
import 'package:html/dom.dart';
|
||||
|
||||
import './exampleSearch.dart' show getPieces;
|
||||
import './objects.dart';
|
||||
|
||||
List<String> _getTags(Document document) {
|
||||
final List<String> tags = [];
|
||||
final tagElements = document.querySelectorAll('.concept_light-tag');
|
||||
|
||||
for (var i = 0; i < tagElements.length; i += 1) {
|
||||
final tagText = tagElements[i].text;
|
||||
tags.add(tagText);
|
||||
}
|
||||
|
||||
return tags;
|
||||
}
|
||||
|
||||
List<String> _getMostRecentWordTypes(Element child) {
|
||||
return child.text.split(',').map((s) => s.trim().toLowerCase()).toList();
|
||||
}
|
||||
|
||||
List<KanjiKanaPair> _getOtherForms(Element child) {
|
||||
return child.text
|
||||
.split('、')
|
||||
.map((s) => s.replaceAll('【', '').replaceAll('】', '').split(' '))
|
||||
.map((a) =>
|
||||
(KanjiKanaPair(kanji: a[0], kana: (a.length == 2) ? a[1] : null)))
|
||||
.toList();
|
||||
}
|
||||
|
||||
List<String> _getNotes(Element child) => child.text.split('\n');
|
||||
|
||||
String _getMeaning(Element child) =>
|
||||
child.querySelector('.meaning-meaning').text;
|
||||
|
||||
String _getMeaningAbstract(Element child) {
|
||||
final meaningAbstract = child.querySelector('.meaning-abstract');
|
||||
if (meaningAbstract == null) return null;
|
||||
|
||||
for (var element in meaningAbstract.querySelectorAll('a')) {
|
||||
element.remove();
|
||||
}
|
||||
|
||||
return child.querySelector('.meaning-abstract')?.text;
|
||||
}
|
||||
|
||||
List<String> _getSupplemental(Element child) {
|
||||
final supplemental = child.querySelector('.supplemental_info');
|
||||
if (supplemental == null) return [];
|
||||
return supplemental.text.split(',').map((s) => s.trim()).toList();
|
||||
}
|
||||
|
||||
List<String> _getSeeAlsoTerms(List<String> supplemental) {
|
||||
if (supplemental == null) return [];
|
||||
|
||||
final List<String> seeAlsoTerms = [];
|
||||
for (var i = supplemental.length - 1; i >= 0; i -= 1) {
|
||||
final supplementalEntry = supplemental[i];
|
||||
if (supplementalEntry.startsWith('See also')) {
|
||||
seeAlsoTerms.add(supplementalEntry.replaceAll('See also ', ''));
|
||||
supplemental.removeAt(i);
|
||||
}
|
||||
}
|
||||
return seeAlsoTerms;
|
||||
}
|
||||
|
||||
List<PhraseScrapeSentence> _getSentences(Element child) {
|
||||
final sentenceElements =
|
||||
child.querySelector('.sentences')?.querySelectorAll('.sentence');
|
||||
if (sentenceElements == null) return [];
|
||||
|
||||
final List<PhraseScrapeSentence> sentences = [];
|
||||
for (var sentenceIndex = 0;
|
||||
sentenceIndex < (sentenceElements?.length ?? 0);
|
||||
sentenceIndex += 1) {
|
||||
final sentenceElement = sentenceElements[sentenceIndex];
|
||||
|
||||
final english = sentenceElement.querySelector('.english').text;
|
||||
final pieces = getPieces(sentenceElement);
|
||||
|
||||
sentenceElement.querySelector('.english').remove();
|
||||
for (var element in sentenceElement.children[0].children) {
|
||||
element.querySelector('.furigana')?.remove();
|
||||
}
|
||||
|
||||
final japanese = sentenceElement.text;
|
||||
|
||||
sentences.add(PhraseScrapeSentence(
|
||||
english: english, japanese: japanese, pieces: pieces ?? []));
|
||||
}
|
||||
|
||||
return sentences;
|
||||
}
|
||||
|
||||
PhrasePageScrapeResult _getMeaningsOtherFormsAndNotes(Document document) {
|
||||
final returnValues = PhrasePageScrapeResult(otherForms: [], notes: []);
|
||||
|
||||
final meaningsWrapper = document.querySelector('.meanings-wrapper');
|
||||
if (meaningsWrapper == null) return PhrasePageScrapeResult(found: false);
|
||||
returnValues.found = true;
|
||||
|
||||
final meaningsChildren = meaningsWrapper.children;
|
||||
|
||||
final List<PhraseScrapeMeaning> meanings = [];
|
||||
var mostRecentWordTypes = [];
|
||||
for (var meaningIndex = 0;
|
||||
meaningIndex < meaningsChildren.length;
|
||||
meaningIndex += 1) {
|
||||
final child = meaningsChildren[meaningIndex];
|
||||
|
||||
if (child.className.contains('meaning-tags')) {
|
||||
mostRecentWordTypes = _getMostRecentWordTypes(child);
|
||||
} else if (mostRecentWordTypes[0] == 'other forms') {
|
||||
returnValues.otherForms = _getOtherForms(child);
|
||||
} else if (mostRecentWordTypes[0] == 'notes') {
|
||||
returnValues.notes = _getNotes(child);
|
||||
} else {
|
||||
final meaning = _getMeaning(child);
|
||||
final meaningAbstract = _getMeaningAbstract(child);
|
||||
final supplemental = _getSupplemental(child);
|
||||
final seeAlsoTerms = _getSeeAlsoTerms(supplemental);
|
||||
final sentences = _getSentences(child);
|
||||
|
||||
meanings.add(PhraseScrapeMeaning(
|
||||
seeAlsoTerms: seeAlsoTerms ?? [],
|
||||
sentences: sentences ?? [],
|
||||
definition: meaning,
|
||||
supplemental: supplemental ?? [],
|
||||
definitionAbstract: meaningAbstract,
|
||||
tags: mostRecentWordTypes ?? [],
|
||||
));
|
||||
}
|
||||
}
|
||||
|
||||
returnValues.meanings = meanings;
|
||||
|
||||
return returnValues;
|
||||
}
|
||||
|
||||
/// Provides the URI for a phrase scrape
|
||||
String uriForPhraseScrape(String searchTerm) {
|
||||
return 'https://jisho.org/word/${Uri.encodeComponent(searchTerm)}';
|
||||
}
|
||||
|
||||
/// Parses a jisho word search page to an object
|
||||
PhrasePageScrapeResult parsePhrasePageData(String pageHtml, String query) {
|
||||
final document = parse(pageHtml);
|
||||
final result = _getMeaningsOtherFormsAndNotes(document);
|
||||
|
||||
result.query = query;
|
||||
if (!result.found) return result;
|
||||
result.uri = uriForPhraseScrape(query);
|
||||
result.tags = _getTags(document);
|
||||
|
||||
return result;
|
||||
}
|
@@ -1,6 +0,0 @@
|
||||
import './base_uri.dart';
|
||||
|
||||
/// Provides the URI for a phrase search
|
||||
String uriForPhraseSearch(String phrase) {
|
||||
return '$JISHO_API?keyword=${Uri.encodeComponent(phrase)}';
|
||||
}
|
202
lib/src/phrase_scrape.dart
Normal file
202
lib/src/phrase_scrape.dart
Normal file
@@ -0,0 +1,202 @@
|
||||
import 'package:html/dom.dart';
|
||||
import 'package:html/parser.dart';
|
||||
|
||||
import './example_search.dart' show getPieces;
|
||||
import './objects.dart';
|
||||
import './scraping.dart';
|
||||
|
||||
/// Collects the text of every `.concept_light-tag` element in [document].
List<String> _getTags(Document document) => [
      for (final tagElement in document.querySelectorAll('.concept_light-tag'))
        tagElement.text,
    ];
|
||||
|
||||
/// Splits the comma-separated text of [child] into trimmed, lower-cased
/// entries.
List<String> _getMostRecentWordTypes(Element child) {
  final wordTypes = <String>[];
  for (final rawType in child.text.split(',')) {
    wordTypes.add(rawType.trim().toLowerCase());
  }
  return wordTypes;
}
|
||||
|
||||
/// Parses the `、`-separated text of [child] into kanji/kana pairs.
///
/// The brackets `【` and `】` are stripped from each entry, which is then split
/// on spaces. The first part becomes the kanji; the second becomes the kana
/// only when the entry has exactly two parts.
List<KanjiKanaPair> _getOtherForms(Element child) {
  final forms = <KanjiKanaPair>[];
  for (final entry in child.text.split('、')) {
    final parts = entry.replaceAll('【', '').replaceAll('】', '').split(' ');
    forms.add(KanjiKanaPair(
      kanji: parts[0],
      kana: parts.length == 2 ? parts[1] : null,
    ));
  }
  return forms;
}
|
||||
|
||||
/// Splits the text of [child] into one entry per line.
List<String> _getNotes(Element child) {
  final noteText = child.text;
  return noteText.split('\n');
}
|
||||
|
||||
/// Returns the text of the `.meaning-meaning` element inside [child].
///
/// The lookup result is passed through `assertNotNull` with a parse-error
/// message, so a missing element is reported by that helper.
String _getMeaningString(Element child) => assertNotNull(
      variable: child.querySelector('.meaning-meaning')?.text,
      errorMessage:
          "Could not parse meaning div. Is the provided document corrupt, or has Jisho been updated?",
    );
|
||||
|
||||
/// Returns the text of the `.meaning-abstract` element inside [child], or
/// `null` when there is none.
///
/// Note that this mutates the document: every `a` element below the abstract
/// is removed before its text is read.
String? _getMeaningAbstract(Element child) {
  final abstractElement = child.querySelector('.meaning-abstract');
  if (abstractElement == null) return null;

  for (final anchor in abstractElement.querySelectorAll('a')) {
    anchor.remove();
  }

  return abstractElement.text;
}
|
||||
|
||||
/// Splits the comma-separated text of the `.supplemental_info` element inside
/// [child] into trimmed entries.
///
/// Returns an empty list when the element is absent.
List<String> _getSupplemental(Element child) {
  final supplementalText = child.querySelector('.supplemental_info')?.text;
  if (supplementalText == null) return [];

  return [for (final entry in supplementalText.split(',')) entry.trim()];
}
|
||||
|
||||
/// Extracts the "See also" entries from [supplemental].
///
/// Entries starting with `See also` are removed from [supplemental] in place
/// and returned with every `See also ` occurrence stripped. The returned terms
/// are in reverse order of their position in [supplemental].
List<String> _getSeeAlsoTerms(List<String> supplemental) {
  bool isSeeAlso(String entry) => entry.startsWith('See also');

  // Materialize the terms before mutating the list they are read from.
  final seeAlsoTerms = supplemental.reversed
      .where(isSeeAlso)
      .map((entry) => entry.replaceAll('See also ', ''))
      .toList();
  supplemental.removeWhere(isSeeAlso);

  return seeAlsoTerms;
}
|
||||
|
||||
/// Parses one `.sentence` element into a [PhraseScrapeSentence].
///
/// Note that this mutates [sentenceElement]: the `.english` element and the
/// `.furigana` elements under the first child's children are removed, and the
/// remaining text is used as the Japanese sentence.
PhraseScrapeSentence _getSentence(Element sentenceElement) {
  final translation = assertNotNull(
    variable: sentenceElement.querySelector('.english')?.text,
    errorMessage:
        'Could not parse sentence translation. Is the provided document corrupt, or has Jisho been updated?',
  );

  // The pieces are read before any element is removed below.
  final sentencePieces = getPieces(sentenceElement);

  sentenceElement.querySelector('.english')?.remove();
  for (final wordElement in sentenceElement.children[0].children) {
    wordElement.querySelector('.furigana')?.remove();
  }

  return PhraseScrapeSentence(
    english: translation,
    japanese: sentenceElement.text,
    pieces: sentencePieces,
  );
}
|
||||
|
||||
/// Parses every `.sentence` element below the `.sentences` element of [child].
///
/// Returns an empty list when [child] has no `.sentences` element.
List<PhraseScrapeSentence> _getSentences(Element child) {
  final sentencesContainer = child.querySelector('.sentences');
  if (sentencesContainer == null) return [];

  return [
    for (final sentenceElement
        in sentencesContainer.querySelectorAll('.sentence'))
      _getSentence(sentenceElement),
  ];
}
|
||||
|
||||
/// Builds a [PhraseScrapeMeaning] from one meaning element.
///
/// The "See also" entries are split out of the supplemental list, so they
/// appear in `seeAlsoTerms` and not in `supplemental`. No `tags` argument is
/// passed here.
PhraseScrapeMeaning _getMeaning(Element child) {
  final definition = _getMeaningString(child);
  final definitionAbstract = _getMeaningAbstract(child);

  // _getSeeAlsoTerms removes its matches from this list in place.
  final supplementalInfo = _getSupplemental(child);
  final seeAlso = _getSeeAlsoTerms(supplementalInfo);

  final exampleSentences = _getSentences(child);

  return PhraseScrapeMeaning(
    seeAlsoTerms: seeAlso,
    sentences: exampleSentences,
    definition: definition,
    supplemental: supplementalInfo,
    definitionAbstract: definitionAbstract,
  );
}
|
||||
|
||||
/// Collects the audio sources listed under the `.concept_light-status`
/// element of [document].
///
/// Returns an empty list when that element is absent. Each `audio > source`
/// element must carry `src` and `type` attributes (checked with
/// `assertNotNull`); the `src` value is prefixed with `https:`.
List<AudioFile> _getAudio(Document document) {
  final statusElement = document.querySelector('.concept_light-status');
  if (statusElement == null) return [];

  final audioFiles = <AudioFile>[];
  for (final sourceElement in statusElement.querySelectorAll('audio > source')) {
    final src = assertNotNull(
      variable: sourceElement.attributes["src"],
      errorMessage:
          'Could not parse audio source. Is the provided document corrupt, or has Jisho been updated?',
    );
    final type = assertNotNull(
      variable: sourceElement.attributes['type'],
      errorMessage:
          'Could not parse audio type. Is the provided document corrupt, or has Jisho been updated?',
    );
    audioFiles.add(AudioFile(uri: 'https:$src', mimetype: type));
  }
  return audioFiles;
}
|
||||
|
||||
/// Provides the URI for a phrase scrape.
///
/// [searchTerm] is percent-encoded and appended to `https://jisho.org/word/`.
Uri uriForPhraseScrape(String searchTerm) {
  final encodedTerm = Uri.encodeComponent(searchTerm);
  return Uri.parse('https://jisho.org/word/$encodedTerm');
}
|
||||
|
||||
/// Scrapes the meanings, other forms and notes from the `.meanings-wrapper`
/// element of [document], and bundles them with the page's tags and audio.
///
/// The wrapper's children are walked in order. A `meaning-tags` child sets the
/// current section; every following child is read as "other forms", "notes"
/// or a regular meaning, depending on the first entry of that section. Regular
/// meanings receive the current section's entries as their `tags`.
///
/// The wrapper lookup goes through `assertNotNull`, so a document without a
/// `.meanings-wrapper` element is reported by that helper.
PhrasePageScrapeResultData _getMeaningsOtherFormsAndNotes(
    String query, Document document) {
  final meaningsWrapper = assertNotNull(
    variable: document.querySelector('.meanings-wrapper'),
    errorMessage:
        "Could not parse meanings. Is the provided document corrupt, or has Jisho been updated?",
  );

  final meanings = <PhraseScrapeMeaning>[];
  var mostRecentWordTypes = <String>[];
  List<KanjiKanaPair>? otherForms;
  List<String>? notes;

  for (final child in meaningsWrapper.children) {
    if (child.className.contains('meaning-tags')) {
      mostRecentWordTypes = _getMostRecentWordTypes(child);
      continue;
    }

    // A child may precede any `meaning-tags` header; it is then treated as an
    // untagged meaning rather than indexing into an empty list.
    final section =
        mostRecentWordTypes.isEmpty ? null : mostRecentWordTypes[0];

    if (section == 'other forms') {
      otherForms = _getOtherForms(child);
    } else if (section == 'notes') {
      notes = _getNotes(child);
    } else {
      // _getMeaning has no access to the section header, so the tags are
      // attached here.
      final meaning = _getMeaning(child);
      meaning.tags = mostRecentWordTypes;
      meanings.add(meaning);
    }
  }

  return PhrasePageScrapeResultData(
    uri: uriForPhraseScrape(query).toString(),
    tags: _getTags(document),
    meanings: meanings,
    otherForms: otherForms ?? [],
    audio: _getAudio(document),
    notes: notes ?? [],
  );
}
|
||||
|
||||
/// Whether [document] contains a `.meanings-wrapper` element, which is used
/// as the signal that the page holds a result.
bool _resultWasFound(Document document) =>
    document.querySelector('.meanings-wrapper') != null;
|
||||
|
||||
/// Parses the HTML of a jisho word page into a [PhrasePageScrapeResult].
///
/// When the page contains no result, the returned object has `found` set to
/// `false` and carries only [query]. Otherwise `found` is `true` and `data`
/// holds the scraped contents.
PhrasePageScrapeResult parsePhrasePageData(String pageHtml, String query) {
  final parsedPage = parse(pageHtml);

  return _resultWasFound(parsedPage)
      ? PhrasePageScrapeResult(
          found: true,
          query: query,
          data: _getMeaningsOtherFormsAndNotes(query, parsedPage),
        )
      : PhrasePageScrapeResult(found: false, query: query);
}
|
6
lib/src/phrase_search.dart
Normal file
6
lib/src/phrase_search.dart
Normal file
@@ -0,0 +1,6 @@
|
||||
import './base_uri.dart';
|
||||
|
||||
/// Returns the search URI for [phrase], built from [jishoApi].
///
/// The phrase is percent-encoded with [Uri.encodeComponent] and passed as the
/// `keyword` query parameter.
Uri uriForPhraseSearch(String phrase) =>
    Uri.parse('$jishoApi?keyword=${Uri.encodeComponent(phrase)}');
|
75
lib/src/scraping.dart
Normal file
75
lib/src/scraping.dart
Normal file
@@ -0,0 +1,75 @@
|
||||
/// Strips every carriage return and line feed from [str], then trims leading
/// and trailing whitespace from the result.
String removeNewlines(String str) {
  final lineBreak = RegExp(r'(?:\r|\n)');
  final withoutLineBreaks = str.replaceAll(lineBreak, '');
  return withoutLineBreaks.trim();
}
|
||||
|
||||
/// Extracts the text of [data] from [startIndex] (inclusive) to [endIndex]
/// (exclusive), with all newlines removed and surrounding whitespace trimmed.
String getStringBetweenIndicies(String data, int startIndex, int endIndex) =>
    removeNewlines(data.substring(startIndex, endIndex)).trim();
|
||||
|
||||
/// Returns the text found between the first occurrence of [startString] and
/// the nearest following [endString] in [data], or `null` if there is no such
/// pair.
///
/// Both delimiters are matched literally (they are escaped before being put
/// into the pattern), and the text in between may span multiple lines.
String? getStringBetweenStrings(
  String data,
  String startString,
  String endString,
) {
  final regex = RegExp(
    '${RegExp.escape(startString)}(.*?)${RegExp.escape(endString)}',
    dotAll: true,
  );

  // Only the first match is needed, so stop searching as soon as it is found
  // instead of collecting every match in the input.
  return regex.firstMatch(data)?.group(1);
}
|
||||
|
||||
/// Returns the integer found between [startString] and [endString] in
/// [data], or `null` when the delimiters are not found.
///
/// The text between the delimiters is handed to [int.parse], which throws a
/// [FormatException] if that text is not a valid integer.
int? getIntBetweenStrings(
  String data,
  String startString,
  String endString,
) {
  final digits = getStringBetweenStrings(data, startString, endString);
  if (digits == null) {
    return null;
  }
  return int.parse(digits);
}
|
||||
|
||||
/// Returns the first capture group of every match of [regex] in [str], in
/// order of appearance.
///
/// Matches in which the first group did not participate are left out.
List<String> getAllGlobalGroupMatches(String str, RegExp regex) => regex
    .allMatches(str)
    .map((match) => match.group(1))
    .whereType<String>()
    .toList();
|
||||
|
||||
/// Returns the inner content of every `<a href="...">DATA</a>` anchor found
/// in [str], in order of appearance.
List<String> parseAnchorsToArray(String str) =>
    getAllGlobalGroupMatches(str, RegExp(r'<a href=".*?">(.*?)<\/a>'));
|
||||
|
||||
/// An exception thrown whenever a parser fails because it did not find an
/// expected pattern.
class ParserException implements Exception {
  /// The error message to report.
  final String message;

  /// Creates a parser exception carrying [message].
  const ParserException(this.message);

  /// Includes [message] in the textual form, so that logs and uncaught-error
  /// output show the cause rather than `Instance of 'ParserException'`.
  @override
  String toString() => 'ParserException: $message';
}
|
||||
|
||||
/// Returns [variable] unchanged when it is not `null`.
///
/// Throws a [ParserException] carrying [errorMessage] when [variable] is
/// `null`.
dynamic assertNotNull({
  required dynamic variable,
  String errorMessage =
      "Unexpected null-value occured. Is the provided document corrupt, or has Jisho been updated?",
}) {
  if (variable != null) {
    return variable;
  }

  throw ParserException(errorMessage);
}
|
Reference in New Issue
Block a user