1
0
mirror of https://github.com/h7x4/unofficial_jisho_api_dart.git synced 2025-09-10 04:53:45 +02:00

Upgrade to 2.0.0, see changelog

This commit is contained in:
2021-07-25 22:57:02 +02:00
parent 527f58d679
commit c75f252a9c
44 changed files with 3557 additions and 3573 deletions

View File

@@ -7,11 +7,11 @@ library unofficial_jisho_api;
import 'dart:convert';
import 'package:http/http.dart' as http;
import './src/exampleSearch.dart';
import './src/kanjiSearch.dart';
import './src/example_search.dart';
import './src/kanji_search.dart';
import './src/objects.dart';
import './src/phraseScrape.dart';
import './src/phraseSearch.dart';
import './src/phrase_scrape.dart';
import './src/phrase_search.dart';
export './src/objects.dart';

View File

@@ -5,9 +5,8 @@
/// for providing HTML.
library unofficial_jisho_parser;
export './src/exampleSearch.dart'
show uriForExampleSearch, parseExamplePageData;
export './src/kanjiSearch.dart' show uriForKanjiSearch, parseKanjiPageData;
export './src/objects.dart';
export './src/phraseScrape.dart' show uriForPhraseScrape, parsePhrasePageData;
export './src/phraseSearch.dart';
export 'src/example_search.dart' show uriForExampleSearch, parseExamplePageData;
export 'src/kanji_search.dart' show uriForKanjiSearch, parseKanjiPageData;
export 'src/phrase_scrape.dart' show uriForPhraseScrape, parsePhrasePageData;
export 'src/phrase_search.dart';

View File

@@ -1,4 +1,6 @@
const String JISHO_API = 'https://jisho.org/api/v1/search/words';
const String SCRAPE_BASE_URI = 'https://jisho.org/search/';
const String STROKE_ORDER_DIAGRAM_BASE_URI =
// ignore_for_file: public_member_api_docs
const String jishoApi = 'https://jisho.org/api/v1/search/words';
const String scrapeBaseUri = 'https://jisho.org/search/';
const String strokeOrderDiagramBaseUri =
'https://classic.jisho.org/static/images/stroke_diagrams/';

View File

@@ -1,14 +1,15 @@
import 'package:html/parser.dart';
import 'package:html/dom.dart';
import 'package:html/parser.dart';
import './base_uri.dart';
import './objects.dart';
import './scraping.dart';
final RegExp _kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]');
/// Provides the URI for an example search
String uriForExampleSearch(String phrase) {
return '$SCRAPE_BASE_URI${Uri.encodeComponent(phrase)}%23sentences';
Uri uriForExampleSearch(String phrase) {
return Uri.parse('$scrapeBaseUri${Uri.encodeComponent(phrase)}%23sentences');
}
List<Element> _getChildrenAndSymbols(Element ul) {
@@ -16,7 +17,7 @@ List<Element> _getChildrenAndSymbols(Element ul) {
final ulCharArray = ulText.split('');
final ulChildren = ul.children;
var offsetPointer = 0;
List<Element> result = [];
final result = <Element>[];
for (var element in ulChildren) {
if (element.text !=
@@ -40,8 +41,13 @@ List<Element> _getChildrenAndSymbols(Element ul) {
return result;
}
ExampleResultData _getKanjiAndKana(Element div) {
final ul = div.querySelector('ul');
/// Although return type is List<String>, it is to be interpreted as (String, String)
List<String> _getKanjiAndKana(Element div) {
final ul = assertNotNull(
variable: div.querySelector('ul'),
errorMessage:
"Could not parse kanji/kana div. Is the provided document corrupt, or has Jisho been updated?",
);
final contents = _getChildrenAndSymbols(ul);
var kanji = '';
@@ -51,7 +57,11 @@ ExampleResultData _getKanjiAndKana(Element div) {
if (content.localName == 'li') {
final li = content;
final furigana = li.querySelector('.furigana')?.text;
final unlifted = li.querySelector('.unlinked')?.text;
final unlifted = assertNotNull(
variable: li.querySelector('.unlinked')?.text,
errorMessage:
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
);
if (furigana != null) {
kanji += unlifted;
@@ -74,39 +84,49 @@ ExampleResultData _getKanjiAndKana(Element div) {
}
} else {
final text = content.text.trim();
if (text != null) {
kanji += text;
kana += text;
}
kanji += text;
kana += text;
}
}
return ExampleResultData(
kanji: kanji,
kana: kana,
);
return [kanji, kana];
}
List<ExampleSentencePiece> getPieces(Element sentenceElement) {
final pieceElements = sentenceElement.querySelectorAll('li.clearfix');
final List<ExampleSentencePiece> pieces = [];
for (var pieceIndex = 0; pieceIndex < pieceElements.length; pieceIndex += 1) {
final pieceElement = pieceElements[pieceIndex];
pieces.add(ExampleSentencePiece(
lifted: pieceElement.querySelector('.furigana')?.text,
unlifted: pieceElement.querySelector('.unlinked')?.text,
));
}
return pieces;
return pieceElements.map((var e) {
final unlifted = assertNotNull(
variable: e.querySelector('.unlinked')?.text,
errorMessage:
"Could not parse a piece of the example sentence. Is the provided document corrupt, or has Jisho been updated?",
);
return ExampleSentencePiece(
lifted: e.querySelector('.furigana')?.text,
unlifted: unlifted,
);
}).toList();
}
ExampleResultData _parseExampleDiv(Element div) {
final result = _getKanjiAndKana(div);
result.english = div.querySelector('.english').text;
result.pieces = getPieces(div) ?? [];
final kanji = result[0];
final kana = result[1];
return result;
final english = assertNotNull(
variable: div.querySelector('.english')?.text,
errorMessage:
"Could not parse translation. Is the provided document corrupt, or has Jisho been updated?",
);
final pieces = getPieces(div);
return ExampleResultData(
english: english,
kanji: kanji,
kana: kana,
pieces: pieces,
);
}
/// Parses a jisho example sentence search page to an object
@@ -117,9 +137,8 @@ ExampleResults parseExamplePageData(String pageHtml, String phrase) {
final results = divs.map(_parseExampleDiv).toList();
return ExampleResults(
query: phrase,
found: results.isNotEmpty,
results: results ?? [],
uri: uriForExampleSearch(phrase)
);
query: phrase,
found: results.isNotEmpty,
results: results,
uri: uriForExampleSearch(phrase).toString());
}

View File

@@ -1,235 +0,0 @@
import 'package:html_unescape/html_unescape.dart' as html_entities;
import './base_uri.dart';
import './objects.dart';
final _htmlUnescape = html_entities.HtmlUnescape();
const _onyomiLocatorSymbol = 'On';
const _kunyomiLocatorSymbol = 'Kun';
/// Removes every carriage return and line feed from [str], then trims
/// leading and trailing whitespace from what is left.
String _removeNewlines(String str) {
  final lineBreak = RegExp(r'(?:\r|\n)');
  final flattened = str.replaceAll(lineBreak, '');
  return flattened.trim();
}
/// Builds the search URL for [kanji] as a string.
///
/// The kanji is percent-encoded and suffixed with `%23kanji`, the encoded
/// form of the `#kanji` search tag.
String uriForKanjiSearch(String kanji) {
  final encodedKanji = Uri.encodeComponent(kanji);
  return '$SCRAPE_BASE_URI$encodedKanji%23kanji';
}
/// Builds the URL of the stroke order diagram image for [kanji].
///
/// The file name is the decimal Unicode code point of the first character.
/// [String.runes] is used instead of `codeUnitAt` so that characters outside
/// the Basic Multilingual Plane yield their real code point rather than the
/// leading UTF-16 surrogate.
String _getUriForStrokeOrderDiagram(String kanji) {
  final codePoint = kanji.runes.first;
  return '$STROKE_ORDER_DIAGRAM_BASE_URI${codePoint}_frames.png';
}
/// Whether [pageHtml] contains the `<h1 class="character" ...>` heading
/// element whose text is exactly [kanji].
bool _containsKanjiGlyph(String pageHtml, String kanji) => pageHtml.contains(
    '<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>');
/// Returns the slice of [data] from [startIndex] (inclusive) to [endIndex]
/// (exclusive) with line breaks removed and surrounding whitespace trimmed.
String _getStringBetweenIndicies(String data, int startIndex, int endIndex) =>
    _removeNewlines(data.substring(startIndex, endIndex)).trim();
/// Returns the shortest text in [data] enclosed by the first occurrence of
/// [startString] and the next [endString], or `null` when there is none.
///
/// Both delimiters are matched literally; the enclosed text may span lines.
String _getStringBetweenStrings(
    String data, String startString, String endString) {
  final pattern = RegExp(
      '${RegExp.escape(startString)}(.*?)${RegExp.escape(endString)}',
      dotAll: true);
  final firstHit = pattern.firstMatch(data);
  if (firstHit == null) {
    return null;
  }
  return firstHit.group(1).toString();
}
/// Parses the text found between [startString] and [endString] in
/// [pageHtml] as an integer.
///
/// The lookup result is handed to [int.parse] unchecked, so a missing or
/// non-numeric match makes this throw.
int _getIntBetweenStrings(
        String pageHtml, String startString, String endString) =>
    int.parse(_getStringBetweenStrings(pageHtml, startString, endString));
/// Collects capture group 1 of every match of [regex] in [str], in order.
List<String> _getAllGlobalGroupMatches(String str, RegExp regex) =>
    regex.allMatches(str).map((hit) => hit.group(1)).toList();
/// Returns the inner text of every `<a href="...">...</a>` element in [str].
List<String> _parseAnchorsToArray(String str) =>
    _getAllGlobalGroupMatches(str, RegExp(r'<a href=".*?">(.*?)<\/a>'));
/// Lists the readings linked in the `<dt>[yomiLocatorSymbol]:</dt>` section
/// of [pageHtml]; a missing section is treated as empty text.
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
  final sectionStart = '<dt>$yomiLocatorSymbol:</dt>';
  final section = _getStringBetweenStrings(pageHtml, sectionStart, '</dl>');
  return _parseAnchorsToArray(section ?? '');
}
/// Lists the readings found under the `Kun` section of [pageHtml].
List<String> _getKunyomi(String pageHtml) =>
    _getYomi(pageHtml, _kunyomiLocatorSymbol);
/// Lists the readings found under the `On` section of [pageHtml].
List<String> _getOnyomi(String pageHtml) =>
    _getYomi(pageHtml, _onyomiLocatorSymbol);
/// Extracts the examples listed under the
/// `<h2>[yomiLocatorSymbol] reading compounds</h2>` heading of [pageHtml].
///
/// Returns `null` when the page has no such section. Each `<li>` is split
/// into lines: line 0 is the example, line 1 the reading, line 2 the meaning.
/// An item with fewer than three lines makes this throw a [RangeError].
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
  final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
  final exampleSection =
      _getStringBetweenStrings(pageHtml, locatorString, '</ul>');
  if (exampleSection == null) {
    return null;
  }

  final regex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);
  final regexResults =
      _getAllGlobalGroupMatches(exampleSection, regex).map((s) => s.trim());

  final examples = regexResults.map((regexResult) {
    final examplesLines = regexResult.split('\n').map((s) => s.trim()).toList();
    return YomiExample(
      example: examplesLines[0],
      // The reading is wrapped in lenticular brackets; strip them. The
      // bracket literals had been lost, which left this call a no-op.
      reading: examplesLines[1].replaceAll('【', '').replaceAll('】', ''),
      meaning: _htmlUnescape.convert(examplesLines[2]),
    );
  });
  return examples.toList();
}
/// Extracts the `On reading compounds` examples from [pageHtml].
List<YomiExample> _getOnyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
/// Extracts the `Kun reading compounds` examples from [pageHtml].
List<YomiExample> _getKunyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
/// Extracts the radical (symbol, alternative forms and meaning) from
/// [pageHtml], or returns `null` when the page has no
/// `<span class="radical_meaning">` element.
///
/// The symbol text is whatever sits between the `</span>` that closes the
/// meaning and the next `</span>`. When that text is longer than one
/// character, the first character is the symbol and the remainder (with
/// parentheses removed) is split on `', '` into the forms.
Radical _getRadical(String pageHtml) {
  const radicalMeaningStartString = '<span class="radical_meaning">';
  const radicalMeaningEndString = '</span>';

  // `?.` is required: the lookup returns null when the span is absent, and
  // calling `.trim()` on it unconditionally threw before the null check ran.
  final radicalMeaning = _getStringBetweenStrings(
    pageHtml,
    radicalMeaningStartString,
    radicalMeaningEndString,
  )?.trim();

  if (radicalMeaning == null) {
    return null;
  }

  final radicalMeaningStartIndex = pageHtml.indexOf(radicalMeaningStartString);
  final radicalMeaningEndIndex = pageHtml.indexOf(
    radicalMeaningEndString,
    radicalMeaningStartIndex,
  );

  final radicalSymbolStartIndex =
      radicalMeaningEndIndex + radicalMeaningEndString.length;
  const radicalSymbolEndString = '</span>';
  final radicalSymbolEndIndex =
      pageHtml.indexOf(radicalSymbolEndString, radicalSymbolStartIndex);

  final radicalSymbolsString = _getStringBetweenIndicies(
    pageHtml,
    radicalSymbolStartIndex,
    radicalSymbolEndIndex,
  );

  if (radicalSymbolsString.length > 1) {
    final radicalForms = radicalSymbolsString
        .substring(1)
        .replaceAll('(', '')
        .replaceAll(')', '')
        .trim()
        .split(', ');

    return Radical(
        symbol: radicalSymbolsString[0],
        forms: radicalForms,
        meaning: radicalMeaning);
  }

  return Radical(symbol: radicalSymbolsString, meaning: radicalMeaning);
}
/// Returns the sorted list of parts linked in the `<dt>Parts:</dt>` section
/// of [pageHtml].
///
/// A page without that section yields an empty list; previously the null
/// section was passed straight to the anchor parser, which threw.
List<String> _getParts(String pageHtml) {
  const partsSectionStartString = '<dt>Parts:</dt>';
  const partsSectionEndString = '</dl>';

  final partsSection = _getStringBetweenStrings(
    pageHtml,
    partsSectionStartString,
    partsSectionEndString,
  );

  // Same missing-section fallback as _getYomi.
  final result = _parseAnchorsToArray(partsSection ?? '');
  result.sort();
  return result;
}
/// Returns the `https:` URL of the first protocol-relative
/// `//….cloudfront.net/….svg` reference in [pageHtml], or `null` when the
/// page contains none.
String _getSvgUri(String pageHtml) {
  // Raw string with escaped dots, so '.' only matches a literal dot.
  final svgRegex = RegExp(r'//.*?\.cloudfront\.net/.*?\.svg');
  // firstMatch returns null when nothing matches; the previous code
  // dereferenced it unconditionally.
  final match = svgRegex.firstMatch(pageHtml);
  return match != null ? 'https:${match.group(0)}' : null;
}
/// Builds the URL of the stroke order animation for [kanji].
///
/// The file name is the lowercase hexadecimal Unicode code point of the
/// first character. [String.runes] is used instead of `codeUnitAt` so that
/// characters outside the Basic Multilingual Plane yield their real code
/// point rather than the leading UTF-16 surrogate.
String _getGifUri(String kanji) {
  final unicodeString = kanji.runes.first.toRadixString(16);
  final fileName = '$unicodeString.gif';
  return 'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$fileName';
}
/// Returns the number inside the `<strong>` element of the
/// `<div class="frequency">` block of [pageHtml].
///
/// Returns `null` when the block, or the `<strong>` element inside it, is
/// missing. The inner lookup used to be passed to [int.parse] unchecked.
int _getNewspaperFrequencyRank(String pageHtml) {
  final frequencySection =
      _getStringBetweenStrings(pageHtml, '<div class="frequency">', '</div>');
  if (frequencySection == null) {
    return null;
  }

  final frequencyRank =
      _getStringBetweenStrings(frequencySection, '<strong>', '</strong>');
  return frequencyRank != null ? int.parse(frequencyRank) : null;
}
/// Parses the HTML of a jisho kanji search page into a [KanjiResult].
///
/// Only `query` and `found` are set when [pageHtml] does not contain the
/// heading for [kanji]; otherwise every remaining field is filled in.
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
  final result = KanjiResult()
    ..query = kanji
    ..found = _containsKanjiGlyph(pageHtml, kanji);

  if (!result.found) {
    return result;
  }

  final rawMeaning = _getStringBetweenStrings(
      pageHtml, '<div class="kanji-details__main-meanings">', '</div>');

  return result
    ..taughtIn =
        _getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>')
    ..jlptLevel =
        _getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>')
    ..newspaperFrequencyRank = _getNewspaperFrequencyRank(pageHtml)
    ..strokeCount =
        _getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes')
    ..meaning = _htmlUnescape.convert(_removeNewlines(rawMeaning).trim())
    ..kunyomi = _getKunyomi(pageHtml) ?? []
    ..onyomi = _getOnyomi(pageHtml) ?? []
    ..onyomiExamples = _getOnyomiExamples(pageHtml) ?? []
    ..kunyomiExamples = _getKunyomiExamples(pageHtml) ?? []
    ..radical = _getRadical(pageHtml)
    ..parts = _getParts(pageHtml) ?? []
    ..strokeOrderDiagramUri = _getUriForStrokeOrderDiagram(kanji)
    ..strokeOrderSvgUri = _getSvgUri(pageHtml)
    ..strokeOrderGifUri = _getGifUri(kanji)
    ..uri = uriForKanjiSearch(kanji);
}

243
lib/src/kanji_search.dart Normal file
View File

@@ -0,0 +1,243 @@
import 'package:html_unescape/html_unescape.dart' as html_entities;
import './base_uri.dart';
import './objects.dart';
import './scraping.dart';
final _htmlUnescape = html_entities.HtmlUnescape();
const _onyomiLocatorSymbol = 'On';
const _kunyomiLocatorSymbol = 'Kun';
/// Builds the search [Uri] for [kanji].
///
/// The kanji is percent-encoded and suffixed with `%23kanji`, the encoded
/// form of the `#kanji` search tag.
Uri uriForKanjiSearch(String kanji) {
  final encodedKanji = Uri.encodeComponent(kanji);
  return Uri.parse('$scrapeBaseUri$encodedKanji%23kanji');
}
/// Builds the URL of the stroke order diagram image for [kanji].
///
/// The file name is the decimal Unicode code point of the first character.
/// [String.runes] is used instead of `codeUnitAt` so that characters outside
/// the Basic Multilingual Plane yield their real code point rather than the
/// leading UTF-16 surrogate.
String _getUriForStrokeOrderDiagram(String kanji) {
  final codePoint = kanji.runes.first;
  return '$strokeOrderDiagramBaseUri${codePoint}_frames.png';
}
/// Whether [pageHtml] contains the `<h1 class="character" ...>` heading
/// element whose text is exactly [kanji].
bool _containsKanjiGlyph(String pageHtml, String kanji) => pageHtml.contains(
    '<h1 class="character" data-area-name="print" lang="ja">$kanji</h1>');
/// Lists the readings linked in the `<dt>[yomiLocatorSymbol]:</dt>` section
/// of [pageHtml]; a missing section is treated as empty text.
List<String> _getYomi(String pageHtml, String yomiLocatorSymbol) {
  final sectionStart = '<dt>$yomiLocatorSymbol:</dt>';
  final section = getStringBetweenStrings(pageHtml, sectionStart, '</dl>');
  return parseAnchorsToArray(section ?? '');
}
/// Lists the readings found under the `Kun` section of [pageHtml].
List<String> _getKunyomi(String pageHtml) =>
    _getYomi(pageHtml, _kunyomiLocatorSymbol);
/// Lists the readings found under the `On` section of [pageHtml].
List<String> _getOnyomi(String pageHtml) =>
    _getYomi(pageHtml, _onyomiLocatorSymbol);
/// Extracts the examples listed under the
/// `<h2>[yomiLocatorSymbol] reading compounds</h2>` heading of [pageHtml].
///
/// Returns an empty list when the page has no such section. Each `<li>` is
/// split into lines: line 0 is the example, line 1 the reading, line 2 the
/// meaning. An item with fewer than three lines makes this throw a
/// [RangeError].
List<YomiExample> _getYomiExamples(String pageHtml, String yomiLocatorSymbol) {
  final locatorString = '<h2>$yomiLocatorSymbol reading compounds</h2>';
  final exampleSection =
      getStringBetweenStrings(pageHtml, locatorString, '</ul>');
  if (exampleSection == null) {
    return [];
  }

  final regex = RegExp(r'<li>(.*?)<\/li>', dotAll: true);
  final regexResults =
      getAllGlobalGroupMatches(exampleSection, regex).map((s) => s.trim());

  final examples = regexResults.map((regexResult) {
    final examplesLines = regexResult.split('\n').map((s) => s.trim()).toList();
    return YomiExample(
      example: examplesLines[0],
      // The reading is wrapped in lenticular brackets; strip them. The
      // bracket literals had been lost, which left this call a no-op.
      reading: examplesLines[1].replaceAll('【', '').replaceAll('】', ''),
      meaning: _htmlUnescape.convert(examplesLines[2]),
    );
  });
  return examples.toList();
}
/// Extracts the `On reading compounds` examples from [pageHtml].
List<YomiExample> _getOnyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _onyomiLocatorSymbol);
/// Extracts the `Kun reading compounds` examples from [pageHtml].
List<YomiExample> _getKunyomiExamples(String pageHtml) =>
    _getYomiExamples(pageHtml, _kunyomiLocatorSymbol);
/// Extracts the radical (symbol, alternative forms and meaning) from
/// [pageHtml], or returns `null` when the page has no
/// `<span class="radical_meaning">` element.
///
/// The symbol text is whatever sits between the `</span>` that closes the
/// meaning and the next `</span>`. When that text is longer than one
/// character, the first character is the symbol and the remainder (with
/// parentheses removed) is split on `', '` into the forms.
Radical? _getRadical(String pageHtml) {
  const meaningOpenTag = '<span class="radical_meaning">';
  const meaningCloseTag = '</span>';
  const symbolCloseTag = '</span>';

  final meaning =
      getStringBetweenStrings(pageHtml, meaningOpenTag, meaningCloseTag)
          ?.trim();
  if (meaning == null) return null;

  // Locate the text that directly follows the meaning span.
  final meaningStart = pageHtml.indexOf(meaningOpenTag);
  final meaningEnd = pageHtml.indexOf(meaningCloseTag, meaningStart);
  final symbolsStart = meaningEnd + meaningCloseTag.length;
  final symbolsEnd = pageHtml.indexOf(symbolCloseTag, symbolsStart);
  final symbols = getStringBetweenIndicies(pageHtml, symbolsStart, symbolsEnd);

  if (symbols.length <= 1) {
    return Radical(symbol: symbols, meaning: meaning);
  }

  final withoutParentheses =
      symbols.substring(1).replaceAll('(', '').replaceAll(')', '');
  return Radical(
    symbol: symbols[0],
    forms: withoutParentheses.trim().split(', '),
    meaning: meaning,
  );
}
/// Returns the text of the `<div class="kanji-details__main-meanings">`
/// block of [pageHtml] with line breaks removed, whitespace trimmed and HTML
/// entities unescaped.
///
/// A missing block is reported through `assertNotNull`.
String _getMeaning(String pageHtml) {
  final meaningBlock = getStringBetweenStrings(
    pageHtml,
    '<div class="kanji-details__main-meanings">',
    '</div>',
  );
  final rawMeaning = assertNotNull(
    variable: meaningBlock,
    errorMessage:
        "Could not parse meaning. Is the provided document corrupt, or has Jisho been updated?",
  );
  final singleLine = removeNewlines(rawMeaning).trim();
  return _htmlUnescape.convert(singleLine);
}
/// Returns the sorted list of parts linked in the `<dt>Parts:</dt>` section
/// of [pageHtml], or an empty list when the section is missing.
List<String> _getParts(String pageHtml) {
  final section = getStringBetweenStrings(pageHtml, '<dt>Parts:</dt>', '</dl>');
  return section == null ? [] : (parseAnchorsToArray(section)..sort());
}
/// Returns the `https:` URL of the first protocol-relative
/// `//….cloudfront.net/….svg` reference in [pageHtml].
///
/// A page without such a reference is reported through `assertNotNull`.
String _getSvgUri(String pageHtml) {
  // Raw string with escaped dots, so '.' only matches a literal dot and the
  // needless `\/` escapes are gone.
  final svgRegex = RegExp(r'//.*?\.cloudfront\.net/.*?\.svg');
  final regexResult = assertNotNull(
    variable: svgRegex.firstMatch(pageHtml)?.group(0),
    errorMessage:
        "Could not find SVG URI. Is the provided document corrupt, or has Jisho been updated?",
  );
  return 'https:$regexResult';
}
/// Builds the URL of the stroke order animation for [kanji].
///
/// The file name is the lowercase hexadecimal Unicode code point of the
/// first character. [String.runes] is used instead of `codeUnitAt` so that
/// characters outside the Basic Multilingual Plane yield their real code
/// point rather than the leading UTF-16 surrogate.
String _getGifUri(String kanji) {
  final unicodeString = kanji.runes.first.toRadixString(16);
  final fileName = '$unicodeString.gif';
  return 'https://raw.githubusercontent.com/mistval/kanji_images/master/gifs/$fileName';
}
/// Returns the number inside the `<strong>` element of the
/// `<div class="frequency">` block of [pageHtml].
///
/// Returns `null` when the block, or the `<strong>` element inside it, is
/// missing. Non-numeric text makes [int.parse] throw.
int? _getNewspaperFrequencyRank(String pageHtml) {
  final section =
      getStringBetweenStrings(pageHtml, '<div class="frequency">', '</div>');
  // ignore: avoid_returning_null
  if (section == null) return null;

  final rankText = getStringBetweenStrings(section, '<strong>', '</strong>');
  return rankText == null ? null : int.parse(rankText);
}
/// Returns the integer found between `<strong>` and `</strong> strokes` in
/// [pageHtml]; a missing value is reported through `assertNotNull`.
int _getStrokeCount(String pageHtml) => assertNotNull(
      variable: getIntBetweenStrings(pageHtml, '<strong>', '</strong> strokes'),
      errorMessage:
          "Could not parse stroke count. Is the provided document corrupt, or has Jisho been updated?",
    );
/// Returns the text between `taught in <strong>` and `</strong>` in
/// [pageHtml], or `null` when it is absent.
String? _getTaughtIn(String pageHtml) =>
    getStringBetweenStrings(pageHtml, 'taught in <strong>', '</strong>');
/// Returns the text between `JLPT level <strong>` and `</strong>` in
/// [pageHtml], or `null` when it is absent.
String? _getJlptLevel(String pageHtml) =>
    getStringBetweenStrings(pageHtml, 'JLPT level <strong>', '</strong>');
/// Parses the HTML of a jisho kanji search page into a [KanjiResult].
///
/// When [pageHtml] does not contain the heading for [kanji], the result has
/// `found == false` and no `data`. Otherwise `data` is populated from the
/// page; the helpers for required fields report a malformed page through
/// `assertNotNull`.
KanjiResult parseKanjiPageData(String pageHtml, String kanji) {
  if (!_containsKanjiGlyph(pageHtml, kanji)) {
    return KanjiResult(query: kanji, found: false);
  }

  // Argument order is kept so the helpers run in their original sequence.
  final data = KanjiResultData(
    strokeCount: _getStrokeCount(pageHtml),
    meaning: _getMeaning(pageHtml),
    strokeOrderDiagramUri: _getUriForStrokeOrderDiagram(kanji),
    strokeOrderSvgUri: _getSvgUri(pageHtml),
    strokeOrderGifUri: _getGifUri(kanji),
    uri: uriForKanjiSearch(kanji).toString(),
    parts: _getParts(pageHtml),
    taughtIn: _getTaughtIn(pageHtml),
    jlptLevel: _getJlptLevel(pageHtml),
    newspaperFrequencyRank: _getNewspaperFrequencyRank(pageHtml),
    kunyomi: _getKunyomi(pageHtml),
    onyomi: _getOnyomi(pageHtml),
    kunyomiExamples: _getKunyomiExamples(pageHtml),
    onyomiExamples: _getOnyomiExamples(pageHtml),
    radical: _getRadical(pageHtml),
  );
  return KanjiResult(query: kanji, found: true, data: data);
}

View File

@@ -2,96 +2,157 @@
/* searchForKanji related classes */
/* -------------------------------------------------------------------------- */
/// An example of a word that contains the kanji in question.
class YomiExample {
/// The original text of the example.
String example;
/// The reading of the example.
String reading;
/// The meaning of the example.
String meaning;
YomiExample({this.example, this.reading, this.meaning});
// ignore: public_member_api_docs
YomiExample({
required this.example,
required this.reading,
required this.meaning,
});
Map<String, String> toJson() =>
{'example': example, 'reading': reading, 'meaning': meaning};
// ignore: public_member_api_docs
Map<String, String> toJson() => {
'example': example,
'reading': reading,
'meaning': meaning,
};
}
/// Information regarding the radical of a kanji.
class Radical {
/// The radical symbol, if applicable.
/// The radical symbol.
String symbol;
/// The radical forms used in this kanji, if applicable.
/// The radical forms used in this kanji.
List<String> forms;
/// The meaning of the radical, if applicable.
/// The meaning of the radical.
String meaning;
Radical({this.symbol, this.forms, this.meaning});
// ignore: public_member_api_docs
Radical({
required this.symbol,
this.forms = const [],
required this.meaning,
});
Map<String, dynamic> toJson() =>
{'symbol': symbol, 'forms': forms, 'meaning': meaning};
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'symbol': symbol,
'forms': forms,
'meaning': meaning,
};
}
/// The main wrapper containing data about the query and whether or not it was successful.
class KanjiResult {
/// The term that you searched for.
String query;
/// True if results were found.
bool found;
/// The school level that the kanji is taught in, if applicable.
String taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
String jlptLevel;
/// A number representing this kanji's frequency rank in newspapers, if applicable.
int newspaperFrequencyRank;
/// How many strokes this kanji is typically drawn in, if applicable.
int strokeCount;
/// The meaning of the kanji, if applicable.
String meaning;
/// This character's kunyomi, if applicable.
List<String> kunyomi;
/// This character's onyomi, if applicable.
List<String> onyomi;
/// Examples of this character's kunyomi being used, if applicable.
List<YomiExample> kunyomiExamples;
/// Examples of this character's onyomi being used, if applicable.
List<YomiExample> onyomiExamples;
/// Information about this character's radical, if applicable.
Radical radical;
/// The parts used in this kanji, if applicable.
List<String> parts;
/// The URL to a diagram showing how to draw this kanji step by step, if applicable.
String strokeOrderDiagramUri;
/// The URL to an SVG describing how to draw this kanji, if applicable.
String strokeOrderSvgUri;
/// The URL to a gif showing the kanji being draw and its stroke order, if applicable.
String strokeOrderGifUri;
/// The URI that these results were scraped from, if applicable.
String uri;
/// The result data if search was successful.
KanjiResultData? data;
KanjiResult(
{this.query,
this.found,
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
this.strokeCount,
this.meaning,
this.kunyomi,
this.onyomi,
this.kunyomiExamples,
this.onyomiExamples,
this.radical,
this.parts,
this.strokeOrderDiagramUri,
this.strokeOrderSvgUri,
this.strokeOrderGifUri,
this.uri});
// ignore: public_member_api_docs
KanjiResult({
required this.query,
required this.found,
this.data,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {
'query': query,
'found': found,
'data': data,
};
}
}
/// The main kanji data class, collecting all the result information in one place.
class KanjiResultData {
/// The school level that the kanji is taught in, if applicable.
String? taughtIn;
/// The lowest JLPT exam that this kanji is likely to appear in, if applicable.
///
/// 'N5' or 'N4' or 'N3' or 'N2' or 'N1'.
String? jlptLevel;
/// A number representing this kanji's frequency rank in newspapers, if applicable.
int? newspaperFrequencyRank;
/// How many strokes this kanji is typically drawn in.
int strokeCount;
/// The meaning of the kanji.
String meaning;
/// This character's kunyomi.
List<String> kunyomi;
/// This character's onyomi.
List<String> onyomi;
/// Examples of this character's kunyomi being used.
List<YomiExample> kunyomiExamples;
/// Examples of this character's onyomi being used.
List<YomiExample> onyomiExamples;
/// Information about this character's radical, if applicable.
Radical? radical;
/// The parts used in this kanji.
List<String> parts;
/// The URL to a diagram showing how to draw this kanji step by step.
String strokeOrderDiagramUri;
/// The URL to an SVG describing how to draw this kanji.
String strokeOrderSvgUri;
/// The URL to a gif showing the kanji being drawn and its stroke order.
String strokeOrderGifUri;
/// The URI that these results were scraped from.
String uri;
// ignore: public_member_api_docs
KanjiResultData({
this.taughtIn,
this.jlptLevel,
this.newspaperFrequencyRank,
required this.strokeCount,
required this.meaning,
this.kunyomi = const [],
this.onyomi = const [],
this.kunyomiExamples = const [],
this.onyomiExamples = const [],
this.radical,
this.parts = const [],
required this.strokeOrderDiagramUri,
required this.strokeOrderSvgUri,
required this.strokeOrderGifUri,
required this.uri,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {
'taughtIn': taughtIn,
'jlptLevel': jlptLevel,
'newspaperFrequencyRank': newspaperFrequencyRank,
@@ -101,12 +162,12 @@ class KanjiResult {
'onyomi': onyomi,
'onyomiExamples': onyomiExamples,
'kunyomiExamples': kunyomiExamples,
'radical': (radical != null) ? radical.toJson() : null,
'radical': radical?.toJson(),
'parts': parts,
'strokeOrderDiagramUri': strokeOrderDiagramUri,
'strokeOrderSvgUri': strokeOrderSvgUri,
'strokeOrderGifUri': strokeOrderGifUri,
'uri': uri
'uri': uri,
};
}
}
@@ -115,54 +176,90 @@ class KanjiResult {
/* searchForExamples related classes */
/* -------------------------------------------------------------------------- */
/// A word in an example sentence, consisting of either just kana, or kanji with furigana.
class ExampleSentencePiece {
/// Baseline text shown on Jisho.org (below the lifted text / furigana)
String lifted;
/// Furigana text shown on Jisho.org (above the unlifted text)
/// Furigana text shown on Jisho.org (above the unlifted text), if applicable.
String? lifted;
/// Baseline text shown on Jisho.org (below the lifted text / furigana).
String unlifted;
ExampleSentencePiece({this.lifted, this.unlifted});
// ignore: public_member_api_docs
ExampleSentencePiece({
this.lifted,
required this.unlifted,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {'lifted': lifted, 'unlifted': unlifted};
return {
'lifted': lifted,
'unlifted': unlifted,
};
}
}
/// All data making up one example sentence.
class ExampleResultData {
/// The example sentence including kanji.
String kanji;
/// The example sentence without kanji (only kana). Sometimes this may include some Kanji, as furigana is not always available from Jisho.org.
String kana;
/// An English translation of the example.
String english;
/// The lifted/unlifted pairs that make up the sentence. Lifted text is furigana, unlifted is the text below the furigana.
List<ExampleSentencePiece> pieces;
ExampleResultData({this.english, this.kanji, this.kana, this.pieces});
// ignore: public_member_api_docs
ExampleResultData({
required this.english,
required this.kanji,
required this.kana,
required this.pieces,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {'english': english, 'kanji': kanji, 'kana': kana, 'pieces': pieces};
return {
'english': english,
'kanji': kanji,
'kana': kana,
'pieces': pieces,
};
}
}
/// The main wrapper containing data about the query and whether or not it was successful.
class ExampleResults {
/// The term that you searched for.
String query;
/// True if results were found.
bool found;
/// The URI that these results were scraped from.
String uri;
/// The examples that were found, if any.
List<ExampleResultData> results;
ExampleResults({this.query, this.found, this.results, this.uri});
// ignore: public_member_api_docs
ExampleResults({
required this.query,
required this.found,
required this.results,
required this.uri,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() {
return {
'query': query,
'found': found,
'results': results,
'uri': uri
'uri': uri,
};
}
}
@@ -171,96 +268,178 @@ class ExampleResults {
/* scrapeForPhrase related classes */
/* -------------------------------------------------------------------------- */
/// An example sentence.
class PhraseScrapeSentence {
/// The English meaning of the sentence.
String english;
/// The Japanese text of the sentence.
String japanese;
/// The lifted/unlifted pairs that make up the sentence. Lifted text is furigana, unlifted is the text below the furigana.
List<ExampleSentencePiece> pieces;
PhraseScrapeSentence({this.english, this.japanese, this.pieces});
// ignore: public_member_api_docs
PhraseScrapeSentence({
required this.english,
required this.japanese,
required this.pieces,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() =>
{'english': english, 'japanese': japanese, 'pieces': pieces};
}
/// The data representing one "meaning" or "sense" of the word
class PhraseScrapeMeaning {
/// The words that Jisho lists as "see also".
List<String> seeAlsoTerms;
/// Example sentences for this meaning.
List<PhraseScrapeSentence> sentences;
/// The definition of the meaning
/// The definition of the meaning.
String definition;
/// Supplemental information.
/// For example "usually written using kana alone".
List<String> supplemental;
/// An "abstract" definition.
/// Often this is a Wikipedia definition.
String definitionAbstract;
String? definitionAbstract;
/// Tags associated with this meaning.
List<String> tags;
PhraseScrapeMeaning(
{this.seeAlsoTerms,
this.sentences,
this.definition,
this.supplemental,
this.definitionAbstract,
this.tags});
// ignore: public_member_api_docs
PhraseScrapeMeaning({
this.seeAlsoTerms = const [],
required this.sentences,
required this.definition,
this.supplemental = const [],
this.definitionAbstract,
this.tags = const [],
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'seeAlsoTerms': seeAlsoTerms,
'sentences': sentences,
'definition': definition,
'supplemental': supplemental,
'definitionAbstract': definitionAbstract,
'tags': tags
'tags': tags,
};
}
/// A pair of kanji and potential furigana.
class KanjiKanaPair {
/// Kanji
String kanji;
String kana;
KanjiKanaPair({this.kanji, this.kana});
/// Furigana, if applicable.
String? kana;
Map<String, String> toJson() => {'kanji': kanji, 'kana': kana};
// ignore: public_member_api_docs
KanjiKanaPair({
required this.kanji,
this.kana,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'kanji': kanji,
'kana': kana,
};
}
/// The main wrapper containing data about the query and whether or not it was successful.
class PhrasePageScrapeResult {
/// True if a result was found.
bool found;
/// The term that you searched for.
String query;
/// The URI that these results were scraped from, if a result was found.
String uri;
/// Other forms of the search term, if a result was found.
List<String> tags;
/// Information about the meanings associated with this search result.
List<PhraseScrapeMeaning> meanings;
/// Tags associated with this search result.
List<KanjiKanaPair> otherForms;
/// Notes associated with the search result.
List<String> notes;
PhrasePageScrapeResult(
{this.found,
this.query,
this.uri,
this.tags,
this.meanings,
this.otherForms,
this.notes});
/// The result data if search was successful.
PhrasePageScrapeResultData? data;
// ignore: public_member_api_docs
PhrasePageScrapeResult({
required this.found,
required this.query,
this.data,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'found': found,
'query': query,
'data': data,
};
}
/// Pronunciation audio.
class AudioFile {
/// The uri of the audio file.
String uri;
/// The mimetype of the audio.
String mimetype;
// ignore: public_member_api_docs
AudioFile({
required this.uri,
required this.mimetype,
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'uri': uri,
'mimetype': mimetype,
};
}
/// The main scrape data class, collecting all the result information in one place.
class PhrasePageScrapeResultData {
/// The URI that these results were scraped from.
String uri;
/// Other forms of the search term.
List<String> tags;
/// Information about the meanings associated with this search result.
List<PhraseScrapeMeaning> meanings;
/// Other forms of the search term.
List<KanjiKanaPair> otherForms;
/// Pronunciation of the search result.
List<AudioFile> audio;
/// Notes associated with the search result.
List<String> notes;
// ignore: public_member_api_docs
PhrasePageScrapeResultData({
required this.uri,
this.tags = const [],
this.meanings = const [],
this.otherForms = const [],
this.audio = const [],
this.notes = const [],
});
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'uri': uri,
'tags': tags,
'meanings': meanings,
'otherForms': otherForms,
'notes': notes
'audio': audio,
'notes': notes,
};
}
@@ -268,85 +447,157 @@ class PhrasePageScrapeResult {
/* searchForPhrase related classes */
/* -------------------------------------------------------------------------- */
/// Kanji/Furigana pair, or just kana as word.
///
/// Which field acts as kanji and/or kana might be unreliable, which is why both are nullable.
class JishoJapaneseWord {
String word;
String reading;
/// Usually kanji or kana.
String? word;
JishoJapaneseWord({this.word, this.reading});
/// Usually furigana, if applicable.
String? reading;
// ignore: public_member_api_docs
JishoJapaneseWord({
this.word,
this.reading,
});
// ignore: public_member_api_docs
factory JishoJapaneseWord.fromJson(Map<String, dynamic> json) {
return JishoJapaneseWord(
word: json['word'] as String, reading: json['reading'] as String);
word: json['word'] as String?,
reading: json['reading'] as String?,
);
}
Map<String, dynamic> toJson() => {'word': word, 'reading': reading};
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'word': word,
'reading': reading,
};
}
/// Relevant links of the search result.
class JishoSenseLink {
/// Description of the linked webpage.
String text;
/// Link to the webpage.
String url;
JishoSenseLink({this.text, this.url});
// ignore: public_member_api_docs
JishoSenseLink({required this.text, required this.url});
// ignore: public_member_api_docs
factory JishoSenseLink.fromJson(Map<String, dynamic> json) {
return JishoSenseLink(
text: json['text'] as String, url: json['url'] as String);
text: json['text'] as String,
url: json['url'] as String,
);
}
Map<String, dynamic> toJson() => {'text': text, 'url': url};
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'text': text,
'url': url,
};
}
/// Origin of the word (from other languages).
class JishoWordSource {
  /// Origin language.
  String language;

  /// Origin word, if present.
  String? word;

  /// Creates a source entry for [language], optionally with the origin [word].
  JishoWordSource({
    required this.language,
    this.word,
  });

  /// Returns a JSON-encodable map with the `language` and `word` keys.
  ///
  /// The language key is `language` (no trailing colon), matching the field
  /// name and the key style used by the other `toJson` methods in this file.
  Map<String, dynamic> toJson() => {
        'language': language,
        'word': word,
      };
}
/// One sense of the word.
class JishoWordSense {
List<String> english_definitions;
List<String> parts_of_speech;
/// The meaning(s) of the word.
List<String> englishDefinitions;
/// Type of word (Noun, Verb, etc.).
List<String> partsOfSpeech;
/// Relevant links.
List<JishoSenseLink> links;
/// Relevant tags.
List<String> tags;
List<String> see_also;
/// Relevant words (might include synonyms).
List<String> seeAlso;
/// Words with opposite meaning.
List<String> antonyms;
List<dynamic> source;
/// Origins of the word (from other languages).
List<JishoWordSource> source;
/// Additional info.
List<String> info;
List<dynamic> restrictions;
JishoWordSense(
{this.english_definitions,
this.parts_of_speech,
this.links,
this.tags,
this.see_also,
this.antonyms,
this.source,
this.info,
this.restrictions});
/// Restrictions as to which variants of the japanese words are usable for this sense.
List<String> restrictions;
// ignore: public_member_api_docs
JishoWordSense({
required this.englishDefinitions,
required this.partsOfSpeech,
this.links = const [],
this.tags = const [],
this.seeAlso = const [],
this.antonyms = const [],
this.source = const [],
this.info = const [],
this.restrictions = const [],
});
/// Builds a [JishoWordSense] from one decoded entry of the `senses` list.
///
/// Every list is rebuilt element by element. A decoded JSON array is a
/// `List<dynamic>`, so casting it directly to `List<String>` or
/// `List<JishoWordSource>` throws at runtime.
factory JishoWordSense.fromJson(Map<String, dynamic> json) {
  // Reads json[key] as a list and casts each element to String.
  List<String> strings(String key) =>
      (json[key] as List).map((item) => item as String).toList();

  return JishoWordSense(
    englishDefinitions: strings('english_definitions'),
    partsOfSpeech: strings('parts_of_speech'),
    links: (json['links'] as List)
        .map((link) => JishoSenseLink.fromJson(link))
        .toList(),
    tags: strings('tags'),
    seeAlso: strings('see_also'),
    antonyms: strings('antonyms'),
    // NOTE(review): assumes each source entry is a map with a `language`
    // string and an optional `word` string - confirm against an API response.
    source: (json['source'] as List)
        .map((entry) => JishoWordSource(
              language: entry['language'] as String,
              word: entry['word'] as String?,
            ))
        .toList(),
    info: strings('info'),
    restrictions: strings('restrictions'),
  );
}
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'english_definitions': english_definitions,
'parts_of_speech': parts_of_speech,
'english_definitions': englishDefinitions,
'parts_of_speech': partsOfSpeech,
'links': links,
'tags': tags,
'see_also': see_also,
'see_also': seeAlso,
'antonyms': antonyms,
'source': source,
'info': info,
@@ -354,88 +605,143 @@ class JishoWordSense {
};
}
/// The original source(s) of the result.
class JishoAttribution {
/// Whether jmdict was a source.
bool jmdict;
/// Whether jmnedict was a source.
bool jmnedict;
String dbpedia;
JishoAttribution({this.jmdict, this.jmnedict, this.dbpedia});
/// Additional sources, if applicable.
String? dbpedia;
// ignore: public_member_api_docs
JishoAttribution({
required this.jmdict,
required this.jmnedict,
this.dbpedia,
});
// ignore: public_member_api_docs
factory JishoAttribution.fromJson(Map<String, dynamic> json) {
return JishoAttribution(
jmdict: (json['jmdict'].toString() == 'true'),
jmnedict: (json['jmnedict'].toString() == 'true'),
dbpedia: (json['dbpedia'].toString() != 'false')
? json['dbpedia'].toString()
: null);
jmdict: (json['jmdict'].toString() == 'true'),
jmnedict: (json['jmnedict'].toString() == 'true'),
dbpedia: (json['dbpedia'].toString() != 'false')
? json['dbpedia'].toString()
: null,
);
}
Map<String, dynamic> toJson() =>
{'jmdict': jmdict, 'jmnedict': jmnedict, 'dbpedia': dbpedia};
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'jmdict': jmdict,
'jmnedict': jmnedict,
'dbpedia': dbpedia,
};
}
/// The main API data class, collecting all information of one result in one place.
class JishoResult {
/// The main version of the word
///
/// This value might sometimes appear as some kind of hash or encoded version of the word.
/// Whenever it happens, the word usually originates from dbpedia.
String slug;
bool is_common;
/// Whether the word is common.
///
/// Dbpedia sometimes omits this value.
bool? isCommon;
/// Related tags.
List<String> tags;
/// Relevant jlpt levels.
List<String> jlpt;
/// Japanese versions of the word.
List<JishoJapaneseWord> japanese;
/// Translations of the word.
List<JishoWordSense> senses;
/// Sources.
JishoAttribution attribution;
JishoResult(
{this.slug,
this.is_common,
this.tags,
this.jlpt,
this.japanese,
this.senses,
this.attribution});
// ignore: public_member_api_docs
JishoResult({
required this.slug,
required this.isCommon,
this.tags = const [],
this.jlpt = const [],
required this.japanese,
required this.senses,
required this.attribution,
});
// ignore: public_member_api_docs
factory JishoResult.fromJson(Map<String, dynamic> json) {
return JishoResult(
slug: json['slug'] as String,
is_common: json['is_common'] as bool,
tags: (json['tags'] as List).map((result) => result as String).toList(),
jlpt: (json['jlpt'] as List).map((result) => result as String).toList(),
japanese: (json['japanese'] as List)
.map((result) => JishoJapaneseWord.fromJson(result))
.toList(),
senses: (json['senses'] as List)
.map((result) => JishoWordSense.fromJson(result))
.toList(),
attribution: JishoAttribution.fromJson(json['attribution']));
slug: json['slug'] as String,
isCommon: json['is_common'] as bool?,
tags: (json['tags'] as List).map((result) => result as String).toList(),
jlpt: (json['jlpt'] as List).map((result) => result as String).toList(),
japanese: (json['japanese'] as List)
.map((result) => JishoJapaneseWord.fromJson(result))
.toList(),
senses: (json['senses'] as List)
.map((result) => JishoWordSense.fromJson(result))
.toList(),
attribution: JishoAttribution.fromJson(json['attribution']),
);
}
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {
'slug': slug,
'is_common': is_common,
'is_common': isCommon,
'tags': tags,
'jlpt': jlpt,
'japanese': japanese,
'senses': senses,
'attribution': attribution
'attribution': attribution,
};
}
/// Metadata with result status.
class JishoResultMeta {
/// HTTP status code.
int status;
JishoResultMeta({this.status});
// ignore: public_member_api_docs
JishoResultMeta({required this.status});
// ignore: public_member_api_docs
factory JishoResultMeta.fromJson(Map<String, dynamic> json) {
return JishoResultMeta(status: json['status'] as int);
}
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {'status': status};
}
/// The main API result wrapper containing whether it was successful, and potential results.
class JishoAPIResult {
/// Metadata with result status.
JishoResultMeta meta;
List<JishoResult> data;
JishoAPIResult({this.meta, this.data});
/// Results.
List<JishoResult>? data;
// ignore: public_member_api_docs
JishoAPIResult({
required this.meta,
this.data,
});
// ignore: public_member_api_docs
factory JishoAPIResult.fromJson(Map<String, dynamic> json) {
return JishoAPIResult(
meta: JishoResultMeta.fromJson(json['meta']),
@@ -444,5 +750,6 @@ class JishoAPIResult {
.toList());
}
// ignore: public_member_api_docs
Map<String, dynamic> toJson() => {'meta': meta.toJson(), 'data': data};
}

View File

@@ -1,157 +0,0 @@
import 'package:html/parser.dart';
import 'package:html/dom.dart';
import './exampleSearch.dart' show getPieces;
import './objects.dart';
List<String> _getTags(Document document) {
final List<String> tags = [];
final tagElements = document.querySelectorAll('.concept_light-tag');
for (var i = 0; i < tagElements.length; i += 1) {
final tagText = tagElements[i].text;
tags.add(tagText);
}
return tags;
}
List<String> _getMostRecentWordTypes(Element child) {
return child.text.split(',').map((s) => s.trim().toLowerCase()).toList();
}
List<KanjiKanaPair> _getOtherForms(Element child) {
return child.text
.split('')
.map((s) => s.replaceAll('', '').replaceAll('', '').split(' '))
.map((a) =>
(KanjiKanaPair(kanji: a[0], kana: (a.length == 2) ? a[1] : null)))
.toList();
}
List<String> _getNotes(Element child) => child.text.split('\n');
String _getMeaning(Element child) =>
child.querySelector('.meaning-meaning').text;
String _getMeaningAbstract(Element child) {
final meaningAbstract = child.querySelector('.meaning-abstract');
if (meaningAbstract == null) return null;
for (var element in meaningAbstract.querySelectorAll('a')) {
element.remove();
}
return child.querySelector('.meaning-abstract')?.text;
}
List<String> _getSupplemental(Element child) {
final supplemental = child.querySelector('.supplemental_info');
if (supplemental == null) return [];
return supplemental.text.split(',').map((s) => s.trim()).toList();
}
List<String> _getSeeAlsoTerms(List<String> supplemental) {
if (supplemental == null) return [];
final List<String> seeAlsoTerms = [];
for (var i = supplemental.length - 1; i >= 0; i -= 1) {
final supplementalEntry = supplemental[i];
if (supplementalEntry.startsWith('See also')) {
seeAlsoTerms.add(supplementalEntry.replaceAll('See also ', ''));
supplemental.removeAt(i);
}
}
return seeAlsoTerms;
}
List<PhraseScrapeSentence> _getSentences(Element child) {
final sentenceElements =
child.querySelector('.sentences')?.querySelectorAll('.sentence');
if (sentenceElements == null) return [];
final List<PhraseScrapeSentence> sentences = [];
for (var sentenceIndex = 0;
sentenceIndex < (sentenceElements?.length ?? 0);
sentenceIndex += 1) {
final sentenceElement = sentenceElements[sentenceIndex];
final english = sentenceElement.querySelector('.english').text;
final pieces = getPieces(sentenceElement);
sentenceElement.querySelector('.english').remove();
for (var element in sentenceElement.children[0].children) {
element.querySelector('.furigana')?.remove();
}
final japanese = sentenceElement.text;
sentences.add(PhraseScrapeSentence(
english: english, japanese: japanese, pieces: pieces ?? []));
}
return sentences;
}
PhrasePageScrapeResult _getMeaningsOtherFormsAndNotes(Document document) {
final returnValues = PhrasePageScrapeResult(otherForms: [], notes: []);
final meaningsWrapper = document.querySelector('.meanings-wrapper');
if (meaningsWrapper == null) return PhrasePageScrapeResult(found: false);
returnValues.found = true;
final meaningsChildren = meaningsWrapper.children;
final List<PhraseScrapeMeaning> meanings = [];
var mostRecentWordTypes = [];
for (var meaningIndex = 0;
meaningIndex < meaningsChildren.length;
meaningIndex += 1) {
final child = meaningsChildren[meaningIndex];
if (child.className.contains('meaning-tags')) {
mostRecentWordTypes = _getMostRecentWordTypes(child);
} else if (mostRecentWordTypes[0] == 'other forms') {
returnValues.otherForms = _getOtherForms(child);
} else if (mostRecentWordTypes[0] == 'notes') {
returnValues.notes = _getNotes(child);
} else {
final meaning = _getMeaning(child);
final meaningAbstract = _getMeaningAbstract(child);
final supplemental = _getSupplemental(child);
final seeAlsoTerms = _getSeeAlsoTerms(supplemental);
final sentences = _getSentences(child);
meanings.add(PhraseScrapeMeaning(
seeAlsoTerms: seeAlsoTerms ?? [],
sentences: sentences ?? [],
definition: meaning,
supplemental: supplemental ?? [],
definitionAbstract: meaningAbstract,
tags: mostRecentWordTypes ?? [],
));
}
}
returnValues.meanings = meanings;
return returnValues;
}
/// Provides the URI for a phrase scrape
String uriForPhraseScrape(String searchTerm) {
return 'https://jisho.org/word/${Uri.encodeComponent(searchTerm)}';
}
/// Parses a jisho word search page to an object
PhrasePageScrapeResult parsePhrasePageData(String pageHtml, String query) {
final document = parse(pageHtml);
final result = _getMeaningsOtherFormsAndNotes(document);
result.query = query;
if (!result.found) return result;
result.uri = uriForPhraseScrape(query);
result.tags = _getTags(document);
return result;
}

View File

@@ -1,6 +0,0 @@
import './base_uri.dart';
/// Provides the URI for a phrase search
String uriForPhraseSearch(String phrase) {
return '$JISHO_API?keyword=${Uri.encodeComponent(phrase)}';
}

202
lib/src/phrase_scrape.dart Normal file
View File

@@ -0,0 +1,202 @@
import 'package:html/dom.dart';
import 'package:html/parser.dart';
import './example_search.dart' show getPieces;
import './objects.dart';
import './scraping.dart';
/// Collects the text of every `.concept_light-tag` element in [document].
List<String> _getTags(Document document) => [
      for (final tagElement in document.querySelectorAll('.concept_light-tag'))
        tagElement.text,
    ];
/// Splits the text of [child] on commas and returns each part trimmed and
/// lower-cased.
List<String> _getMostRecentWordTypes(Element child) {
  final wordTypes = <String>[];
  for (final rawType in child.text.split(',')) {
    wordTypes.add(rawType.trim().toLowerCase());
  }
  return wordTypes;
}
/// Parses the "other forms" row of a result into kanji/kana pairs.
///
/// The row text is split into forms, the brackets around each reading are
/// stripped, and every form is split on a space into kanji and kana. A form
/// that does not split into exactly two parts gets a null kana.
///
/// NOTE(review): the delimiter literals were empty strings in the reviewed
/// text, which made the split per-character and the replacements no-ops.
/// They are restored here as the ideographic comma and the lenticular
/// brackets - confirm against a live Jisho page.
List<KanjiKanaPair> _getOtherForms(Element child) {
  // Written as escapes so the characters survive lossy re-encoding.
  const formSeparator = '\u3001'; // IDEOGRAPHIC COMMA
  const readingOpen = '\u3010'; // LEFT BLACK LENTICULAR BRACKET
  const readingClose = '\u3011'; // RIGHT BLACK LENTICULAR BRACKET

  return child.text
      .split(formSeparator)
      .map((form) => form
          .replaceAll(readingOpen, '')
          .replaceAll(readingClose, '')
          .split(' '))
      .map((parts) => KanjiKanaPair(
            kanji: parts[0],
            kana: parts.length == 2 ? parts[1] : null,
          ))
      .toList();
}
/// Returns the text of [child] split into one entry per line.
List<String> _getNotes(Element child) {
  final noteText = child.text;
  return noteText.split('\n');
}
/// Returns the text of the `.meaning-meaning` element inside [child].
///
/// Throws a `ParserException` (via `assertNotNull`) when no such element
/// exists.
String _getMeaningString(Element child) {
  final meaningText = child.querySelector('.meaning-meaning')?.text;
  return assertNotNull(
    variable: meaningText,
    errorMessage:
        "Could not parse meaning div. Is the provided document corrupt, or has Jisho been updated?",
  );
}
/// Returns the text of the `.meaning-abstract` element inside [child], or
/// null when there is none.
///
/// Every `<a>` inside the abstract is removed from the parsed DOM first, so
/// link text is not part of the result. This mutates [child].
String? _getMeaningAbstract(Element child) {
  final abstractElement = child.querySelector('.meaning-abstract');
  if (abstractElement == null) return null;

  for (final anchor in abstractElement.querySelectorAll('a')) {
    anchor.remove();
  }
  return abstractElement.text;
}
/// Returns the comma-separated entries of the `.supplemental_info` element
/// inside [child], each trimmed, or an empty list when it is absent.
List<String> _getSupplemental(Element child) {
  final infoText = child.querySelector('.supplemental_info')?.text;
  if (infoText == null) return [];
  return [for (final entry in infoText.split(',')) entry.trim()];
}
/// Extracts the "See also" entries from [supplemental].
///
/// Every entry starting with `See also` is removed from [supplemental] in
/// place, so the list must be growable. The returned terms have the
/// `See also ` prefix stripped and appear in reverse of their original order.
List<String> _getSeeAlsoTerms(List<String> supplemental) {
  bool isSeeAlso(String entry) => entry.startsWith('See also');

  // Materialize before mutating: `reversed` is a lazy view of the list.
  final seeAlsoTerms = supplemental.reversed
      .where(isSeeAlso)
      .map((entry) => entry.replaceAll('See also ', ''))
      .toList();
  supplemental.removeWhere(isSeeAlso);
  return seeAlsoTerms;
}
/// Converts one `.sentence` element into a [PhraseScrapeSentence].
///
/// The English translation and the pieces are read first. The `.english`
/// node and every `.furigana` node are then removed from the DOM, and the
/// remaining text becomes the Japanese sentence. This mutates
/// [sentenceElement].
///
/// Throws a `ParserException` (via `assertNotNull`) when there is no
/// `.english` element.
PhraseScrapeSentence _getSentence(Element sentenceElement) {
  final String translation = assertNotNull(
    variable: sentenceElement.querySelector('.english')?.text,
    errorMessage:
        'Could not parse sentence translation. Is the provided document corrupt, or has Jisho been updated?',
  );

  // Read the pieces before any node is removed below.
  final pieces = getPieces(sentenceElement);

  sentenceElement.querySelector('.english')?.remove();
  final segments = sentenceElement.children[0].children;
  for (final segment in segments) {
    segment.querySelector('.furigana')?.remove();
  }

  return PhraseScrapeSentence(
    english: translation,
    japanese: sentenceElement.text,
    pieces: pieces,
  );
}
/// Parses every `.sentence` inside the `.sentences` container of [child].
///
/// Returns an empty list when [child] has no `.sentences` container.
List<PhraseScrapeSentence> _getSentences(Element child) {
  final container = child.querySelector('.sentences');
  if (container == null) return [];
  return [
    for (final sentenceElement in container.querySelectorAll('.sentence'))
      _getSentence(sentenceElement),
  ];
}
/// Builds a [PhraseScrapeMeaning] from one meaning row.
///
/// The helper calls stay in their original order because several of them
/// mutate [child] or the supplemental list: `_getSeeAlsoTerms` strips the
/// "See also" entries from `supplemental` before it reaches the constructor.
///
/// NOTE(review): the word-type tags of the enclosing section are not passed
/// to the meaning here - confirm this is intended.
PhraseScrapeMeaning _getMeaning(Element child) {
  final definition = _getMeaningString(child);
  final definitionAbstract = _getMeaningAbstract(child);
  final supplemental = _getSupplemental(child);

  // Named arguments are evaluated in source order, so the see-also terms
  // are extracted before the sentences are parsed.
  return PhraseScrapeMeaning(
    definition: definition,
    definitionAbstract: definitionAbstract,
    supplemental: supplemental,
    seeAlsoTerms: _getSeeAlsoTerms(supplemental),
    sentences: _getSentences(child),
  );
}
/// Collects the audio sources listed under `.concept_light-status`.
///
/// Returns an empty list when that element is absent. Each `audio > source`
/// element must carry `src` and `type` attributes; otherwise a
/// `ParserException` is thrown via `assertNotNull`. The `src` value is
/// prefixed with `https:`.
List<AudioFile> _getAudio(Document document) {
  final status = document.querySelector('.concept_light-status');
  if (status == null) return [];

  final audioFiles = <AudioFile>[];
  for (final source in status.querySelectorAll('audio > source')) {
    final src = assertNotNull(
      variable: source.attributes['src'],
      errorMessage:
          'Could not parse audio source. Is the provided document corrupt, or has Jisho been updated?',
    );
    final type = assertNotNull(
      variable: source.attributes['type'],
      errorMessage:
          'Could not parse audio type. Is the provided document corrupt, or has Jisho been updated?',
    );
    audioFiles.add(AudioFile(uri: 'https:$src', mimetype: type));
  }
  return audioFiles;
}
/// Returns the Jisho word-page URI for [searchTerm].
///
/// The term is percent-encoded with [Uri.encodeComponent] before being
/// appended to `https://jisho.org/word/`.
Uri uriForPhraseScrape(String searchTerm) {
  final encodedTerm = Uri.encodeComponent(searchTerm);
  return Uri.parse('https://jisho.org/word/$encodedTerm');
}
/// Walks the `.meanings-wrapper` of [document] and assembles the scrape data
/// for [query].
///
/// A child whose class contains `meaning-tags` starts a new section. The
/// children that follow are read as other forms, notes or meanings,
/// depending on the first word type of the most recent section. A child
/// that appears before any section row is treated as a meaning.
///
/// Throws a `ParserException` (via `assertNotNull`) when the document has no
/// `.meanings-wrapper`.
PhrasePageScrapeResultData _getMeaningsOtherFormsAndNotes(
  String query,
  Document document,
) {
  final Element meaningsWrapper = assertNotNull(
    variable: document.querySelector('.meanings-wrapper'),
    errorMessage:
        "Could not parse meanings. Is the provided document corrupt, or has Jisho been updated?",
  );

  final meanings = <PhraseScrapeMeaning>[];
  var mostRecentWordTypes = <String>[];
  var otherForms = <KanjiKanaPair>[];
  var notes = <String>[];

  for (final child in meaningsWrapper.children) {
    if (child.className.contains('meaning-tags')) {
      mostRecentWordTypes = _getMostRecentWordTypes(child);
      continue;
    }

    // No section row seen yet: there is no first word type to index.
    final section =
        mostRecentWordTypes.isEmpty ? null : mostRecentWordTypes.first;
    if (section == 'other forms') {
      otherForms = _getOtherForms(child);
    } else if (section == 'notes') {
      notes = _getNotes(child);
    } else {
      meanings.add(_getMeaning(child));
    }
  }

  return PhrasePageScrapeResultData(
    uri: uriForPhraseScrape(query).toString(),
    tags: _getTags(document),
    meanings: meanings,
    otherForms: otherForms,
    audio: _getAudio(document),
    notes: notes,
  );
}
/// Whether [document] contains a `.meanings-wrapper` element.
bool _resultWasFound(Document document) =>
    document.querySelector('.meanings-wrapper') != null;
/// Parses the HTML of a Jisho word page into a [PhrasePageScrapeResult].
///
/// [query] is echoed back in the result and used to build the result URI.
/// When the page has no `.meanings-wrapper`, the result has `found: false`
/// and no data.
PhrasePageScrapeResult parsePhrasePageData(String pageHtml, String query) {
  final document = parse(pageHtml);
  final found = _resultWasFound(document);
  return PhrasePageScrapeResult(
    found: found,
    query: query,
    data: found ? _getMeaningsOtherFormsAndNotes(query, document) : null,
  );
}

View File

@@ -0,0 +1,6 @@
import './base_uri.dart';
/// Returns the API URI that searches for [phrase].
///
/// The phrase is percent-encoded with [Uri.encodeComponent] and passed as
/// the `keyword` query parameter of `jishoApi`.
Uri uriForPhraseSearch(String phrase) {
  final keyword = Uri.encodeComponent(phrase);
  return Uri.parse('$jishoApi?keyword=$keyword');
}

75
lib/src/scraping.dart Normal file
View File

@@ -0,0 +1,75 @@
/// Removes every `\r` and `\n` from [str], then trims surrounding whitespace.
String removeNewlines(String str) {
  final lineBreak = RegExp(r'(?:\r|\n)');
  final flattened = str.replaceAll(lineBreak, '');
  return flattened.trim();
}
/// Returns the text of [data] from [startIndex] (inclusive) to [endIndex]
/// (exclusive), with all newlines removed and surrounding whitespace trimmed.
String getStringBetweenIndicies(String data, int startIndex, int endIndex) =>
    removeNewlines(data.substring(startIndex, endIndex)).trim();
/// Returns the shortest text found between the first [startString] and the
/// next [endString] in [data], or null when there is no such pair.
///
/// Both delimiters are matched literally (they are regex-escaped), and the
/// captured text may span several lines.
String? getStringBetweenStrings(
  String data,
  String startString,
  String endString,
) {
  final pattern = RegExp(
    '${RegExp.escape(startString)}(.*?)${RegExp.escape(endString)}',
    dotAll: true,
  );
  // Only the first match is needed, so stop scanning once it is found.
  return pattern.firstMatch(data)?.group(1);
}
/// Returns the integer found between [startString] and [endString] in
/// [data], or null when the delimiters are not found.
///
/// Throws a [FormatException] when the delimited text is not a valid integer.
int? getIntBetweenStrings(
  String data,
  String startString,
  String endString,
) {
  final digits = getStringBetweenStrings(data, startString, endString);
  if (digits == null) return null;
  return int.parse(digits);
}
/// Returns the first capture group of every match of [regex] in [str].
///
/// Matches whose first group did not participate are skipped.
List<String> getAllGlobalGroupMatches(String str, RegExp regex) {
  return regex
      .allMatches(str)
      .map((match) => match.group(1))
      .whereType<String>()
      .toList();
}
/// Returns the inner text of every `<a href="...">DATA</a>` found in [str].
List<String> parseAnchorsToArray(String str) =>
    getAllGlobalGroupMatches(str, RegExp(r'<a href=".*?">(.*?)<\/a>'));
/// An exception thrown whenever a parser fails to find an expected pattern.
class ParserException implements Exception {
  /// The error message to report.
  final String message;

  /// Creates a parser exception carrying [message].
  const ParserException(this.message);

  /// Includes [message], so an uncaught exception reports what went wrong
  /// instead of the default `Instance of 'ParserException'`.
  @override
  String toString() => 'ParserException: $message';
}
/// Returns [variable] unchanged when it is not null.
///
/// Throws a `ParserException` carrying [errorMessage] when [variable] is
/// null.
dynamic assertNotNull({
  required dynamic variable,
  String errorMessage =
      "Unexpected null-value occured. Is the provided document corrupt, or has Jisho been updated?",
}) {
  if (variable != null) return variable;
  throw ParserException(errorMessage);
}