1
0
mirror of https://github.com/h7x4/unofficial_jisho_api_dart.git synced 2025-09-21 04:55:56 +02:00

Split into files

This commit is contained in:
2020-06-22 15:18:48 +02:00
parent 39c2eed084
commit 86b8ccfdbb
8 changed files with 488 additions and 779 deletions

108
lib/src/exampleSearch.dart Normal file
View File

@@ -0,0 +1,108 @@
import './baseURI.dart';
import './objects.dart';
import 'package:html/parser.dart';
import 'package:html/dom.dart';
final RegExp kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]');
String uriForExampleSearch(String phrase) {
return '${SCRAPE_BASE_URI}${Uri.encodeComponent(phrase)}%23sentences';
}
/* TODO: This is the wrong approach.
* Symbols such as 、「」。 are missing in mid sentence
* Maybe also JP fullwidth numbers?
*/
String getEndSymbolsOfExampleSentence(Element ul) {
final endSymbols = RegExp(r'<\/li>([^<>]+)$');
return endSymbols.firstMatch(ul.innerHtml).group(1);
}
ExampleResultData getKanjiAndKana(Element div) {
final ul = div.querySelector('ul');
final contents = ul.children;
var kanji = '';
var kana = '';
for (var i = 0; i < contents.length; i += 1) {
final content = contents[i];
if (content.localName == 'li') {
final li = content;
final furigana = li.querySelector('.furigana')?.text;
final unlifted = li.querySelector('.unlinked')?.text;
if (furigana != null) {
kanji += unlifted;
kana += furigana;
final kanaEnding = [];
for (var j = unlifted.length - 1; j > 0; j -= 1) {
final char = unlifted[j];
if (!kanjiRegex.hasMatch(char)) {
kanaEnding.add(char);
} else {
break;
}
}
kana += kanaEnding.reversed.join('');
} else {
kanji += unlifted;
kana += unlifted;
}
} else {
final text = content.text.trim();
if (text != null) {
kanji += text;
kana += text;
}
}
}
final endSymbols = getEndSymbolsOfExampleSentence(ul).trim();
kanji+= endSymbols;
kana += endSymbols;
return ExampleResultData(
kanji: kanji,
kana: kana,
);
}
List<ExampleSentencePiece> getPieces(Element sentenceElement) {
final pieceElements = sentenceElement.querySelectorAll('li.clearfix');
final List<ExampleSentencePiece> pieces = [];
for (var pieceIndex = 0; pieceIndex < pieceElements.length; pieceIndex += 1) {
final pieceElement = pieceElements[pieceIndex];
pieces.add(ExampleSentencePiece(
lifted: pieceElement.querySelector('.furigana')?.text,
unlifted: pieceElement.querySelector('.unlinked')?.text,
));
}
return pieces;
}
ExampleResultData parseExampleDiv(Element div) {
final result = getKanjiAndKana(div);
result.english = div.querySelector('.english').text;
result.pieces = getPieces(div) ?? [];
return result;
}
ExampleResults parseExamplePageData(String pageHtml, String phrase) {
final document = parse(pageHtml);
final divs = document.querySelectorAll('.sentence_content');
final results = divs.map((div) => parseExampleDiv(div)).toList();
return ExampleResults(
query: phrase,
found: results.isNotEmpty,
results: results ?? [],
uri: uriForExampleSearch(phrase),
phrase: phrase,
);
}