diff --git a/lib/src/objects.dart b/lib/src/objects.dart index ccce5ac..5efb7e8 100644 --- a/lib/src/objects.dart +++ b/lib/src/objects.dart @@ -36,11 +36,24 @@ class PhraseScrapeMeaning { } } +class KanjiKanaPair { + String kanji; + String kana; + + KanjiKanaPair({ + String kanji, + String kana + }){ + this.kanji = kanji; + this.kana = kana; + } +} + class PhrasePageScrapeResult { bool found; String query; String uri; - List otherForms; + List otherForms; List meanings; List tags; List notes; @@ -49,7 +62,7 @@ class PhrasePageScrapeResult { bool found, String query, String uri, - List otherForms, + List otherForms, List meanings, List tags, List notes, diff --git a/lib/src/unofficial_jisho_api_base.dart b/lib/src/unofficial_jisho_api_base.dart index fdc9f55..58b408d 100644 --- a/lib/src/unofficial_jisho_api_base.dart +++ b/lib/src/unofficial_jisho_api_base.dart @@ -1,8 +1,8 @@ import 'package:unofficial_jisho_api/src/objects.dart'; import 'package:http/http.dart' as http; -import 'package:xml/xml.dart' as xml; import 'package:html_unescape/html_unescape.dart' as html_entities; import 'dart:convert'; +import 'dart:html'; final htmlUnescape = html_entities.HtmlUnescape(); @@ -230,33 +230,35 @@ KanjiResult parseKanjiPageData(String pageHtml, String kanji) { /* EXAMPLE SEARCH FUNCTIONS START */ -RegExp kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]'); +final RegExp kanjiRegex = RegExp(r'[\u4e00-\u9faf\u3400-\u4dbf]'); String uriForExampleSearch(String phrase) { return '${SCRAPE_BASE_URI}${Uri.encodeComponent(phrase)}%23sentences'; } -ExampleResultData getKanjiAndKana(xml.XmlNode div) { - final ul = div.find('ul').eq(0); - final contents = ul.contents(); +ExampleResultData getKanjiAndKana(Element div) { + final ul = div.querySelector('ul'); + final contents = ul.children; + var kanji = ''; var kana = ''; for (var i = 0; i < contents.length; i += 1) { - final content = contents.eq(i); - if (content[0].name == 'li') { + final content = contents[i]; + if (content.tagName == 'li') { final li = content; - final furigana = li.find('.furigana').text(); - final unlifted = li.find('.unlinked').text(); + final furigana = li.querySelector('.furigana').text; + final unlifted = li.querySelector('.unlinked').text; - if (furigana) { + if (furigana != null) { kanji += unlifted; kana += furigana; final kanaEnding = []; for (var j = unlifted.length - 1; j > 0; j -= 1) { - if (!unlifted[j].match(kanjiRegex)) { - kanaEnding.add(unlifted[j]); + final char = unlifted[j]; + if (!kanjiRegex.hasMatch(char)) { + kanaEnding.add(char); } else { break; } @@ -268,8 +270,8 @@ ExampleResultData getKanjiAndKana(xml.XmlNode div) { kana += unlifted; } } else { - final text = content.text().trim(); - if (text) { + final text = content.text.trim(); + if (text != null) { kanji += text; kana += text; } @@ -282,31 +284,32 @@ ExampleResultData getKanjiAndKana(xml.XmlNode div) { ); } -List getPieces(xml.XmlNode sentenceElement) { - final pieceElements = sentenceElement.find('li.clearfix'); +List getPieces(Element sentenceElement) { + final pieceElements = sentenceElement.querySelectorAll('li.clearfix'); final pieces = []; for (var pieceIndex = 0; pieceIndex < pieceElements.length; pieceIndex += 1) { - final pieceElement = pieceElements.eq(pieceIndex); + final pieceElement = pieceElements[pieceIndex]; pieces.add(ExampleSentencePiece( - lifted: pieceElement.children('.furigana').text(), - unlifted: pieceElement.children('.unlinked').text(), + lifted: pieceElement.querySelector('.furigana').text, + unlifted: pieceElement.querySelector('.unlinked').text, )); } return pieces; } -ExampleResultData parseExampleDiv(xml.XmlNode div) { +ExampleResultData parseExampleDiv(Element div) { final result = getKanjiAndKana(div); - result.english = div.find('.english').text(); + result.english = div.querySelector('.english').text; result.pieces = getPieces(div); return result; } ExampleResults parseExamplePageData(String pageHtml, String phrase) { - final document = xml.parse(pageHtml); - final divs = document.descendants.where((node) => node.attributes[0].value == 'sentence_content').toList(); + final parser = DomParser(); + final document = parser.parseFromString(pageHtml, 'text/html'); + final divs = document.querySelectorAll('.sentence_content'); final results = divs.map((div) => parseExampleDiv(div)); @@ -323,77 +326,72 @@ ExampleResults parseExamplePageData(String pageHtml, String phrase) { /* PHRASE SCRAPE FUNCTIONS START */ -List getTags(xml.XmlDocument document) { +List getTags(Document document) { final tags = []; - final tagElements = document.descendants.where((node) => node.attributes[0].value == 'concept_light-tag').toList(); + final tagElements = document.querySelectorAll('.concept_light-tag'); for (var i = 0; i < tagElements.length; i += 1) { - final tagText = tagElements.eq(i).text(); + final tagText = tagElements[i].text; tags.add(tagText); } return tags; } -PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(xml.XmlDocument document) { +PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(Document document) { final returnValues = PhrasePageScrapeResult( otherForms: [], notes: [] ); - //TODO: Fix // const meaningsWrapper = $('#page_container > div > div > article > div > div.concept_light-meanings.medium-9.columns > div'); - final meaningsWrapper = document.descendants.where((node) => node.attributes[0].value == 'page_container').toList(); + final meaningsWrapper = document.querySelector('.meanings-wrapper'); - - - final meaningsChildren = meaningsWrapper.children(); + final meaningsChildren = meaningsWrapper.children; final meanings = []; var mostRecentWordTypes = []; for (var meaningIndex = 0; meaningIndex < meaningsChildren.length; meaningIndex += 1) { - final child = meaningsChildren.eq(meaningIndex); - if (child.hasClass('meaning-tags')) { - mostRecentWordTypes = child.text().split(',').map((s) => s.trim().toLowerCase()); + final child = meaningsChildren[meaningIndex]; + if (child.className.contains('meaning-tags')) { + mostRecentWordTypes = child.text.split(',').map((s) => s.trim().toLowerCase()).toList(); } else if (mostRecentWordTypes[0] == 'other forms') { - returnValues.otherForms = child.text().split('、') - .map((s) => s.replaceAll('【', '').replaceAll('】', '').split(' ')) - .map((a) => (ExampleResultData( kanji: a[0], kana: a[1] ))); - } else if (mostRecentWordTypes[0] == 'notes') { - returnValues.notes = child.text().split('\n'); - } else { - final meaning = child.find('.meaning-meaning').text(); - final meaningAbstract = child.find('.meaning-abstract') - .find('a') - .remove() - .end() - .text(); - final supplemental = child.find('.supplemental_info').text().split(',') + returnValues.otherForms = child.text.split('、') + .map((s) => s.replaceAll('【', '').replaceAll('】', '').split(' ')) + .map((a) => (KanjiKanaPair( kanji: a[0], kana: a[1] ))); + + } else if (mostRecentWordTypes[0] == 'notes') { + returnValues.notes = child.text.split('\n'); + } else { + final meaning = child.querySelector('.meaning-meaning').text; + child.querySelector('.meaning-abstract') + .querySelector('a') + .remove(); + final meaningAbstract = child.querySelector('.meaning-abstract').text; + + final supplemental = child.querySelector('.supplemental_info').text.split(',') .map((s) => s.trim()) - .filter((s) => s); + .toList(); final seeAlsoTerms = []; for (var i = supplemental.length - 1; i >= 0; i -= 1) { final supplementalEntry = supplemental[i]; if (supplementalEntry.startsWith('See also')) { seeAlsoTerms.add(supplementalEntry.replaceAll('See also ', '')); - supplemental.splice(i, 1); + supplemental.removeAt(i); } } final sentences = []; - final sentenceElements = child.find('.sentences').children('.sentence'); + final sentenceElements = child.querySelector('.sentences').querySelectorAll('.sentence'); for (var sentenceIndex = 0; sentenceIndex < sentenceElements.length; sentenceIndex += 1) { - final sentenceElement = sentenceElements.eq(sentenceIndex); + final sentenceElement = sentenceElements[sentenceIndex]; - final english = sentenceElement.find('.english').text(); + final english = sentenceElement.querySelector('.english').text; final pieces = getPieces(sentenceElement); - final japanese = sentenceElement - .find('.english').remove().end() - .find('.furigana') - .remove() - .end() - .text(); + sentenceElement.querySelector('.english').remove(); + sentenceElement.querySelector('.furigana').remove(); + final japanese = sentenceElement.text; sentences.add(PhraseScrapeSentence(english: english, japanese: japanese, pieces: pieces)); } @@ -419,7 +417,8 @@ String uriForPhraseScrape(String searchTerm) { } PhrasePageScrapeResult parsePhrasePageData(String pageHtml, String query) { - final document = xml.parse(pageHtml); + final parser = DomParser(); + final document = parser.parseFromString(pageHtml, 'text/html'); final result = getMeaningsOtherFormsAndNotes(document); result.found = true; diff --git a/test/unofficial_jisho_api_test.dart b/test/unofficial_jisho_api_test.dart index fd570b0..8566fa3 100644 --- a/test/unofficial_jisho_api_test.dart +++ b/test/unofficial_jisho_api_test.dart @@ -14,29 +14,13 @@ List getFilePaths(String dirname) { return filenames.map((filename) => path.join(currentdir, 'test', dirname, filename.path)).toList(); } -void runTestCases(List testCaseFiles, String apiFunction) async { +void runTestCases(List testCaseFiles, Function apiFunction) async { for (var testCount = 0; testCount < testCaseFiles.length; testCount++) { final file = await File(testCaseFiles[testCount]).readAsString(); final testCase = jsonDecode(file); await test('Test ${testCount}', () async { - switch(apiFunction) { - case 'searchForKanji': { - final result = await jisho.searchForKanji(testCase['query']); - expect(result.toJson(), testCase['expectedResult']); - break; - } - case 'searchForExamples': { - final result = await jisho.searchForExamples(testCase['query']); - expect(result, testCase['expectedResult']); - break; - } - case 'scrapeForPhrase': { - final result = await jisho.scrapeForPhrase(testCase['query']); - expect(result, testCase['expectedResult']); - break; - } - throw 'No API function provided'; - } + final result = await apiFunction(testCase['query']); + expect(result, testCase['expectedResult']); }); } } @@ -45,10 +29,8 @@ void main() async { await test_local_functions(); - await runTestCases(getFilePaths('kanji_test_cases'), 'searchForKanji'); - - await runTestCases(getFilePaths('example_test_cases'), 'searchForExamples'); - - await runTestCases(getFilePaths('phrase_scrape_test_cases'), 'scrapeForPhrase'); + await runTestCases(getFilePaths('kanji_test_cases'), jisho.searchForKanji); + await runTestCases(getFilePaths('example_test_cases'), jisho.searchForExamples); + await runTestCases(getFilePaths('phrase_scrape_test_cases'), jisho.scrapeForPhrase); }