Update code
This commit is contained in:
parent
510b27b802
commit
55390c0d0f
|
@ -334,7 +334,7 @@ ExampleResults parseExamplePageData(String pageHtml, String phrase) {
|
||||||
/* PHRASE SCRAPE FUNCTIONS START */
|
/* PHRASE SCRAPE FUNCTIONS START */
|
||||||
|
|
||||||
List<String> getTags(Document document) {
|
List<String> getTags(Document document) {
|
||||||
final tags = [];
|
final List<String> tags = [];
|
||||||
final tagElements = document.querySelectorAll('.concept_light-tag');
|
final tagElements = document.querySelectorAll('.concept_light-tag');
|
||||||
|
|
||||||
for (var i = 0; i < tagElements.length; i += 1) {
|
for (var i = 0; i < tagElements.length; i += 1) {
|
||||||
|
@ -345,6 +345,37 @@ List<String> getTags(Document document) {
|
||||||
return tags;
|
return tags;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Pulls the "See also" cross-references out of [supplemental].
///
/// Every entry that starts with `See also` is removed from [supplemental]
/// (the list is mutated in place) and collected into the returned list with
/// its leading `See also ` prefix stripped.
///
/// The list is walked from the back so that `removeAt` never shifts an index
/// that still has to be visited; consequently the returned terms appear in
/// the reverse of their order in [supplemental].
List<String> getSeeAlsoTerms(List<String> supplemental) {
  const prefix = 'See also ';
  final List<String> seeAlsoTerms = [];

  for (var i = supplemental.length - 1; i >= 0; i -= 1) {
    final supplementalEntry = supplemental[i];
    if (supplementalEntry.startsWith('See also')) {
      // Strip only the leading prefix. Removing every occurrence would also
      // eat a "See also " that happens to be part of the referenced term.
      final term = supplementalEntry.startsWith(prefix)
          ? supplementalEntry.substring(prefix.length)
          : supplementalEntry;
      seeAlsoTerms.add(term);
      supplemental.removeAt(i);
    }
  }

  return seeAlsoTerms;
}
|
||||||
|
|
||||||
|
/// Builds one [PhraseScrapeSentence] per element in [sentenceElements].
///
/// [sentenceElements] may be `null`, in which case an empty list is returned.
/// For each element the English text is read from its `.english` child, the
/// pieces are obtained from `getPieces`, and the Japanese text is whatever
/// text remains once the `.english` and `.furigana` children have been
/// removed. Note that this removal mutates the passed-in elements.
List<PhraseScrapeSentence> getSentences(sentenceElements) {
  final List<PhraseScrapeSentence> sentences = [];

  for (final sentenceElement in sentenceElements ?? const []) {
    // A sentence without an `.english` child yields a null English text
    // instead of throwing, matching the null-aware removal below.
    final english = sentenceElement.querySelector('.english')?.text;

    // Pieces must be collected before the element is stripped below.
    final pieces = getPieces(sentenceElement);

    // Drop the translation and the reading so that only the Japanese
    // sentence text is left in the element.
    sentenceElement.querySelector('.english')?.remove();
    sentenceElement.querySelector('.furigana')?.remove();
    final japanese = sentenceElement.text;

    sentences.add(PhraseScrapeSentence(english: english, japanese: japanese, pieces: pieces));
  }

  return sentences;
}
|
||||||
|
|
||||||
PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(Document document) {
|
PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(Document document) {
|
||||||
final returnValues = PhrasePageScrapeResult( otherForms: [], notes: [] );
|
final returnValues = PhrasePageScrapeResult( otherForms: [], notes: [] );
|
||||||
|
|
||||||
|
@ -352,7 +383,7 @@ PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(Document document) {
|
||||||
final meaningsWrapper = document.querySelector('.meanings-wrapper');
|
final meaningsWrapper = document.querySelector('.meanings-wrapper');
|
||||||
|
|
||||||
final meaningsChildren = meaningsWrapper.children;
|
final meaningsChildren = meaningsWrapper.children;
|
||||||
final meanings = [];
|
final List<PhraseScrapeMeaning> meanings = [];
|
||||||
|
|
||||||
var mostRecentWordTypes = [];
|
var mostRecentWordTypes = [];
|
||||||
for (var meaningIndex = 0; meaningIndex < meaningsChildren.length; meaningIndex += 1) {
|
for (var meaningIndex = 0; meaningIndex < meaningsChildren.length; meaningIndex += 1) {
|
||||||
|
@ -360,48 +391,22 @@ PhrasePageScrapeResult getMeaningsOtherFormsAndNotes(Document document) {
|
||||||
if (child.className.contains('meaning-tags')) {
|
if (child.className.contains('meaning-tags')) {
|
||||||
mostRecentWordTypes = child.text.split(',').map((s) => s.trim().toLowerCase()).toList();
|
mostRecentWordTypes = child.text.split(',').map((s) => s.trim().toLowerCase()).toList();
|
||||||
} else if (mostRecentWordTypes[0] == 'other forms') {
|
} else if (mostRecentWordTypes[0] == 'other forms') {
|
||||||
|
|
||||||
returnValues.otherForms = child.text.split('、')
|
returnValues.otherForms = child.text.split('、')
|
||||||
.map((s) => s.replaceAll('【', '').replaceAll('】', '').split(' '))
|
.map((s) => s.replaceAll('【', '').replaceAll('】', '').split(' '))
|
||||||
.map((a) => (KanjiKanaPair( kanji: a[0], kana: a[1] )));
|
.map((a) => (KanjiKanaPair( kanji: a[0], kana: (a.length == 2) ? a[1] : null ))).toList();
|
||||||
|
|
||||||
} else if (mostRecentWordTypes[0] == 'notes') {
|
} else if (mostRecentWordTypes[0] == 'notes') {
|
||||||
returnValues.notes = child.text.split('\n');
|
returnValues.notes = child.text.split('\n');
|
||||||
} else {
|
} else {
|
||||||
final meaning = child.querySelector('.meaning-meaning').text;
|
final meaning = child.querySelector('.meaning-meaning').text;
|
||||||
child.querySelector('.meaning-abstract')
|
child.querySelector('.meaning-abstract')?.querySelector('a')?.remove();
|
||||||
.querySelector('a')
|
final meaningAbstract = child.querySelector('.meaning-abstract')?.text;
|
||||||
.remove();
|
|
||||||
final meaningAbstract = child.querySelector('.meaning-abstract').text;
|
|
||||||
|
|
||||||
final supplemental = child.querySelector('.supplemental_info').text.split(',')
|
final supplemental = child.querySelector('.supplemental_info')?.text?.split(',')?.map((s) => s.trim())?.toList();
|
||||||
.map((s) => s.trim())
|
final seeAlsoTerms = (supplemental != null) ? getSeeAlsoTerms(supplemental) : null;
|
||||||
.toList();
|
|
||||||
|
|
||||||
final seeAlsoTerms = [];
|
final sentenceElements = child.querySelector('.sentences')?.querySelectorAll('.sentence');
|
||||||
for (var i = supplemental.length - 1; i >= 0; i -= 1) {
|
final sentences = (sentenceElements != null) ? getSentences(sentenceElements) : null;
|
||||||
final supplementalEntry = supplemental[i];
|
|
||||||
if (supplementalEntry.startsWith('See also')) {
|
|
||||||
seeAlsoTerms.add(supplementalEntry.replaceAll('See also ', ''));
|
|
||||||
supplemental.removeAt(i);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
final sentences = [];
|
|
||||||
final sentenceElements = child.querySelector('.sentences').querySelectorAll('.sentence');
|
|
||||||
|
|
||||||
for (var sentenceIndex = 0; sentenceIndex < sentenceElements.length; sentenceIndex += 1) {
|
|
||||||
final sentenceElement = sentenceElements[sentenceIndex];
|
|
||||||
|
|
||||||
final english = sentenceElement.querySelector('.english').text;
|
|
||||||
final pieces = getPieces(sentenceElement);
|
|
||||||
|
|
||||||
sentenceElement.querySelector('.english').remove();
|
|
||||||
sentenceElement.querySelector('.furigana').remove();
|
|
||||||
final japanese = sentenceElement.text;
|
|
||||||
|
|
||||||
sentences.add(PhraseScrapeSentence(english: english, japanese: japanese, pieces: pieces));
|
|
||||||
}
|
|
||||||
|
|
||||||
meanings.add(PhraseScrapeMeaning(
|
meanings.add(PhraseScrapeMeaning(
|
||||||
seeAlsoTerms: seeAlsoTerms,
|
seeAlsoTerms: seeAlsoTerms,
|
||||||
|
@ -465,19 +470,19 @@ class JishoApi {
|
||||||
/// @async
|
/// @async
|
||||||
///
/// Completes with a result whose `found` is `false` when the server answers
/// with HTTP 404; otherwise the response body is handed to
/// `parsePhrasePageData`. Transport-level failures raised by `http.get`
/// propagate to the caller unchanged.
Future<PhrasePageScrapeResult> scrapeForPhrase(String phrase) async {
  final uri = uriForPhraseScrape(phrase);
  final response = await http.get(uri);

  // `http.get` does not throw on non-2xx responses, so a missing page has to
  // be detected from the status code rather than in a catch block.
  if (response.statusCode == 404) {
    return PhrasePageScrapeResult(
      query: phrase,
      found: false,
    );
  }

  return parsePhrasePageData(response.body, phrase);
}
|
||||||
|
|
||||||
/// Scrape Jisho.org for information about a kanji character.
|
/// Scrape Jisho.org for information about a kanji character.
|
||||||
|
|
Loading…
Reference in New Issue