From 8701d50e640cdd726b040a9b485dd0ea6ea031ac Mon Sep 17 00:00:00 2001
From: h7x4
Date: Wed, 27 May 2020 17:28:01 +0200
Subject: [PATCH] Add data buffering

Cache the Jisho results for each JLPT level in data/jisho/<level>.json
and read them back on later runs instead of querying Jisho again.

The buffer write is awaited, so "Written Jisho data to buffer" is only
logged once the file has actually been written. A failed write is logged
and does not abort the run, because the results are still in memory.
---
Note: this copy of the patch was re-flowed from a whitespace-collapsed
paste, so the indentation of the JavaScript lines is reconstructed. If
context lines fail to match, apply with `git apply --ignore-whitespace`.
The blob ids on the `index` lines are those of the original commit.

 .gitignore                      |  5 +++++
 Makefile                        |  1 +
 jishoScrape/index.js            | 32 +++++++++++++++++++++++++++-----
 jishoScrape/src/dataFetching.js | 18 +++++++++++++++---
 4 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/.gitignore b/.gitignore
index b367146..8fdd0d6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,10 +1,15 @@
+#LaTeX output files
 main.out
 main.log
 main.aux
 main.synctex.gz
 main.toc
 
+#Data to be reprocessed
 data/testing/
 data/pages/
 data/tables/
+data/jisho
+
+#Libraries
 jishoScrape/node_modules
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 0c475cd..85aa5b5 100644
--- a/Makefile
+++ b/Makefile
@@ -24,6 +24,7 @@ folders:
 	mkdir data/pages
 	mkdir data/tables
 	mkdir data/testing
+	mkdir data/jisho
 	cd jishoScrape
 	npm install
 	cd ..
diff --git a/jishoScrape/index.js b/jishoScrape/index.js
index 463d160..98edbb0 100644
--- a/jishoScrape/index.js
+++ b/jishoScrape/index.js
@@ -1,7 +1,7 @@
 const fs = require('fs');
 
 /* Import local files */
-const {fetchKanjiFromTxt, fetchKanjiFromJisho} = require('./src/dataFetching.js');
+const {fetchKanjiFromTxt, fetchJishoBufferData, fetchKanjiFromJisho} = require('./src/dataFetching.js');
 const {getKanjiTexData} = require('./src/texConversion.js');
 const {kanjiTable} = require('./src/kanjiTables.js');
 
@@ -10,11 +10,33 @@ async function main(jlptLevel) {
 
   const jlptLevelCaps = jlptLevel.toUpperCase();
 
-  const kanjiArray = await fetchKanjiFromTxt(`./data/txt/${jlptLevel}.txt`);
-  console.log(`${jlptLevelCaps}: Fetched txt`);
+  /* Fetch data from buffer if available.
+   * Else fetch data from txt and jisho requests,
+   * and make buffer files
+   */
+  const bufferPath = `./data/jisho/${jlptLevel}.json`;
+  let jishoResults;
+  if (fs.existsSync(bufferPath)) {
+    jishoResults = await fetchJishoBufferData(bufferPath);
+    console.log(`${jlptLevelCaps}: Fetched Jisho data from buffer`);
 
-  const jishoResults = await fetchKanjiFromJisho(kanjiArray);
-  console.log(`${jlptLevelCaps}: Fetched Jisho data`);
+  } else {
+    const kanjiArray = await fetchKanjiFromTxt(`./data/txt/${jlptLevel}.txt`);
+    console.log(`${jlptLevelCaps}: Fetched txt`);
+
+    jishoResults = await fetchKanjiFromJisho(kanjiArray);
+    console.log(`${jlptLevelCaps}: Fetched Jisho data`);
+
+    /* Wait for the write to finish before reporting it. A failed write is
+     * only logged, since the results are still available in memory.
+     */
+    try {
+      await fs.promises.writeFile(bufferPath, JSON.stringify(jishoResults, null, " "));
+      console.log(`${jlptLevelCaps}: Written Jisho data to buffer`);
+    } catch (err) {
+      console.error(err);
+    }
+  }
 
   const sortedKanjiArray = jishoResults.map(result => result.query);
   const texData = getKanjiTexData(jishoResults);
diff --git a/jishoScrape/src/dataFetching.js b/jishoScrape/src/dataFetching.js
index b876084..8c2aba2 100644
--- a/jishoScrape/src/dataFetching.js
+++ b/jishoScrape/src/dataFetching.js
@@ -6,7 +6,7 @@ const jisho = new jishoApi();
 /**
  * Reads a txt file and splits the characters into an array
  * @param {string} file Path to file
- * @returns {string[]} Kanji
+ * @returns {string[]} A list of Kanji
  */
 async function fetchKanjiFromTxt(file) {
   const read = util.promisify(fs.readFile);
@@ -14,10 +14,21 @@ async function fetchKanjiFromTxt(file) {
   return data.split('');
 }
 
+/**
+ * Reads a json buffer file and returns the parsed Jisho results
+ * @param {string} file Path to file
+ * @returns {Promise<object[]>} The Jisho results stored in the file
+ */
+async function fetchJishoBufferData(file) {
+  const read = util.promisify(fs.readFile);
+  const data = await read(file, 'utf8');
+  return JSON.parse(data);
+}
+
 /**
  * Makes a delayed kanji search request in order not to overload the server.
- * @param {string} kanji The character to search for
- * @param {number} delay Number of milliseconds delay to the request
+ * @param {string} kanji A character to search for
+ * @param {number} delay A number of milliseconds delay to the request
  * @return {promise} A promise that's going to run a request after the specified delay
  */
 async function delayedJishoCall(kanji, delay) {
@@ -43,4 +54,5 @@ async function fetchKanjiFromJisho(kanjiArray) {
 }
 
 exports.fetchKanjiFromTxt = fetchKanjiFromTxt;
+exports.fetchJishoBufferData = fetchJishoBufferData;
 exports.fetchKanjiFromJisho = fetchKanjiFromJisho;
\ No newline at end of file